root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.35
Committed: 2006-01-15T22:42:51Z by gbeauche
Branch: MAIN
Changes since 1.34: +34 -5 lines
Log Message:
fix stack alignment (theoretically, but it was OK in practice) in generated
functions, move m68k_compile_execute() to compiler/ dir since it's JIT
generic and it now depends on USE_PUSH_POP (as it should)

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.29 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.11 * Gwenole Beauchesne
8     *
9 gbeauche 1.29 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.11 *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.24 /* NOTE: support for AMD64 assumes translation cache and other code
35     * buffers are allocated into a 32-bit address space because (i) B2/JIT
36     * code is not 64-bit clean and (ii) it's faster to resolve branches
37     * that way.
38     */
39     #if !defined(__i386__) && !defined(__x86_64__)
40     #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41     #endif
42    
43 gbeauche 1.1 #define USE_MATCH 0
44    
45     /* kludge for Brian, so he can compile under MSVC++ */
46     #define USE_NORMAL_CALLING_CONVENTION 0
47    
48     #ifndef WIN32
49 gbeauche 1.20 #include <unistd.h>
50 gbeauche 1.1 #include <sys/types.h>
51     #include <sys/mman.h>
52     #endif
53    
54     #include <stdlib.h>
55     #include <fcntl.h>
56     #include <errno.h>
57    
58     #include "sysdeps.h"
59     #include "cpu_emulation.h"
60     #include "main.h"
61     #include "prefs.h"
62     #include "user_strings.h"
63 gbeauche 1.2 #include "vm_alloc.h"
64 gbeauche 1.1
65     #include "m68k.h"
66     #include "memory.h"
67     #include "readcpu.h"
68     #include "newcpu.h"
69     #include "comptbl.h"
70     #include "compiler/compemu.h"
71     #include "fpu/fpu.h"
72     #include "fpu/flags.h"
73    
74     #define DEBUG 1
75     #include "debug.h"
76    
77     #ifdef ENABLE_MON
78     #include "mon.h"
79     #endif
80    
81     #ifndef WIN32
82 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
83     #define PROFILE_UNTRANSLATED_INSNS 1
84 gbeauche 1.1 #endif
85    
86 gbeauche 1.28 #if defined(__x86_64__) && 0
87     #define RECORD_REGISTER_USAGE 1
88     #endif
89    
90 gbeauche 1.1 #ifdef WIN32
91     #undef write_log
92     #define write_log dummy_write_log
93     static void dummy_write_log(const char *, ...) { }
94     #endif
95    
96     #if JIT_DEBUG
97     #undef abort
98     #define abort() do { \
99     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100     exit(EXIT_FAILURE); \
101     } while (0)
102     #endif
103    
104 gbeauche 1.28 #if RECORD_REGISTER_USAGE
105     static uint64 reg_count[16];
106     static int reg_count_local[16];
107    
108     static int reg_count_compare(const void *ap, const void *bp)
109     {
110     const int a = *((int *)ap);
111     const int b = *((int *)bp);
112     return (reg_count[b] > reg_count[a]) - (reg_count[b] < reg_count[a]); /* descending; avoids truncating a uint64 difference to int */
113     }
114     #endif
115    
116 gbeauche 1.1 #if PROFILE_COMPILE_TIME
117     #include <time.h>
118     static uae_u32 compile_count = 0;
119     static clock_t compile_time = 0;
120     static clock_t emul_start_time = 0;
121     static clock_t emul_end_time = 0;
122     #endif
123    
124 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
125     const int untranslated_top_ten = 20;
126     static uae_u32 raw_cputbl_count[65536] = { 0, };
127     static uae_u16 opcode_nums[65536];
128    
129     static int untranslated_compfn(const void *e1, const void *e2)
130     {
131     const uae_u32 n1 = raw_cputbl_count[*(const uae_u16 *)e1];
        const uae_u32 n2 = raw_cputbl_count[*(const uae_u16 *)e2];
        return (n1 < n2) - (n1 > n2); /* descending; a bare '<' is not a valid qsort comparator */
132     }
133     #endif
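/* A sketch of how these counters can be consumed at shutdown, assuming
 * only the declarations above and the standard <stdlib.h> qsort contract:
 *
 *   for (int i = 0; i < 65536; i++)
 *       opcode_nums[i] = i;
 *   qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
 *   // opcode_nums[] is now ordered by descending raw_cputbl_count[],
 *   // so the first untranslated_top_ten entries are the hottest
 *   // untranslated opcodes.
 */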
134    
135 gbeauche 1.35 #if ! USE_PUSH_POP
136     static void (*m68k_do_compile_execute)(void) = NULL;
137     #endif
138    
139 gbeauche 1.24 static compop_func *compfunctbl[65536];
140     static compop_func *nfcompfunctbl[65536];
141     static cpuop_func *nfcpufunctbl[65536];
142 gbeauche 1.1 uae_u8* comp_pc_p;
143    
144 gbeauche 1.26 // From main_unix.cpp
145     extern bool ThirtyThreeBitAddressing;
146    
147 gbeauche 1.6 // From newcpu.cpp
148     extern bool quit_program;
149    
150 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
151     #if JIT_DEBUG
152     static bool JITDebug = false; // Enable runtime disassemblers through mon?
153     #else
154     const bool JITDebug = false; // Don't use JIT debug mode at all
155     #endif
156 gbeauche 1.33 #if USE_INLINING
157     static bool follow_const_jumps = true; // Flag: translation through constant jumps
158     #else
159     const bool follow_const_jumps = false;
160     #endif
161 gbeauche 1.1
162 gbeauche 1.22 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
163 gbeauche 1.1 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
164 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
165 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
166     static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions ?
167     static bool have_cmov = false; // target has CMOV instructions ?
168 gbeauche 1.30 static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
169 gbeauche 1.1 static bool have_rat_stall = true; // target has partial register stalls ?
170 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
171     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
172 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
173 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
174     static int align_jumps = 32; // Align the start of jumps
175 gbeauche 1.1 static int optcount[10] = {
176     10, // How often a block has to be executed before it is translated
177     0, // How often to use naive translation
178     0, 0, 0, 0,
179     -1, -1, -1, -1
180     };
181    
182     struct op_properties {
183     uae_u8 use_flags;
184     uae_u8 set_flags;
185     uae_u8 is_addx;
186     uae_u8 cflow;
187     };
188     static op_properties prop[65536];
189    
190     static inline int end_block(uae_u32 opcode)
191     {
192     return (prop[opcode].cflow & fl_end_block);
193     }
194    
195 gbeauche 1.8 static inline bool is_const_jump(uae_u32 opcode)
196     {
197     return (prop[opcode].cflow == fl_const_jump);
198     }
199    
200 gbeauche 1.18 static inline bool may_trap(uae_u32 opcode)
201     {
202     return (prop[opcode].cflow & fl_trap);
203     }
204    
205     static inline unsigned int cft_map (unsigned int f)
206     {
207     #ifndef HAVE_GET_WORD_UNSWAPPED
208     return f;
209     #else
210     return ((f >> 8) & 255) | ((f & 255) << 8);
211     #endif
212     }
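/* Example: with HAVE_GET_WORD_UNSWAPPED defined, cft_map(0x754e) == 0x4e75
   (RTS); the 68k decode tables (e.g. table68k below) are indexed through
   this mapping on hosts that fetch 68k words byte-swapped. */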
213    
214 gbeauche 1.1 uae_u8* start_pc_p;
215     uae_u32 start_pc;
216     uae_u32 current_block_pc_p;
217 gbeauche 1.24 static uintptr current_block_start_target;
218 gbeauche 1.1 uae_u32 needed_flags;
219 gbeauche 1.24 static uintptr next_pc_p;
220     static uintptr taken_pc_p;
221 gbeauche 1.1 static int branch_cc;
222     static int redo_current_block;
223    
224     int segvcount=0;
225     int soft_flush_count=0;
226     int hard_flush_count=0;
227     int checksum_count=0;
228     static uae_u8* current_compile_p=NULL;
229     static uae_u8* max_compile_start;
230     static uae_u8* compiled_code=NULL;
231     static uae_s32 reg_alloc_run;
232 gbeauche 1.24 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
233     static uae_u8* popallspace=NULL;
234 gbeauche 1.1
235     void* pushall_call_handler=NULL;
236     static void* popall_do_nothing=NULL;
237     static void* popall_exec_nostats=NULL;
238     static void* popall_execute_normal=NULL;
239     static void* popall_cache_miss=NULL;
240     static void* popall_recompile_block=NULL;
241     static void* popall_check_checksum=NULL;
242    
243     /* The 68k only ever executes from even addresses. So right now, we
244     * waste half the entries in this array.
245     * UPDATE: We now use those entries to store the start of the linked
246     * lists that we maintain for each hash result.
247     */
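/* Lookup sketch, matching get_blockinfo() and add_to_cl_list() below:
 *
 *   uae_u32 cl = cacheline(addr);
 *   cpuop_func *handler = cache_tags[cl].handler;  // code to dispatch to
 *   blockinfo  *head    = cache_tags[cl+1].bi;     // chain head for bucket
 */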
248     cacheline cache_tags[TAGSIZE];
249     int letit=0;
250     blockinfo* hold_bi[MAX_HOLD_BI];
251     blockinfo* active;
252     blockinfo* dormant;
253    
254     /* 68040 */
255     extern struct cputbl op_smalltbl_0_nf[];
256     extern struct comptbl op_smalltbl_0_comp_nf[];
257     extern struct comptbl op_smalltbl_0_comp_ff[];
258    
259     /* 68020 + 68881 */
260     extern struct cputbl op_smalltbl_1_nf[];
261    
262     /* 68020 */
263     extern struct cputbl op_smalltbl_2_nf[];
264    
265     /* 68010 */
266     extern struct cputbl op_smalltbl_3_nf[];
267    
268     /* 68000 */
269     extern struct cputbl op_smalltbl_4_nf[];
270    
271     /* 68000 slow but compatible. */
272     extern struct cputbl op_smalltbl_5_nf[];
273    
274     static void flush_icache_hard(int n);
275     static void flush_icache_lazy(int n);
276     static void flush_icache_none(int n);
277     void (*flush_icache)(int n) = flush_icache_none;
278    
279    
280    
281     bigstate live;
282     smallstate empty_ss;
283     smallstate default_ss;
284     static int optlev;
285    
286     static int writereg(int r, int size);
287     static void unlock2(int r);
288     static void setlock(int r);
289     static int readreg_specific(int r, int size, int spec);
290     static int writereg_specific(int r, int size, int spec);
291     static void prepare_for_call_1(void);
292     static void prepare_for_call_2(void);
293     static void align_target(uae_u32 a);
294    
295     static uae_s32 nextused[VREGS];
296    
297     uae_u32 m68k_pc_offset;
298    
299     /* Some arithmetic operations can be optimized away if the operands
300     * are known to be constant. But that's only a good idea when the
301     * side effects they would have on the flags are not important. This
302     * variable indicates whether we need the side effects or not
303     */
304     uae_u32 needflags=0;
305    
306     /* Flag handling is complicated.
307     *
308     * x86 instructions create flags, which quite often are exactly what we
309     * want. So at times, the "68k" flags are actually in the x86 flags.
310     *
311     * Then again, sometimes we do x86 instructions that clobber the x86
312     * flags, but don't represent a corresponding m68k instruction. In that
313     * case, we have to save them.
314     *
315     * We used to save them to the stack, but now store them back directly
316     * into the regflags.cznv of the traditional emulation. Thus some odd
317     * names.
318     *
319     * So flags can be in either of two places (used to be three; boy were
320     * things complicated back then!); And either place can contain either
321     * valid flags or invalid trash (and on the stack, there was also the
322     * option of "nothing at all", now gone). A couple of variables keep
323     * track of the respective states.
324     *
325     * To make things worse, we might or might not be interested in the flags.
326     * By default, we are, but a call to dont_care_flags can change that
327     * until the next call to live_flags. If we are not, pretty much whatever
328     * is in the register and/or the native flags is seen as valid.
329     */
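/* Summary of the tracked states (see make_flags_live_internal() and
 * flags_to_stack() below, which move the flags between the two places):
 *
 *   live.flags_in_flags == VALID  ->  x86 EFLAGS hold the 68k flags
 *                          TRASH  ->  EFLAGS have been clobbered
 *   live.flags_on_stack == VALID  ->  regflags.cznv holds the 68k flags
 *                          TRASH  ->  the saved copy is stale
 */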
330    
331     static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
332     {
333     return cache_tags[cl+1].bi;
334     }
335    
336     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
337     {
338     blockinfo* bi=get_blockinfo(cacheline(addr));
339    
340     while (bi) {
341     if (bi->pc_p==addr)
342     return bi;
343     bi=bi->next_same_cl;
344     }
345     return NULL;
346     }
347    
348    
349     /*******************************************************************
350     * All sorts of list related functions for all of the lists *
351     *******************************************************************/
352    
353     static __inline__ void remove_from_cl_list(blockinfo* bi)
354     {
355     uae_u32 cl=cacheline(bi->pc_p);
356    
357     if (bi->prev_same_cl_p)
358     *(bi->prev_same_cl_p)=bi->next_same_cl;
359     if (bi->next_same_cl)
360     bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
361     if (cache_tags[cl+1].bi)
362     cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
363     else
364     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
365     }
366    
367     static __inline__ void remove_from_list(blockinfo* bi)
368     {
369     if (bi->prev_p)
370     *(bi->prev_p)=bi->next;
371     if (bi->next)
372     bi->next->prev_p=bi->prev_p;
373     }
374    
375     static __inline__ void remove_from_lists(blockinfo* bi)
376     {
377     remove_from_list(bi);
378     remove_from_cl_list(bi);
379     }
380    
381     static __inline__ void add_to_cl_list(blockinfo* bi)
382     {
383     uae_u32 cl=cacheline(bi->pc_p);
384    
385     if (cache_tags[cl+1].bi)
386     cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
387     bi->next_same_cl=cache_tags[cl+1].bi;
388    
389     cache_tags[cl+1].bi=bi;
390     bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
391    
392     cache_tags[cl].handler=bi->handler_to_use;
393     }
394    
395     static __inline__ void raise_in_cl_list(blockinfo* bi)
396     {
397     remove_from_cl_list(bi);
398     add_to_cl_list(bi);
399     }
400    
401     static __inline__ void add_to_active(blockinfo* bi)
402     {
403     if (active)
404     active->prev_p=&(bi->next);
405     bi->next=active;
406    
407     active=bi;
408     bi->prev_p=&active;
409     }
410    
411     static __inline__ void add_to_dormant(blockinfo* bi)
412     {
413     if (dormant)
414     dormant->prev_p=&(bi->next);
415     bi->next=dormant;
416    
417     dormant=bi;
418     bi->prev_p=&dormant;
419     }
420    
421     static __inline__ void remove_dep(dependency* d)
422     {
423     if (d->prev_p)
424     *(d->prev_p)=d->next;
425     if (d->next)
426     d->next->prev_p=d->prev_p;
427     d->prev_p=NULL;
428     d->next=NULL;
429     }
430    
431     /* This block's code is about to be thrown away, so it no longer
432     depends on anything else */
433     static __inline__ void remove_deps(blockinfo* bi)
434     {
435     remove_dep(&(bi->dep[0]));
436     remove_dep(&(bi->dep[1]));
437     }
438    
439     static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
440     {
441     *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
442     }
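/* d->jmp_off points at the 32-bit displacement field of an emitted x86
   branch; x86 displacements are relative to the end of that field, hence
   the "+4" above. */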
443    
444     /********************************************************************
445     * Soft flush handling support functions *
446     ********************************************************************/
447    
448     static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
449     {
450     //write_log("bi is %p\n",bi);
451     if (dh!=bi->direct_handler_to_use) {
452     dependency* x=bi->deplist;
453     //write_log("bi->deplist=%p\n",bi->deplist);
454     while (x) {
455     //write_log("x is %p\n",x);
456     //write_log("x->next is %p\n",x->next);
457     //write_log("x->prev_p is %p\n",x->prev_p);
458    
459     if (x->jmp_off) {
460     adjust_jmpdep(x,dh);
461     }
462     x=x->next;
463     }
464     bi->direct_handler_to_use=dh;
465     }
466     }
467    
468     static __inline__ void invalidate_block(blockinfo* bi)
469     {
470     int i;
471    
472     bi->optlevel=0;
473     bi->count=optcount[0]-1;
474     bi->handler=NULL;
475     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
476     bi->direct_handler=NULL;
477     set_dhtu(bi,bi->direct_pen);
478     bi->needed_flags=0xff;
479     bi->status=BI_INVALID;
480     for (i=0;i<2;i++) {
481     bi->dep[i].jmp_off=NULL;
482     bi->dep[i].target=NULL;
483     }
484     remove_deps(bi);
485     }
486    
487     static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
488     {
489 gbeauche 1.24 blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
490 gbeauche 1.1
491     Dif(!tbi) {
492     write_log("Could not create jmpdep!\n");
493     abort();
494     }
495     bi->dep[i].jmp_off=jmpaddr;
496     bi->dep[i].source=bi;
497     bi->dep[i].target=tbi;
498     bi->dep[i].next=tbi->deplist;
499     if (bi->dep[i].next)
500     bi->dep[i].next->prev_p=&(bi->dep[i].next);
501     bi->dep[i].prev_p=&(tbi->deplist);
502     tbi->deplist=&(bi->dep[i]);
503     }
504    
505     static __inline__ void block_need_recompile(blockinfo * bi)
506     {
507     uae_u32 cl = cacheline(bi->pc_p);
508    
509     set_dhtu(bi, bi->direct_pen);
510     bi->direct_handler = bi->direct_pen;
511    
512     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
513     bi->handler = (cpuop_func *)popall_execute_normal;
514     if (bi == cache_tags[cl + 1].bi)
515     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
516     bi->status = BI_NEED_RECOMP;
517     }
518    
519     static __inline__ void mark_callers_recompile(blockinfo * bi)
520     {
521     dependency *x = bi->deplist;
522    
523     while (x) {
524     dependency *next = x->next; /* This disappears when we mark for
525     * recompilation and thus remove the
526     * blocks from the lists */
527     if (x->jmp_off) {
528     blockinfo *cbi = x->source;
529    
530     Dif(cbi->status == BI_INVALID) {
531     // write_log("invalid block in dependency list\n"); // FIXME?
532     // abort();
533     }
534     if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
535     block_need_recompile(cbi);
536     mark_callers_recompile(cbi);
537     }
538     else if (cbi->status == BI_COMPILING) {
539     redo_current_block = 1;
540     }
541     else if (cbi->status == BI_NEED_RECOMP) {
542     /* nothing */
543     }
544     else {
545     //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
546     }
547     }
548     x = next;
549     }
550     }
551    
552     static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
553     {
554     blockinfo* bi=get_blockinfo_addr(addr);
555     int i;
556    
557     if (!bi) {
558     for (i=0;i<MAX_HOLD_BI && !bi;i++) {
559     if (hold_bi[i]) {
560     uae_u32 cl=cacheline(addr);
561    
562     bi=hold_bi[i];
563     hold_bi[i]=NULL;
564     bi->pc_p=(uae_u8 *)addr;
565     invalidate_block(bi);
566     add_to_active(bi);
567     add_to_cl_list(bi);
568    
569     }
570     }
571     }
572     if (!bi) {
573     write_log("Looking for blockinfo, can't find free one\n");
574     abort();
575     }
576     return bi;
577     }
578    
579     static void prepare_block(blockinfo* bi);
580    
581     /* Management of blockinfos.
582    
583     A blockinfo struct is allocated whenever a new block has to be
584     compiled. If the list of free blockinfos is empty, we allocate a new
585     pool of blockinfos and link the newly created blockinfos together
586     into the list of free blockinfos. Otherwise, we simply pop a structure
587 gbeauche 1.7 off the free list.
588 gbeauche 1.1
589     Blockinfos are lazily deallocated, i.e. chained together in the
590     list of free blockinfos whenever a translation cache flush (hard or
591     soft) request occurs.
592     */
593    
594 gbeauche 1.7 template< class T >
595     class LazyBlockAllocator
596     {
597     enum {
598     kPoolSize = 1 + 4096 / sizeof(T)
599     };
600     struct Pool {
601     T chunk[kPoolSize];
602     Pool * next;
603     };
604     Pool * mPools;
605     T * mChunks;
606     public:
607     LazyBlockAllocator() : mPools(0), mChunks(0) { }
608     ~LazyBlockAllocator();
609     T * acquire();
610     void release(T * const);
611 gbeauche 1.1 };
612    
613 gbeauche 1.7 template< class T >
614     LazyBlockAllocator<T>::~LazyBlockAllocator()
615 gbeauche 1.1 {
616 gbeauche 1.7 Pool * currentPool = mPools;
617     while (currentPool) {
618     Pool * deadPool = currentPool;
619     currentPool = currentPool->next;
620     free(deadPool);
621     }
622     }
623    
624     template< class T >
625     T * LazyBlockAllocator<T>::acquire()
626     {
627     if (!mChunks) {
628     // There is no chunk left, allocate a new pool and link the
629     // chunks into the free list
630     Pool * newPool = (Pool *)malloc(sizeof(Pool));
631     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
632     chunk->next = mChunks;
633     mChunks = chunk;
634 gbeauche 1.1 }
635 gbeauche 1.7 newPool->next = mPools;
636     mPools = newPool;
637     }
638     T * chunk = mChunks;
639     mChunks = chunk->next;
640     return chunk;
641     }
642    
643     template< class T >
644     void LazyBlockAllocator<T>::release(T * const chunk)
645     {
646     chunk->next = mChunks;
647     mChunks = chunk;
648     }
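/* Typical use, as in alloc_blockinfo()/free_blockinfo() further down:
 *
 *   blockinfo *bi = BlockInfoAllocator.acquire();
 *   ...
 *   BlockInfoAllocator.release(bi);  // chunk goes back on the free list
 *
 * Note that release() never returns memory to the system; whole pools are
 * only freed by the destructor.
 */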
649    
650     template< class T >
651     class HardBlockAllocator
652     {
653     public:
654     T * acquire() {
655     T * data = (T *)current_compile_p;
656     current_compile_p += sizeof(T);
657     return data;
658 gbeauche 1.1 }
659 gbeauche 1.7
660     void release(T * const chunk) {
661     // Deallocated on invalidation
662     }
663     };
664    
665     #if USE_SEPARATE_BIA
666     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
667     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
668 gbeauche 1.1 #else
669 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
670     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
671 gbeauche 1.1 #endif
672    
673 gbeauche 1.8 static __inline__ checksum_info *alloc_checksum_info(void)
674     {
675     checksum_info *csi = ChecksumInfoAllocator.acquire();
676     csi->next = NULL;
677     return csi;
678     }
679    
680     static __inline__ void free_checksum_info(checksum_info *csi)
681     {
682     csi->next = NULL;
683     ChecksumInfoAllocator.release(csi);
684     }
685    
686     static __inline__ void free_checksum_info_chain(checksum_info *csi)
687     {
688     while (csi != NULL) {
689     checksum_info *csi2 = csi->next;
690     free_checksum_info(csi);
691     csi = csi2;
692     }
693     }
694 gbeauche 1.7
695     static __inline__ blockinfo *alloc_blockinfo(void)
696 gbeauche 1.1 {
697 gbeauche 1.7 blockinfo *bi = BlockInfoAllocator.acquire();
698     #if USE_CHECKSUM_INFO
699     bi->csi = NULL;
700 gbeauche 1.1 #endif
701 gbeauche 1.7 return bi;
702 gbeauche 1.1 }
703    
704 gbeauche 1.7 static __inline__ void free_blockinfo(blockinfo *bi)
705 gbeauche 1.1 {
706 gbeauche 1.7 #if USE_CHECKSUM_INFO
707 gbeauche 1.8 free_checksum_info_chain(bi->csi);
708     bi->csi = NULL;
709 gbeauche 1.1 #endif
710 gbeauche 1.7 BlockInfoAllocator.release(bi);
711 gbeauche 1.1 }
712    
713     static __inline__ void alloc_blockinfos(void)
714     {
715     int i;
716     blockinfo* bi;
717    
718     for (i=0;i<MAX_HOLD_BI;i++) {
719     if (hold_bi[i])
720     return;
721     bi=hold_bi[i]=alloc_blockinfo();
722     prepare_block(bi);
723     }
724     }
725    
726     /********************************************************************
727     * Functions to emit data into memory, and other general support *
728     ********************************************************************/
729    
730     static uae_u8* target;
731    
732     static void emit_init(void)
733     {
734     }
735    
736     static __inline__ void emit_byte(uae_u8 x)
737     {
738     *target++=x;
739     }
740    
741     static __inline__ void emit_word(uae_u16 x)
742     {
743     *((uae_u16*)target)=x;
744     target+=2;
745     }
746    
747     static __inline__ void emit_long(uae_u32 x)
748     {
749     *((uae_u32*)target)=x;
750     target+=4;
751     }
752    
753 gbeauche 1.24 static __inline__ void emit_quad(uae_u64 x)
754     {
755     *((uae_u64*)target)=x;
756     target+=8;
757     }
758    
759 gbeauche 1.12 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
760     {
761     memcpy((uae_u8 *)target,block,blocklen);
762     target+=blocklen;
763     }
764    
765 gbeauche 1.1 static __inline__ uae_u32 reverse32(uae_u32 v)
766     {
767     #if 1
768     // gb-- We have specialized byteswapping functions, just use them
769     return do_byteswap_32(v);
770     #else
771     return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
772     #endif
773     }
774    
775     /********************************************************************
776     * Getting the information about the target CPU *
777     ********************************************************************/
778    
779     #include "codegen_x86.cpp"
780    
781     void set_target(uae_u8* t)
782     {
783     target=t;
784     }
785    
786     static __inline__ uae_u8* get_target_noopt(void)
787     {
788     return target;
789     }
790    
791     __inline__ uae_u8* get_target(void)
792     {
793     return get_target_noopt();
794     }
795    
796    
797     /********************************************************************
798     * Flags status handling. EMIT TIME! *
799     ********************************************************************/
800    
801     static void bt_l_ri_noclobber(R4 r, IMM i);
802    
803     static void make_flags_live_internal(void)
804     {
805     if (live.flags_in_flags==VALID)
806     return;
807     Dif (live.flags_on_stack==TRASH) {
808     write_log("Want flags, got something on stack, but it is TRASH\n");
809     abort();
810     }
811     if (live.flags_on_stack==VALID) {
812     int tmp;
813     tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
814     raw_reg_to_flags(tmp);
815     unlock2(tmp);
816    
817     live.flags_in_flags=VALID;
818     return;
819     }
820     write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
821     live.flags_in_flags,live.flags_on_stack);
822     abort();
823     }
824    
825     static void flags_to_stack(void)
826     {
827     if (live.flags_on_stack==VALID)
828     return;
829     if (!live.flags_are_important) {
830     live.flags_on_stack=VALID;
831     return;
832     }
833     Dif (live.flags_in_flags!=VALID)
834     abort();
835     else {
836     int tmp;
837     tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
838     raw_flags_to_reg(tmp);
839     unlock2(tmp);
840     }
841     live.flags_on_stack=VALID;
842     }
843    
844     static __inline__ void clobber_flags(void)
845     {
846     if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
847     flags_to_stack();
848     live.flags_in_flags=TRASH;
849     }
850    
851     /* Prepare for leaving the compiled stuff */
852     static __inline__ void flush_flags(void)
853     {
854     flags_to_stack();
855     return;
856     }
857    
858     int touchcnt;
859    
860     /********************************************************************
861 gbeauche 1.18 * Partial register flushing for optimized calls *
862     ********************************************************************/
863    
864     struct regusage {
865     uae_u16 rmask;
866     uae_u16 wmask;
867     };
868    
869     static inline void ru_set(uae_u16 *mask, int reg)
870     {
871     #if USE_OPTIMIZED_CALLS
872     *mask |= 1 << reg;
873     #endif
874     }
875    
876     static inline bool ru_get(const uae_u16 *mask, int reg)
877     {
878     #if USE_OPTIMIZED_CALLS
879     return (*mask & (1 << reg));
880     #else
881     /* Default: instruction reads & writes the register */
882     return true;
883     #endif
884     }
885    
886     static inline void ru_set_read(regusage *ru, int reg)
887     {
888     ru_set(&ru->rmask, reg);
889     }
890    
891     static inline void ru_set_write(regusage *ru, int reg)
892     {
893     ru_set(&ru->wmask, reg);
894     }
895    
896     static inline bool ru_read_p(const regusage *ru, int reg)
897     {
898     return ru_get(&ru->rmask, reg);
899     }
900    
901     static inline bool ru_write_p(const regusage *ru, int reg)
902     {
903     return ru_get(&ru->wmask, reg);
904     }
905    
906     static void ru_fill_ea(regusage *ru, int reg, amodes mode,
907     wordsizes size, int write_mode)
908     {
909     switch (mode) {
910     case Areg:
911     reg += 8;
912     /* fall through */
913     case Dreg:
914     ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
915     break;
916     case Ad16:
917     /* skip displacement */
918     m68k_pc_offset += 2;
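        /* fall through to the address-register read */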
919     case Aind:
920     case Aipi:
921     case Apdi:
922     ru_set_read(ru, reg+8);
923     break;
924     case Ad8r:
925     ru_set_read(ru, reg+8);
926     /* fall through */
927     case PC8r: {
928     uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
929     reg = (dp >> 12) & 15;
930     ru_set_read(ru, reg);
931     if (dp & 0x100)
932     m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
933     break;
934     }
935     case PC16:
936     case absw:
937     case imm0:
938     case imm1:
939     m68k_pc_offset += 2;
940     break;
941     case absl:
942     case imm2:
943     m68k_pc_offset += 4;
944     break;
945     case immi:
946     m68k_pc_offset += (size == sz_long) ? 4 : 2;
947     break;
948     }
949     }
950    
951     /* TODO: split into a static initialization part and a dynamic one
952     (instructions depending on extension words) */
953     static void ru_fill(regusage *ru, uae_u32 opcode)
954     {
955     m68k_pc_offset += 2;
956    
957     /* Default: no register is used or written to */
958     ru->rmask = 0;
959     ru->wmask = 0;
960    
961     uae_u32 real_opcode = cft_map(opcode);
962     struct instr *dp = &table68k[real_opcode];
963    
964     bool rw_dest = true;
965     bool handled = false;
966    
967     /* Handle some instructions specifically */
968     uae_u16 reg, ext;
969     switch (dp->mnemo) {
970     case i_BFCHG:
971     case i_BFCLR:
972     case i_BFEXTS:
973     case i_BFEXTU:
974     case i_BFFFO:
975     case i_BFINS:
976     case i_BFSET:
977     case i_BFTST:
978     ext = comp_get_iword((m68k_pc_offset+=2)-2);
979     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
980     if (ext & 0x020) ru_set_read(ru, ext & 7);
981     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
982     if (dp->dmode == Dreg)
983     ru_set_read(ru, dp->dreg);
984     switch (dp->mnemo) {
985     case i_BFEXTS:
986     case i_BFEXTU:
987     case i_BFFFO:
988     ru_set_write(ru, (ext >> 12) & 7);
989     break;
990     case i_BFINS:
991     ru_set_read(ru, (ext >> 12) & 7);
992     /* fall through */
993     case i_BFCHG:
994     case i_BFCLR:
995     case i_BFSET:
996     if (dp->dmode == Dreg)
997     ru_set_write(ru, dp->dreg);
998     break;
999     }
1000     handled = true;
1001     rw_dest = false;
1002     break;
1003    
1004     case i_BTST:
1005     rw_dest = false;
1006     break;
1007    
1008     case i_CAS:
1009     {
1010     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1011     int Du = ext & 7;
1012     ru_set_read(ru, Du);
1013     int Dc = (ext >> 6) & 7;
1014     ru_set_read(ru, Dc);
1015     ru_set_write(ru, Dc);
1016     break;
1017     }
1018     case i_CAS2:
1019     {
1020     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1021     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1022     Rn1 = (ext >> 12) & 15;
1023     Du1 = (ext >> 6) & 7;
1024     Dc1 = ext & 7;
1025     ru_set_read(ru, Rn1);
1026     ru_set_read(ru, Du1);
1027     ru_set_read(ru, Dc1);
1028     ru_set_write(ru, Dc1);
1029     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1030     Rn2 = (ext >> 12) & 15;
1031     Du2 = (ext >> 6) & 7;
1032     Dc2 = ext & 7;
1033     ru_set_read(ru, Rn2);
1034     ru_set_read(ru, Du2);
1035     ru_set_write(ru, Dc2);
1036     break;
1037     }
1038     case i_DIVL: case i_MULL:
1039     m68k_pc_offset += 2;
1040     break;
1041     case i_LEA:
1042     case i_MOVE: case i_MOVEA: case i_MOVE16:
1043     rw_dest = false;
1044     break;
1045     case i_PACK: case i_UNPK:
1046     rw_dest = false;
1047     m68k_pc_offset += 2;
1048     break;
1049     case i_TRAPcc:
1050     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1051     break;
1052     case i_RTR:
1053     /* do nothing, just for coverage debugging */
1054     break;
1055     /* TODO: handle EXG instruction */
1056     }
1057    
1058     /* Handle A-Traps better */
1059     if ((real_opcode & 0xf000) == 0xa000) {
1060     handled = true;
1061     }
1062    
1063     /* Handle EmulOps better */
1064     if ((real_opcode & 0xff00) == 0x7100) {
1065     handled = true;
1066     ru->rmask = 0xffff;
1067     ru->wmask = 0;
1068     }
1069    
1070     if (dp->suse && !handled)
1071     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1072    
1073     if (dp->duse && !handled)
1074     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1075    
1076     if (rw_dest)
1077     ru->rmask |= ru->wmask;
1078    
1079     handled = handled || dp->suse || dp->duse;
1080    
1081     /* Mark all registers as used/written if the instruction may trap */
1082     if (may_trap(opcode)) {
1083     handled = true;
1084     ru->rmask = 0xffff;
1085     ru->wmask = 0xffff;
1086     }
1087    
1088     if (!handled) {
1089     write_log("ru_fill: %04x = { %04x, %04x }\n",
1090     real_opcode, ru->rmask, ru->wmask);
1091     abort();
1092     }
1093     }
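/* Worked example (values follow from the rules above): for MOVE.L (A0)+,D1,
 * rw_dest is false, the Aipi source marks A0 (register 8) as read and the
 * Dreg destination marks D1 as written, so ru_fill() yields:
 *
 *   ru->rmask == 0x0100   (A0)
 *   ru->wmask == 0x0002   (D1)
 */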
1094    
1095     /********************************************************************
1096 gbeauche 1.1 * register allocation per block logging *
1097     ********************************************************************/
1098    
1099     static uae_s8 vstate[VREGS];
1100     static uae_s8 vwritten[VREGS];
1101     static uae_s8 nstate[N_REGS];
1102    
1103     #define L_UNKNOWN -127
1104     #define L_UNAVAIL -1
1105     #define L_NEEDED -2
1106     #define L_UNNEEDED -3
1107    
1108     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1109     {
1110     int i;
1111    
1112     for (i = 0; i < VREGS; i++)
1113     s->virt[i] = vstate[i];
1114     for (i = 0; i < N_REGS; i++)
1115     s->nat[i] = nstate[i];
1116     }
1117    
1118     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1119     {
1120     int i;
1121     int reverse = 0;
1122    
1123     for (i = 0; i < VREGS; i++) {
1124     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1125     return 1;
1126     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1127     reverse++;
1128     }
1129     for (i = 0; i < N_REGS; i++) {
1130     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1131     return 1;
1132     if (nstate[i] < 0 && s->nat[i] >= 0)
1133     reverse++;
1134     }
1135     if (reverse >= 2 && USE_MATCH)
1136     return 1; /* In this case, it might be worth recompiling the
1137     * callers */
1138     return 0;
1139     }
1140    
1141     static __inline__ void log_startblock(void)
1142     {
1143     int i;
1144    
1145     for (i = 0; i < VREGS; i++) {
1146     vstate[i] = L_UNKNOWN;
1147     vwritten[i] = 0;
1148     }
1149     for (i = 0; i < N_REGS; i++)
1150     nstate[i] = L_UNKNOWN;
1151     }
1152    
1153     /* Using an n-reg for a temp variable */
1154     static __inline__ void log_isused(int n)
1155     {
1156     if (nstate[n] == L_UNKNOWN)
1157     nstate[n] = L_UNAVAIL;
1158     }
1159    
1160     static __inline__ void log_visused(int r)
1161     {
1162     if (vstate[r] == L_UNKNOWN)
1163     vstate[r] = L_NEEDED;
1164     }
1165    
1166     static __inline__ void do_load_reg(int n, int r)
1167     {
1168     if (r == FLAGTMP)
1169     raw_load_flagreg(n, r);
1170     else if (r == FLAGX)
1171     raw_load_flagx(n, r);
1172     else
1173 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1174 gbeauche 1.1 }
1175    
1176     static __inline__ void check_load_reg(int n, int r)
1177     {
1178 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1179 gbeauche 1.1 }
1180    
1181     static __inline__ void log_vwrite(int r)
1182     {
1183     vwritten[r] = 1;
1184     }
1185    
1186     /* Using an n-reg to hold a v-reg */
1187     static __inline__ void log_isreg(int n, int r)
1188     {
1189     static int count = 0;
1190    
1191     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1192     nstate[n] = r;
1193     else {
1194     do_load_reg(n, r);
1195     if (nstate[n] == L_UNKNOWN)
1196     nstate[n] = L_UNAVAIL;
1197     }
1198     if (vstate[r] == L_UNKNOWN)
1199     vstate[r] = L_NEEDED;
1200     }
1201    
1202     static __inline__ void log_clobberreg(int r)
1203     {
1204     if (vstate[r] == L_UNKNOWN)
1205     vstate[r] = L_UNNEEDED;
1206     }
1207    
1208     /* This ends all possibility of clever register allocation */
1209    
1210     static __inline__ void log_flush(void)
1211     {
1212     int i;
1213    
1214     for (i = 0; i < VREGS; i++)
1215     if (vstate[i] == L_UNKNOWN)
1216     vstate[i] = L_NEEDED;
1217     for (i = 0; i < N_REGS; i++)
1218     if (nstate[i] == L_UNKNOWN)
1219     nstate[i] = L_UNAVAIL;
1220     }
1221    
1222     static __inline__ void log_dump(void)
1223     {
1224     int i;
1225    
1226     return; /* early return: the dump below is disabled */
1227    
1228     write_log("----------------------\n");
1229     for (i = 0; i < N_REGS; i++) {
1230     switch (nstate[i]) {
1231     case L_UNKNOWN:
1232     write_log("Nat %d : UNKNOWN\n", i);
1233     break;
1234     case L_UNAVAIL:
1235     write_log("Nat %d : UNAVAIL\n", i);
1236     break;
1237     default:
1238     write_log("Nat %d : %d\n", i, nstate[i]);
1239     break;
1240     }
1241     }
1242     for (i = 0; i < VREGS; i++) {
1243     if (vstate[i] == L_UNNEEDED)
1244     write_log("Virt %d: UNNEEDED\n", i);
1245     }
1246     }
1247    
1248     /********************************************************************
1249     * register status handling. EMIT TIME! *
1250     ********************************************************************/
1251    
1252     static __inline__ void set_status(int r, int status)
1253     {
1254     if (status == ISCONST)
1255     log_clobberreg(r);
1256     live.state[r].status=status;
1257     }
1258    
1259     static __inline__ int isinreg(int r)
1260     {
1261     return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1262     }
1263    
1264     static __inline__ void adjust_nreg(int r, uae_u32 val)
1265     {
1266     if (!val)
1267     return;
1268     raw_lea_l_brr(r,r,val);
1269     }
1270    
1271     static void tomem(int r)
1272     {
1273     int rr=live.state[r].realreg;
1274    
1275     if (isinreg(r)) {
1276     if (live.state[r].val && live.nat[rr].nholds==1
1277     && !live.nat[rr].locked) {
1278     // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1279     // live.state[r].val,r,rr,target);
1280     adjust_nreg(rr,live.state[r].val);
1281     live.state[r].val=0;
1282     live.state[r].dirtysize=4;
1283     set_status(r,DIRTY);
1284     }
1285     }
1286    
1287     if (live.state[r].status==DIRTY) {
1288     switch (live.state[r].dirtysize) {
1289 gbeauche 1.24 case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
1290     case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
1291     case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
1292 gbeauche 1.1 default: abort();
1293     }
1294     log_vwrite(r);
1295     set_status(r,CLEAN);
1296     live.state[r].dirtysize=0;
1297     }
1298     }
1299    
1300     static __inline__ int isconst(int r)
1301     {
1302     return live.state[r].status==ISCONST;
1303     }
1304    
1305     int is_const(int r)
1306     {
1307     return isconst(r);
1308     }
1309    
1310     static __inline__ void writeback_const(int r)
1311     {
1312     if (!isconst(r))
1313     return;
1314     Dif (live.state[r].needflush==NF_HANDLER) {
1315     write_log("Trying to write back constant NF_HANDLER!\n");
1316     abort();
1317     }
1318    
1319 gbeauche 1.24 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1320 gbeauche 1.1 log_vwrite(r);
1321     live.state[r].val=0;
1322     set_status(r,INMEM);
1323     }
1324    
1325     static __inline__ void tomem_c(int r)
1326     {
1327     if (isconst(r)) {
1328     writeback_const(r);
1329     }
1330     else
1331     tomem(r);
1332     }
1333    
1334     static void evict(int r)
1335     {
1336     int rr;
1337    
1338     if (!isinreg(r))
1339     return;
1340     tomem(r);
1341     rr=live.state[r].realreg;
1342    
1343     Dif (live.nat[rr].locked &&
1344     live.nat[rr].nholds==1) {
1345     write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1346     abort();
1347     }
1348    
1349     live.nat[rr].nholds--;
1350     if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1351     int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1352     int thisind=live.state[r].realind;
1353    
1354     live.nat[rr].holds[thisind]=topreg;
1355     live.state[topreg].realind=thisind;
1356     }
1357     live.state[r].realreg=-1;
1358     set_status(r,INMEM);
1359     }
1360    
1361     static __inline__ void free_nreg(int r)
1362     {
1363     int i=live.nat[r].nholds;
1364    
1365     while (i) {
1366     int vr;
1367    
1368     --i;
1369     vr=live.nat[r].holds[i];
1370     evict(vr);
1371     }
1372     Dif (live.nat[r].nholds!=0) {
1373     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1374     abort();
1375     }
1376     }
1377    
1378     /* Use with care! */
1379     static __inline__ void isclean(int r)
1380     {
1381     if (!isinreg(r))
1382     return;
1383     live.state[r].validsize=4;
1384     live.state[r].dirtysize=0;
1385     live.state[r].val=0;
1386     set_status(r,CLEAN);
1387     }
1388    
1389     static __inline__ void disassociate(int r)
1390     {
1391     isclean(r);
1392     evict(r);
1393     }
1394    
1395     static __inline__ void set_const(int r, uae_u32 val)
1396     {
1397     disassociate(r);
1398     live.state[r].val=val;
1399     set_status(r,ISCONST);
1400     }
1401    
1402     static __inline__ uae_u32 get_offset(int r)
1403     {
1404     return live.state[r].val;
1405     }
1406    
1407     static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1408     {
1409     int bestreg;
1410     uae_s32 when;
1411     int i;
1412     uae_s32 badness=0; /* to shut up gcc */
1413     bestreg=-1;
1414     when=2000000000;
1415    
1416     for (i=N_REGS;i--;) {
1417     badness=live.nat[i].touched;
1418     if (live.nat[i].nholds==0)
1419     badness=0;
1420     if (i==hint)
1421     badness-=200000000;
1422     if (!live.nat[i].locked && badness<when) {
1423     if ((size==1 && live.nat[i].canbyte) ||
1424     (size==2 && live.nat[i].canword) ||
1425     (size==4)) {
1426     bestreg=i;
1427     when=badness;
1428     if (live.nat[i].nholds==0 && hint<0)
1429     break;
1430     if (i==hint)
1431     break;
1432     }
1433     }
1434     }
1435     Dif (bestreg==-1)
1436     abort();
1437    
1438     if (live.nat[bestreg].nholds>0) {
1439     free_nreg(bestreg);
1440     }
1441     if (isinreg(r)) {
1442     int rr=live.state[r].realreg;
1443     /* This will happen if we read a partially dirty register at a
1444     bigger size */
1445     Dif (willclobber || live.state[r].validsize>=size)
1446     abort();
1447     Dif (live.nat[rr].nholds!=1)
1448     abort();
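        /* Partial-register merge: rr already holds a valid low word. Load
           the full value from memory into bestreg, isolate its high word
           with bswap + 16-bit zero-extend + bswap, then recombine via lea
           so the two halves join without a partial-register access. */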
1449     if (size==4 && live.state[r].validsize==2) {
1450     log_isused(bestreg);
1451     log_visused(r);
1452 gbeauche 1.24 raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
1453 gbeauche 1.1 raw_bswap_32(bestreg);
1454     raw_zero_extend_16_rr(rr,rr);
1455     raw_zero_extend_16_rr(bestreg,bestreg);
1456     raw_bswap_32(bestreg);
1457     raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1458     live.state[r].validsize=4;
1459     live.nat[rr].touched=touchcnt++;
1460     return rr;
1461     }
1462     if (live.state[r].validsize==1) {
1463     /* Nothing yet */
1464     }
1465     evict(r);
1466     }
1467    
1468     if (!willclobber) {
1469     if (live.state[r].status!=UNDEF) {
1470     if (isconst(r)) {
1471     raw_mov_l_ri(bestreg,live.state[r].val);
1472     live.state[r].val=0;
1473     live.state[r].dirtysize=4;
1474     set_status(r,DIRTY);
1475     log_isused(bestreg);
1476     }
1477     else {
1478     log_isreg(bestreg, r); /* This will also load it! */
1479     live.state[r].dirtysize=0;
1480     set_status(r,CLEAN);
1481     }
1482     }
1483     else {
1484     live.state[r].val=0;
1485     live.state[r].dirtysize=0;
1486     set_status(r,CLEAN);
1487     log_isused(bestreg);
1488     }
1489     live.state[r].validsize=4;
1490     }
1491     else { /* this is the easiest way, but not optimal. FIXME! */
1492     /* Now it's trickier, but hopefully still OK */
1493     if (!isconst(r) || size==4) {
1494     live.state[r].validsize=size;
1495     live.state[r].dirtysize=size;
1496     live.state[r].val=0;
1497     set_status(r,DIRTY);
1498     if (size == 4) {
1499     log_clobberreg(r);
1500     log_isused(bestreg);
1501     }
1502     else {
1503     log_visused(r);
1504     log_isused(bestreg);
1505     }
1506     }
1507     else {
1508     if (live.state[r].status!=UNDEF)
1509     raw_mov_l_ri(bestreg,live.state[r].val);
1510     live.state[r].val=0;
1511     live.state[r].validsize=4;
1512     live.state[r].dirtysize=4;
1513     set_status(r,DIRTY);
1514     log_isused(bestreg);
1515     }
1516     }
1517     live.state[r].realreg=bestreg;
1518     live.state[r].realind=live.nat[bestreg].nholds;
1519     live.nat[bestreg].touched=touchcnt++;
1520     live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1521     live.nat[bestreg].nholds++;
1522    
1523     return bestreg;
1524     }
1525    
1526     static int alloc_reg(int r, int size, int willclobber)
1527     {
1528     return alloc_reg_hinted(r,size,willclobber,-1);
1529     }
1530    
1531     static void unlock2(int r)
1532     {
1533     Dif (!live.nat[r].locked)
1534     abort();
1535     live.nat[r].locked--;
1536     }
1537    
1538     static void setlock(int r)
1539     {
1540     live.nat[r].locked++;
1541     }
1542    
1543    
1544     static void mov_nregs(int d, int s)
1545     {
1546     int ns=live.nat[s].nholds;
1547     int nd=live.nat[d].nholds;
1548     int i;
1549    
1550     if (s==d)
1551     return;
1552    
1553     if (nd>0)
1554     free_nreg(d);
1555    
1556     log_isused(d);
1557     raw_mov_l_rr(d,s);
1558    
1559     for (i=0;i<live.nat[s].nholds;i++) {
1560     int vs=live.nat[s].holds[i];
1561    
1562     live.state[vs].realreg=d;
1563     live.state[vs].realind=i;
1564     live.nat[d].holds[i]=vs;
1565     }
1566     live.nat[d].nholds=live.nat[s].nholds;
1567    
1568     live.nat[s].nholds=0;
1569     }
1570    
1571    
1572     static __inline__ void make_exclusive(int r, int size, int spec)
1573     {
1574     int clobber;
1575     reg_status oldstate;
1576     int rr=live.state[r].realreg;
1577     int nr;
1578     int nind;
1579     int ndirt=0;
1580     int i;
1581    
1582     if (!isinreg(r))
1583     return;
1584     if (live.nat[rr].nholds==1)
1585     return;
1586     for (i=0;i<live.nat[rr].nholds;i++) {
1587     int vr=live.nat[rr].holds[i];
1588     if (vr!=r &&
1589     (live.state[vr].status==DIRTY || live.state[vr].val))
1590     ndirt++;
1591     }
1592     if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1593     /* Everything else is clean, so let's keep this register */
1594     for (i=0;i<live.nat[rr].nholds;i++) {
1595     int vr=live.nat[rr].holds[i];
1596     if (vr!=r) {
1597     evict(vr);
1598     i--; /* Try that index again! */
1599     }
1600     }
1601     Dif (live.nat[rr].nholds!=1) {
1602     write_log("natreg %d holds %d vregs, %d not exclusive\n",
1603     rr,live.nat[rr].nholds,r);
1604     abort();
1605     }
1606     return;
1607     }
1608    
1609     /* We have to split the register */
1610     oldstate=live.state[r];
1611    
1612     setlock(rr); /* Make sure this doesn't go away */
1613     /* Forget about r being in the register rr */
1614     disassociate(r);
1615     /* Get a new register, that we will clobber completely */
1616     if (oldstate.status==DIRTY) {
1617     /* If dirtysize is <4, we need a register that can handle the
1618     eventual smaller memory store! Thanks to Quake68k for exposing
1619     this detail ;-) */
1620     nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1621     }
1622     else {
1623     nr=alloc_reg_hinted(r,4,1,spec);
1624     }
1625     nind=live.state[r].realind;
1626     live.state[r]=oldstate; /* Keep all the old state info */
1627     live.state[r].realreg=nr;
1628     live.state[r].realind=nind;
1629    
1630     if (size<live.state[r].validsize) {
1631     if (live.state[r].val) {
1632     /* Might as well compensate for the offset now */
1633     raw_lea_l_brr(nr,rr,oldstate.val);
1634     live.state[r].val=0;
1635     live.state[r].dirtysize=4;
1636     set_status(r,DIRTY);
1637     }
1638     else
1639     raw_mov_l_rr(nr,rr); /* Make another copy */
1640     }
1641     unlock2(rr);
1642     }
1643    
1644     static __inline__ void add_offset(int r, uae_u32 off)
1645     {
1646     live.state[r].val+=off;
1647     }
1648    
1649     static __inline__ void remove_offset(int r, int spec)
1650     {
1651     reg_status oldstate;
1652     int rr;
1653    
1654     if (isconst(r))
1655     return;
1656     if (live.state[r].val==0)
1657     return;
1658     if (isinreg(r) && live.state[r].validsize<4)
1659     evict(r);
1660    
1661     if (!isinreg(r))
1662     alloc_reg_hinted(r,4,0,spec);
1663    
1664     Dif (live.state[r].validsize!=4) {
1665     write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1666     abort();
1667     }
1668     make_exclusive(r,0,-1);
1669     /* make_exclusive might have done the job already */
1670     if (live.state[r].val==0)
1671     return;
1672    
1673     rr=live.state[r].realreg;
1674    
1675     if (live.nat[rr].nholds==1) {
1676     //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1677     // live.state[r].val,r,rr,target);
1678     adjust_nreg(rr,live.state[r].val);
1679     live.state[r].dirtysize=4;
1680     live.state[r].val=0;
1681     set_status(r,DIRTY);
1682     return;
1683     }
1684     write_log("Failed in remove_offset\n");
1685     abort();
1686     }
1687    
1688     static __inline__ void remove_all_offsets(void)
1689     {
1690     int i;
1691    
1692     for (i=0;i<VREGS;i++)
1693     remove_offset(i,-1);
1694     }
1695    
1696 gbeauche 1.28 static inline void flush_reg_count(void)
1697     {
1698     #if RECORD_REGISTER_USAGE
1699     for (int r = 0; r < 16; r++)
1700     if (reg_count_local[r])
1701     ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1702     #endif
1703     }
1704    
1705     static inline void record_register(int r)
1706     {
1707     #if RECORD_REGISTER_USAGE
1708     if (r < 16)
1709     reg_count_local[r]++;
1710     #endif
1711     }
1712    
1713 gbeauche 1.1 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1714     {
1715     int n;
1716     int answer=-1;
1717    
1718 gbeauche 1.28 record_register(r);
1719 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1720     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1721     }
1722     if (!can_offset)
1723     remove_offset(r,spec);
1724    
1725     if (isinreg(r) && live.state[r].validsize>=size) {
1726     n=live.state[r].realreg;
1727     switch(size) {
1728     case 1:
1729     if (live.nat[n].canbyte || spec>=0) {
1730     answer=n;
1731     }
1732     break;
1733     case 2:
1734     if (live.nat[n].canword || spec>=0) {
1735     answer=n;
1736     }
1737     break;
1738     case 4:
1739     answer=n;
1740     break;
1741     default: abort();
1742     }
1743     if (answer<0)
1744     evict(r);
1745     }
1746     /* either the value was in memory to start with, or it was evicted and
1747     is in memory now */
1748     if (answer<0) {
1749     answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1750     }
1751    
1752     if (spec>=0 && spec!=answer) {
1753     /* Too bad */
1754     mov_nregs(spec,answer);
1755     answer=spec;
1756     }
1757     live.nat[answer].locked++;
1758     live.nat[answer].touched=touchcnt++;
1759     return answer;
1760     }
1761    
1762    
1763    
1764     static int readreg(int r, int size)
1765     {
1766     return readreg_general(r,size,-1,0);
1767     }
1768    
1769     static int readreg_specific(int r, int size, int spec)
1770     {
1771     return readreg_general(r,size,spec,0);
1772     }
1773    
1774     static int readreg_offset(int r, int size)
1775     {
1776     return readreg_general(r,size,-1,1);
1777     }
1778    
1779     /* writereg_general(r, size, spec)
1780     *
1781     * INPUT
1782     * - r : mid-layer register
1783     * - size : requested size (1/2/4)
1784     * - spec : -1 if find or make a register free, otherwise specifies
1785     * the physical register to use in any case
1786     *
1787     * OUTPUT
1788     * - hard (physical, x86 here) register allocated to virtual register r
1789     */
1790     static __inline__ int writereg_general(int r, int size, int spec)
1791     {
1792     int n;
1793     int answer=-1;
1794    
1795 gbeauche 1.28 record_register(r);
1796 gbeauche 1.1 if (size<4) {
1797     remove_offset(r,spec);
1798     }
1799    
1800     make_exclusive(r,size,spec);
1801     if (isinreg(r)) {
1802     int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1803     int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1804     n=live.state[r].realreg;
1805    
1806     Dif (live.nat[n].nholds!=1)
1807     abort();
1808     switch(size) {
1809     case 1:
1810     if (live.nat[n].canbyte || spec>=0) {
1811     live.state[r].dirtysize=ndsize;
1812     live.state[r].validsize=nvsize;
1813     answer=n;
1814     }
1815     break;
1816     case 2:
1817     if (live.nat[n].canword || spec>=0) {
1818     live.state[r].dirtysize=ndsize;
1819     live.state[r].validsize=nvsize;
1820     answer=n;
1821     }
1822     break;
1823     case 4:
1824     live.state[r].dirtysize=ndsize;
1825     live.state[r].validsize=nvsize;
1826     answer=n;
1827     break;
1828     default: abort();
1829     }
1830     if (answer<0)
1831     evict(r);
1832     }
1833     /* either the value was in memory to start with, or it was evicted and
1834     is in memory now */
1835     if (answer<0) {
1836     answer=alloc_reg_hinted(r,size,1,spec);
1837     }
1838     if (spec>=0 && spec!=answer) {
1839     mov_nregs(spec,answer);
1840     answer=spec;
1841     }
1842     if (live.state[r].status==UNDEF)
1843     live.state[r].validsize=4;
1844     live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1845     live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1846    
1847     live.nat[answer].locked++;
1848     live.nat[answer].touched=touchcnt++;
1849     if (size==4) {
1850     live.state[r].val=0;
1851     }
1852     else {
1853     Dif (live.state[r].val) {
1854     write_log("Problem with val\n");
1855     abort();
1856     }
1857     }
1858     set_status(r,DIRTY);
1859     return answer;
1860     }
1861    
1862     static int writereg(int r, int size)
1863     {
1864     return writereg_general(r,size,-1);
1865     }
1866    
1867     static int writereg_specific(int r, int size, int spec)
1868     {
1869     return writereg_general(r,size,spec);
1870     }
1871    
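/* rmw_general(r, wsize, rsize, spec)
 *
 * Combined read/modify/write allocation (descriptive note, added):
 * at least rsize bytes of r must be valid in the returned register on
 * entry, and wsize bytes are marked dirty on exit (wsize>=rsize is
 * required). The spec argument behaves as in writereg_general() above.
 */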
1872     static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1873     {
1874     int n;
1875     int answer=-1;
1876    
1877 gbeauche 1.28 record_register(r);
1878 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1879     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1880     }
1881     remove_offset(r,spec);
1882     make_exclusive(r,0,spec);
1883    
1884     Dif (wsize<rsize) {
1885     write_log("Cannot handle wsize<rsize in rmw_general()\n");
1886     abort();
1887     }
1888     if (isinreg(r) && live.state[r].validsize>=rsize) {
1889     n=live.state[r].realreg;
1890     Dif (live.nat[n].nholds!=1)
1891     abort();
1892    
1893     switch(rsize) {
1894     case 1:
1895     if (live.nat[n].canbyte || spec>=0) {
1896     answer=n;
1897     }
1898     break;
1899     case 2:
1900     if (live.nat[n].canword || spec>=0) {
1901     answer=n;
1902     }
1903     break;
1904     case 4:
1905     answer=n;
1906     break;
1907     default: abort();
1908     }
1909     if (answer<0)
1910     evict(r);
1911     }
1912     /* either the value was in memory to start with, or it was evicted and
1913     is in memory now */
1914     if (answer<0) {
1915     answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1916     }
1917    
1918     if (spec>=0 && spec!=answer) {
1919     /* Too bad */
1920     mov_nregs(spec,answer);
1921     answer=spec;
1922     }
1923     if (wsize>live.state[r].dirtysize)
1924     live.state[r].dirtysize=wsize;
1925     if (wsize>live.state[r].validsize)
1926     live.state[r].validsize=wsize;
1927     set_status(r,DIRTY);
1928    
1929     live.nat[answer].locked++;
1930     live.nat[answer].touched=touchcnt++;
1931    
1932     Dif (live.state[r].val) {
1933     write_log("Problem with val(rmw)\n");
1934     abort();
1935     }
1936     return answer;
1937     }
1938    
1939     static int rmw(int r, int wsize, int rsize)
1940     {
1941     return rmw_general(r,wsize,rsize,-1);
1942     }
1943    
1944     static int rmw_specific(int r, int wsize, int rsize, int spec)
1945     {
1946     return rmw_general(r,wsize,rsize,spec);
1947     }
1948    
1949    
1950     /* needed for restoring the carry flag on non-P6 cores */
1951     static void bt_l_ri_noclobber(R4 r, IMM i)
1952     {
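    /* A bit index below 16 only touches the low word of r, so only
       two valid bytes are needed in the allocated register. */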
1953     int size=4;
1954     if (i<16)
1955     size=2;
1956     r=readreg(r,size);
1957     raw_bt_l_ri(r,i);
1958     unlock2(r);
1959     }
1960    
1961     /********************************************************************
1962     * FPU register status handling. EMIT TIME! *
1963     ********************************************************************/
1964    
1965     static void f_tomem(int r)
1966     {
1967     if (live.fate[r].status==DIRTY) {
1968     #if USE_LONG_DOUBLE
1969 gbeauche 1.24 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1970 gbeauche 1.1 #else
1971 gbeauche 1.24 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1972 gbeauche 1.1 #endif
1973     live.fate[r].status=CLEAN;
1974     }
1975     }
1976    
1977     static void f_tomem_drop(int r)
1978     {
1979     if (live.fate[r].status==DIRTY) {
1980     #if USE_LONG_DOUBLE
1981 gbeauche 1.24 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1982 gbeauche 1.1 #else
1983 gbeauche 1.24 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1984 gbeauche 1.1 #endif
1985     live.fate[r].status=INMEM;
1986     }
1987     }
1988    
1989    
1990     static __inline__ int f_isinreg(int r)
1991     {
1992     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1993     }
1994    
1995     static void f_evict(int r)
1996     {
1997     int rr;
1998    
1999     if (!f_isinreg(r))
2000     return;
2001     rr=live.fate[r].realreg;
2002     if (live.fat[rr].nholds==1)
2003     f_tomem_drop(r);
2004     else
2005     f_tomem(r);
2006    
2007     Dif (live.fat[rr].locked &&
2008     live.fat[rr].nholds==1) {
2009     write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
2010     abort();
2011     }
2012    
2013     live.fat[rr].nholds--;
2014     if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
2015     int topreg=live.fat[rr].holds[live.fat[rr].nholds];
2016     int thisind=live.fate[r].realind;
2017     live.fat[rr].holds[thisind]=topreg;
2018     live.fate[topreg].realind=thisind;
2019     }
2020     live.fate[r].status=INMEM;
2021     live.fate[r].realreg=-1;
2022     }
2023    
2024     static __inline__ void f_free_nreg(int r)
2025     {
2026     int i=live.fat[r].nholds;
2027    
2028     while (i) {
2029     int vr;
2030    
2031     --i;
2032     vr=live.fat[r].holds[i];
2033     f_evict(vr);
2034     }
2035     Dif (live.fat[r].nholds!=0) {
2036     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2037     abort();
2038     }
2039     }
2040    
2041    
2042     /* Use with care! */
2043     static __inline__ void f_isclean(int r)
2044     {
2045     if (!f_isinreg(r))
2046     return;
2047     live.fate[r].status=CLEAN;
2048     }
2049    
2050     static __inline__ void f_disassociate(int r)
2051     {
2052     f_isclean(r);
2053     f_evict(r);
2054     }
2055    
2056    
2057    
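/* Pick a host FPU register for virtual register r: a completely free
   one (nholds==0) is taken immediately, otherwise the least recently
   touched unlocked register is chosen and its current contents are
   spilled first. With willclobber set, the old value of r is not
   reloaded from memory. */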
2058     static int f_alloc_reg(int r, int willclobber)
2059     {
2060     int bestreg;
2061     uae_s32 when;
2062     int i;
2063     uae_s32 badness;
2064     bestreg=-1;
2065     when=2000000000;
2066     for (i=N_FREGS;i--;) {
2067     badness=live.fat[i].touched;
2068     if (live.fat[i].nholds==0)
2069     badness=0;
2070    
2071     if (!live.fat[i].locked && badness<when) {
2072     bestreg=i;
2073     when=badness;
2074     if (live.fat[i].nholds==0)
2075     break;
2076     }
2077     }
2078     Dif (bestreg==-1)
2079     abort();
2080    
2081     if (live.fat[bestreg].nholds>0) {
2082     f_free_nreg(bestreg);
2083     }
2084     if (f_isinreg(r)) {
2085     f_evict(r);
2086     }
2087    
2088     if (!willclobber) {
2089     if (live.fate[r].status!=UNDEF) {
2090     #if USE_LONG_DOUBLE
2091 gbeauche 1.24 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2092 gbeauche 1.1 #else
2093 gbeauche 1.24 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2094 gbeauche 1.1 #endif
2095     }
2096     live.fate[r].status=CLEAN;
2097     }
2098     else {
2099     live.fate[r].status=DIRTY;
2100     }
2101     live.fate[r].realreg=bestreg;
2102     live.fate[r].realind=live.fat[bestreg].nholds;
2103     live.fat[bestreg].touched=touchcnt++;
2104     live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2105     live.fat[bestreg].nholds++;
2106    
2107     return bestreg;
2108     }
2109    
2110     static void f_unlock(int r)
2111     {
2112     Dif (!live.fat[r].locked)
2113     abort();
2114     live.fat[r].locked--;
2115     }
2116    
2117     static void f_setlock(int r)
2118     {
2119     live.fat[r].locked++;
2120     }
2121    
2122     static __inline__ int f_readreg(int r)
2123     {
2124     int n;
2125     int answer=-1;
2126    
2127     if (f_isinreg(r)) {
2128     n=live.fate[r].realreg;
2129     answer=n;
2130     }
2131     /* either the value was in memory to start with, or it was evicted and
2132     is in memory now */
2133     if (answer<0)
2134     answer=f_alloc_reg(r,0);
2135    
2136     live.fat[answer].locked++;
2137     live.fat[answer].touched=touchcnt++;
2138     return answer;
2139     }
2140    
2141     static __inline__ void f_make_exclusive(int r, int clobber)
2142     {
2143     freg_status oldstate;
2144     int rr=live.fate[r].realreg;
2145     int nr;
2146     int nind;
2147     int ndirt=0;
2148     int i;
2149    
2150     if (!f_isinreg(r))
2151     return;
2152     if (live.fat[rr].nholds==1)
2153     return;
2154     for (i=0;i<live.fat[rr].nholds;i++) {
2155     int vr=live.fat[rr].holds[i];
2156     if (vr!=r && live.fate[vr].status==DIRTY)
2157     ndirt++;
2158     }
2159     if (!ndirt && !live.fat[rr].locked) {
2160     /* Everything else is clean, so let's keep this register */
2161     for (i=0;i<live.fat[rr].nholds;i++) {
2162     int vr=live.fat[rr].holds[i];
2163     if (vr!=r) {
2164     f_evict(vr);
2165     i--; /* Try that index again! */
2166     }
2167     }
2168     Dif (live.fat[rr].nholds!=1) {
2169     write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2170     for (i=0;i<live.fat[rr].nholds;i++) {
2171     write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2172     live.fate[live.fat[rr].holds[i]].realreg,
2173     live.fate[live.fat[rr].holds[i]].realind);
2174     }
2175     write_log("\n");
2176     abort();
2177     }
2178     return;
2179     }
2180    
2181     /* We have to split the register */
2182     oldstate=live.fate[r];
2183    
2184     f_setlock(rr); /* Make sure this doesn't go away */
2185     /* Forget about r being in the register rr */
2186     f_disassociate(r);
2187     /* Get a new register, that we will clobber completely */
2188     nr=f_alloc_reg(r,1);
2189     nind=live.fate[r].realind;
2190     if (!clobber)
2191     raw_fmov_rr(nr,rr); /* Make another copy */
2192     live.fate[r]=oldstate; /* Keep all the old state info */
2193     live.fate[r].realreg=nr;
2194     live.fate[r].realind=nind;
2195     f_unlock(rr);
2196     }
2197    
2198    
2199     static __inline__ int f_writereg(int r)
2200     {
2201     int n;
2202     int answer=-1;
2203    
2204     f_make_exclusive(r,1);
2205     if (f_isinreg(r)) {
2206     n=live.fate[r].realreg;
2207     answer=n;
2208     }
2209     if (answer<0) {
2210     answer=f_alloc_reg(r,1);
2211     }
2212     live.fate[r].status=DIRTY;
2213     live.fat[answer].locked++;
2214     live.fat[answer].touched=touchcnt++;
2215     return answer;
2216     }
2217    
2218     static int f_rmw(int r)
2219     {
2220     int n;
2221    
2222     f_make_exclusive(r,0);
2223     if (f_isinreg(r)) {
2224     n=live.fate[r].realreg;
2225     }
2226     else
2227     n=f_alloc_reg(r,0);
2228     live.fate[r].status=DIRTY;
2229     live.fat[n].locked++;
2230     live.fat[n].touched=touchcnt++;
2231     return n;
2232     }
2233    
2234     static void fflags_into_flags_internal(uae_u32 tmp)
2235     {
2236     int r;
2237    
2238     clobber_flags();
2239     r=f_readreg(FP_RESULT);
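    /* On hosts where transferring the FPU condition codes clobbers a
       fixed integer register (the x86 sequence goes through %eax, for
       instance), pin tmp to that register so no live value is lost. */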
2240     if (FFLAG_NREG_CLOBBER_CONDITION) {
2241     int tmp2=tmp;
2242     tmp=writereg_specific(tmp,4,FFLAG_NREG);
2243     raw_fflags_into_flags(r);
2244     unlock2(tmp);
2245     forget_about(tmp2);
2246     }
2247     else
2248     raw_fflags_into_flags(r);
2249     f_unlock(r);
2250 gbeauche 1.19 live_flags();
2251 gbeauche 1.1 }
2252    
2253    
2254    
2255    
2256     /********************************************************************
2257     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2258     ********************************************************************/
2259    
2260     /*
2261     * RULES FOR HANDLING REGISTERS:
2262     *
2263     * * In the function headers, order the parameters
2264     * - 1st registers written to
2265     * - 2nd read/modify/write registers
2266     * - 3rd registers read from
2267     * * Before calling raw_*, you must call readreg, writereg or rmw for
2268     * each register
2269     * * The order for this is
2270     * - 1st call remove_offset for all registers written to with size<4
2271     * - 2nd call readreg for all registers read without offset
2272     * - 3rd call rmw for all rmw registers
2273     * - 4th call readreg_offset for all registers that can handle offsets
2274     * - 5th call get_offset for all the registers from the previous step
2275     * - 6th call writereg for all written-to registers
2276     * - 7th call raw_*
2277     * - 8th unlock2 all registers that were locked
2278     */
2279    
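/* As an illustrative sketch only (hedged: any two-operand midfunc
 * follows this shape; xor_l/CLOBBER_XOR/raw_xor_l are assumed from
 * elsewhere in the JIT), the ordering rules above typically play out
 * like this:
 *
 *	MIDFUNC(2,xor_l,(RW4 d, R4 s))
 *	{
 *	    CLOBBER_XOR;      // declare what the raw_* routine clobbers
 *	    s=readreg(s,4);   // 2nd: lock registers read without offset
 *	    d=rmw(d,4,4);     // 3rd: lock read/modify/write registers
 *	    raw_xor_l(d,s);   // 7th: emit the actual host instruction
 *	    unlock2(d);       // 8th: release everything locked above
 *	    unlock2(s);
 *	}
 */
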
2280     MIDFUNC(0,live_flags,(void))
2281     {
2282     live.flags_on_stack=TRASH;
2283     live.flags_in_flags=VALID;
2284     live.flags_are_important=1;
2285     }
2286     MENDFUNC(0,live_flags,(void))
2287    
2288     MIDFUNC(0,dont_care_flags,(void))
2289     {
2290     live.flags_are_important=0;
2291     }
2292     MENDFUNC(0,dont_care_flags,(void))
2293    
2294    
2295     MIDFUNC(0,duplicate_carry,(void))
2296     {
2297     evict(FLAGX);
2298     make_flags_live_internal();
2299 gbeauche 1.24 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2300 gbeauche 1.1 log_vwrite(FLAGX);
2301     }
2302     MENDFUNC(0,duplicate_carry,(void))
2303    
2304     MIDFUNC(0,restore_carry,(void))
2305     {
2306     if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2307     bt_l_ri_noclobber(FLAGX,0);
2308     }
2309     else { /* Avoid the stall the above creates.
2310     This is slow on non-P6, though.
2311     */
2312     COMPCALL(rol_b_ri)(FLAGX,8);
2313     isclean(FLAGX);
2314     }
2315     }
2316     MENDFUNC(0,restore_carry,(void))
2317    
2318     MIDFUNC(0,start_needflags,(void))
2319     {
2320     needflags=1;
2321     }
2322     MENDFUNC(0,start_needflags,(void))
2323    
2324     MIDFUNC(0,end_needflags,(void))
2325     {
2326     needflags=0;
2327     }
2328     MENDFUNC(0,end_needflags,(void))
2329    
2330     MIDFUNC(0,make_flags_live,(void))
2331     {
2332     make_flags_live_internal();
2333     }
2334     MENDFUNC(0,make_flags_live,(void))
2335    
2336     MIDFUNC(1,fflags_into_flags,(W2 tmp))
2337     {
2338     clobber_flags();
2339     fflags_into_flags_internal(tmp);
2340     }
2341     MENDFUNC(1,fflags_into_flags,(W2 tmp))
2342    
2343    
2344     MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2345     {
2346     int size=4;
2347     if (i<16)
2348     size=2;
2349     CLOBBER_BT;
2350     r=readreg(r,size);
2351     raw_bt_l_ri(r,i);
2352     unlock2(r);
2353     }
2354     MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2355    
2356     MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2357     {
2358     CLOBBER_BT;
2359     r=readreg(r,4);
2360     b=readreg(b,4);
2361     raw_bt_l_rr(r,b);
2362     unlock2(r);
2363     unlock2(b);
2364     }
2365     MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2366    
2367     MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2368     {
2369     int size=4;
2370     if (i<16)
2371     size=2;
2372     CLOBBER_BT;
2373     r=rmw(r,size,size);
2374     raw_btc_l_ri(r,i);
2375     unlock2(r);
2376     }
2377     MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2378    
2379     MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2380     {
2381     CLOBBER_BT;
2382     b=readreg(b,4);
2383     r=rmw(r,4,4);
2384     raw_btc_l_rr(r,b);
2385     unlock2(r);
2386     unlock2(b);
2387     }
2388     MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2389    
2390    
2391     MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2392     {
2393     int size=4;
2394     if (i<16)
2395     size=2;
2396     CLOBBER_BT;
2397     r=rmw(r,size,size);
2398     raw_btr_l_ri(r,i);
2399     unlock2(r);
2400     }
2401     MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2402    
2403     MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2404     {
2405     CLOBBER_BT;
2406     b=readreg(b,4);
2407     r=rmw(r,4,4);
2408     raw_btr_l_rr(r,b);
2409     unlock2(r);
2410     unlock2(b);
2411     }
2412     MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2413    
2414    
2415     MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2416     {
2417     int size=4;
2418     if (i<16)
2419     size=2;
2420     CLOBBER_BT;
2421     r=rmw(r,size,size);
2422     raw_bts_l_ri(r,i);
2423     unlock2(r);
2424     }
2425     MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2426    
2427     MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2428     {
2429     CLOBBER_BT;
2430     b=readreg(b,4);
2431     r=rmw(r,4,4);
2432     raw_bts_l_rr(r,b);
2433     unlock2(r);
2434     unlock2(b);
2435     }
2436     MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2437    
2438     MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2439     {
2440     CLOBBER_MOV;
2441     d=writereg(d,4);
2442     raw_mov_l_rm(d,s);
2443     unlock2(d);
2444     }
2445     MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2446    
2447    
2448     MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2449     {
2450     r=readreg(r,4);
2451     raw_call_r(r);
2452     unlock2(r);
2453     }
2454     MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2455    
2456     MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2457     {
2458     CLOBBER_SUB;
2459     raw_sub_l_mi(d,s) ;
2460     }
2461     MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2462    
2463     MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2464     {
2465     CLOBBER_MOV;
2466     raw_mov_l_mi(d,s) ;
2467     }
2468     MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2469    
2470     MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2471     {
2472     CLOBBER_MOV;
2473     raw_mov_w_mi(d,s) ;
2474     }
2475     MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2476    
2477     MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2478     {
2479     CLOBBER_MOV;
2480     raw_mov_b_mi(d,s) ;
2481     }
2482     MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2483    
2484     MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2485     {
2486     if (!i && !needflags)
2487     return;
2488     CLOBBER_ROL;
2489     r=rmw(r,1,1);
2490     raw_rol_b_ri(r,i);
2491     unlock2(r);
2492     }
2493     MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2494    
2495     MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2496     {
2497     if (!i && !needflags)
2498     return;
2499     CLOBBER_ROL;
2500     r=rmw(r,2,2);
2501     raw_rol_w_ri(r,i);
2502     unlock2(r);
2503     }
2504     MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2505    
2506     MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2507     {
2508     if (!i && !needflags)
2509     return;
2510     CLOBBER_ROL;
2511     r=rmw(r,4,4);
2512     raw_rol_l_ri(r,i);
2513     unlock2(r);
2514     }
2515     MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2516    
2517     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2518     {
2519     if (isconst(r)) {
2520     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2521     return;
2522     }
2523     CLOBBER_ROL;
2524     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2525     d=rmw(d,4,4);
2526     Dif (r!=1) {
2527     write_log("Illegal register %d in raw_rol_l\n",r);
2528     abort();
2529     }
2530     raw_rol_l_rr(d,r) ;
2531     unlock2(r);
2532     unlock2(d);
2533     }
2534     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2535    
2536     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2537     { /* Can only do this with r==1, i.e. cl */
2538    
2539     if (isconst(r)) {
2540     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2541     return;
2542     }
2543     CLOBBER_ROL;
2544     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2545     d=rmw(d,2,2);
2546     Dif (r!=1) {
2547     write_log("Illegal register %d in raw_rol_w\n",r);
2548     abort();
2549     }
2550     raw_rol_w_rr(d,r) ;
2551     unlock2(r);
2552     unlock2(d);
2553     }
2554     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2555    
2556     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2557     { /* Can only do this with r==1, i.e. cl */
2558    
2559     if (isconst(r)) {
2560     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2561     return;
2562     }
2563    
2564     CLOBBER_ROL;
2565     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2566     d=rmw(d,1,1);
2567     Dif (r!=1) {
2568     write_log("Illegal register %d in raw_rol_b\n",r);
2569     abort();
2570     }
2571     raw_rol_b_rr(d,r) ;
2572     unlock2(r);
2573     unlock2(d);
2574     }
2575     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2576    
2577    
2578     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2579     {
2580     if (isconst(r)) {
2581     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2582     return;
2583     }
2584     CLOBBER_SHLL;
2585     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2586     d=rmw(d,4,4);
2587     Dif (r!=1) {
2588     write_log("Illegal register %d in raw_shll_l\n",r);
2589     abort();
2590     }
2591     raw_shll_l_rr(d,r) ;
2592     unlock2(r);
2593     unlock2(d);
2594     }
2595     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2596    
2597     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2598     { /* Can only do this with r==1, i.e. cl */
2599    
2600     if (isconst(r)) {
2601     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2602     return;
2603     }
2604     CLOBBER_SHLL;
2605     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2606     d=rmw(d,2,2);
2607     Dif (r!=1) {
2608     write_log("Illegal register %d in raw_shll_w\n",r);
2609     abort();
2610     }
2611     raw_shll_w_rr(d,r) ;
2612     unlock2(r);
2613     unlock2(d);
2614     }
2615     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2616    
2617     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2618     { /* Can only do this with r==1, i.e. cl */
2619    
2620     if (isconst(r)) {
2621     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2622     return;
2623     }
2624    
2625     CLOBBER_SHLL;
2626     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2627     d=rmw(d,1,1);
2628     Dif (r!=1) {
2629     write_log("Illegal register %d in raw_shll_b\n",r);
2630     abort();
2631     }
2632     raw_shll_b_rr(d,r) ;
2633     unlock2(r);
2634     unlock2(d);
2635     }
2636     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2637    
2638    
2639     MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2640     {
2641     if (!i && !needflags)
2642     return;
2643     CLOBBER_ROR;
2644     r=rmw(r,1,1);
2645     raw_ror_b_ri(r,i);
2646     unlock2(r);
2647     }
2648     MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2649    
2650     MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2651     {
2652     if (!i && !needflags)
2653     return;
2654     CLOBBER_ROR;
2655     r=rmw(r,2,2);
2656     raw_ror_w_ri(r,i);
2657     unlock2(r);
2658     }
2659     MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2660    
2661     MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2662     {
2663     if (!i && !needflags)
2664     return;
2665     CLOBBER_ROR;
2666     r=rmw(r,4,4);
2667     raw_ror_l_ri(r,i);
2668     unlock2(r);
2669     }
2670     MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2671    
2672     MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2673     {
2674     if (isconst(r)) {
2675     COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2676     return;
2677     }
2678     CLOBBER_ROR;
2679     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2680     d=rmw(d,4,4);
2681     raw_ror_l_rr(d,r) ;
2682     unlock2(r);
2683     unlock2(d);
2684     }
2685     MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2686    
2687     MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2688     {
2689     if (isconst(r)) {
2690     COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2691     return;
2692     }
2693     CLOBBER_ROR;
2694     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2695     d=rmw(d,2,2);
2696     raw_ror_w_rr(d,r) ;
2697     unlock2(r);
2698     unlock2(d);
2699     }
2700     MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2701    
2702     MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2703     {
2704     if (isconst(r)) {
2705     COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2706     return;
2707     }
2708    
2709     CLOBBER_ROR;
2710     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2711     d=rmw(d,1,1);
2712     raw_ror_b_rr(d,r) ;
2713     unlock2(r);
2714     unlock2(d);
2715     }
2716     MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2717    
2718     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2719     {
2720     if (isconst(r)) {
2721     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2722     return;
2723     }
2724     CLOBBER_SHRL;
2725     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2726     d=rmw(d,4,4);
2727     Dif (r!=1) {
2728     write_log("Illegal register %d in raw_shrl_l\n",r);
2729     abort();
2730     }
2731     raw_shrl_l_rr(d,r) ;
2732     unlock2(r);
2733     unlock2(d);
2734     }
2735     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2736    
2737     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2738     { /* Can only do this with r==1, i.e. cl */
2739    
2740     if (isconst(r)) {
2741     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2742     return;
2743     }
2744     CLOBBER_SHRL;
2745     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2746     d=rmw(d,2,2);
2747     Dif (r!=1) {
2748     write_log("Illegal register %d in raw_shrl_w\n",r);
2749     abort();
2750     }
2751     raw_shrl_w_rr(d,r) ;
2752     unlock2(r);
2753     unlock2(d);
2754     }
2755     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2756    
2757     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2758     { /* Can only do this with r==1, i.e. cl */
2759    
2760     if (isconst(r)) {
2761     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2762     return;
2763     }
2764    
2765     CLOBBER_SHRL;
2766     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2767     d=rmw(d,1,1);
2768     Dif (r!=1) {
2769     write_log("Illegal register %d in raw_shrl_b\n",r);
2770     abort();
2771     }
2772     raw_shrl_b_rr(d,r) ;
2773     unlock2(r);
2774     unlock2(d);
2775     }
2776     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2777    
2778    
2779    
2780     MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2781     {
2782     if (!i && !needflags)
2783     return;
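    /* Translation-time constant folding: if r's value is known and the
       flags are dead, shift the cached constant instead of emitting code. */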
2784     if (isconst(r) && !needflags) {
2785     live.state[r].val<<=i;
2786     return;
2787     }
2788     CLOBBER_SHLL;
2789     r=rmw(r,4,4);
2790     raw_shll_l_ri(r,i);
2791     unlock2(r);
2792     }
2793     MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2794    
2795     MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2796     {
2797     if (!i && !needflags)
2798     return;
2799     CLOBBER_SHLL;
2800     r=rmw(r,2,2);
2801     raw_shll_w_ri(r,i);
2802     unlock2(r);
2803     }
2804     MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2805    
2806     MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2807     {
2808     if (!i && !needflags)
2809     return;
2810     CLOBBER_SHLL;
2811     r=rmw(r,1,1);
2812     raw_shll_b_ri(r,i);
2813     unlock2(r);
2814     }
2815     MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2816    
2817     MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2818     {
2819     if (!i && !needflags)
2820     return;
2821     if (isconst(r) && !needflags) {
2822     live.state[r].val>>=i;
2823     return;
2824     }
2825     CLOBBER_SHRL;
2826     r=rmw(r,4,4);
2827     raw_shrl_l_ri(r,i);
2828     unlock2(r);
2829     }
2830     MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2831    
2832     MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2833     {
2834     if (!i && !needflags)
2835     return;
2836     CLOBBER_SHRL;
2837     r=rmw(r,2,2);
2838     raw_shrl_w_ri(r,i);
2839     unlock2(r);
2840     }
2841     MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2842    
2843     MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2844     {
2845     if (!i && !needflags)
2846     return;
2847     CLOBBER_SHRL;
2848     r=rmw(r,1,1);
2849     raw_shrl_b_ri(r,i);
2850     unlock2(r);
2851     }
2852     MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2853    
2854     MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2855     {
2856     if (!i && !needflags)
2857     return;
2858     CLOBBER_SHRA;
2859     r=rmw(r,4,4);
2860     raw_shra_l_ri(r,i);
2861     unlock2(r);
2862     }
2863     MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2864    
2865     MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2866     {
2867     if (!i && !needflags)
2868     return;
2869     CLOBBER_SHRA;
2870     r=rmw(r,2,2);
2871     raw_shra_w_ri(r,i);
2872     unlock2(r);
2873     }
2874     MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2875    
2876     MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2877     {
2878     if (!i && !needflags)
2879     return;
2880     CLOBBER_SHRA;
2881     r=rmw(r,1,1);
2882     raw_shra_b_ri(r,i);
2883     unlock2(r);
2884     }
2885     MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2886    
2887     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2888     {
2889     if (isconst(r)) {
2890     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2891     return;
2892     }
2893     CLOBBER_SHRA;
2894     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2895     d=rmw(d,4,4);
2896     Dif (r!=1) {
2897     write_log("Illegal register %d in raw_shra_l\n",r);
2898     abort();
2899     }
2900     raw_shra_l_rr(d,r) ;
2901     unlock2(r);
2902     unlock2(d);
2903     }
2904     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2905    
2906     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2907     { /* Can only do this with r==1, i.e. cl */
2908    
2909     if (isconst(r)) {
2910     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2911     return;
2912     }
2913     CLOBBER_SHRA;
2914     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2915     d=rmw(d,2,2);
2916     Dif (r!=1) {
2917     write_log("Illegal register %d in raw_shra_w\n",r);
2918     abort();
2919     }
2920     raw_shra_w_rr(d,r) ;
2921     unlock2(r);
2922     unlock2(d);
2923     }
2924     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2925    
2926     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2927     { /* Can only do this with r==1, i.e. cl */
2928    
2929     if (isconst(r)) {
2930     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2931     return;
2932     }
2933    
2934     CLOBBER_SHRA;
2935     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2936     d=rmw(d,1,1);
2937     Dif (r!=1) {
2938     write_log("Illegal register %d in raw_shra_b\n",r);
2939     abort();
2940     }
2941     raw_shra_b_rr(d,r) ;
2942     unlock2(r);
2943     unlock2(d);
2944     }
2945     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2946    
2947    
2948     MIDFUNC(2,setcc,(W1 d, IMM cc))
2949     {
2950     CLOBBER_SETCC;
2951     d=writereg(d,1);
2952     raw_setcc(d,cc);
2953     unlock2(d);
2954     }
2955     MENDFUNC(2,setcc,(W1 d, IMM cc))
2956    
2957     MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2958     {
2959     CLOBBER_SETCC;
2960     raw_setcc_m(d,cc);
2961     }
2962     MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2963    
2964     MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2965     {
2966     if (d==s)
2967     return;
2968     CLOBBER_CMOV;
2969     s=readreg(s,4);
2970     d=rmw(d,4,4);
2971     raw_cmov_l_rr(d,s,cc);
2972     unlock2(s);
2973     unlock2(d);
2974     }
2975     MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2976    
2977     MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2978     {
2979     CLOBBER_CMOV;
2980     d=rmw(d,4,4);
2981     raw_cmov_l_rm(d,s,cc);
2982     unlock2(d);
2983     }
2984     MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2985    
2986 gbeauche 1.26 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2987     {
2988     CLOBBER_BSF;
2989     s = readreg(s, 4);
2990     d = writereg(d, 4);
2991     raw_bsf_l_rr(d, s);
2992     unlock2(s);
2993     unlock2(d);
2994     }
2995     MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2996    
2997     /* Set the Z flag depending on the value in s. Note that the
2998     value has to be 0 or -1 (or, more precisely, for non-zero
2999     values, bit 14 must be set)! */
3000     MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3001 gbeauche 1.1 {
3002 gbeauche 1.26 CLOBBER_BSF;
3003     s=rmw_specific(s,4,4,FLAG_NREG3);
3004     tmp=writereg(tmp,4);
3005     raw_flags_set_zero(s, tmp);
3006     unlock2(tmp);
3007     unlock2(s);
3008 gbeauche 1.1 }
3009 gbeauche 1.26 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3010 gbeauche 1.1
3011     MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3012     {
3013     CLOBBER_MUL;
3014     s=readreg(s,4);
3015     d=rmw(d,4,4);
3016     raw_imul_32_32(d,s);
3017     unlock2(s);
3018     unlock2(d);
3019     }
3020     MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3021    
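/* The widening 32x32->64 multiplies below return the 64-bit product in
   the d:s register pair; MUL_NREG1/MUL_NREG2 pin the operands to the
   fixed pair the host's widening multiply uses (eax/edx on x86). */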
3022     MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3023     {
3024     CLOBBER_MUL;
3025     s=rmw_specific(s,4,4,MUL_NREG2);
3026     d=rmw_specific(d,4,4,MUL_NREG1);
3027     raw_imul_64_32(d,s);
3028     unlock2(s);
3029     unlock2(d);
3030     }
3031     MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3032    
3033     MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3034     {
3035     CLOBBER_MUL;
3036     s=rmw_specific(s,4,4,MUL_NREG2);
3037     d=rmw_specific(d,4,4,MUL_NREG1);
3038     raw_mul_64_32(d,s);
3039     unlock2(s);
3040     unlock2(d);
3041     }
3042     MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3043    
3044     MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3045     {
3046     CLOBBER_MUL;
3047     s=readreg(s,4);
3048     d=rmw(d,4,4);
3049     raw_mul_32_32(d,s);
3050     unlock2(s);
3051     unlock2(d);
3052     }
3053     MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3054    
3055 gbeauche 1.24 #if SIZEOF_VOID_P == 8
3056     MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3057     {
3058     int isrmw;
3059    
3060     if (isconst(s)) {
3061     set_const(d,(uae_s32)live.state[s].val);
3062     return;
3063     }
3064    
3065     CLOBBER_SE32;
3066     isrmw=(s==d);
3067     if (!isrmw) {
3068     s=readreg(s,4);
3069     d=writereg(d,4);
3070     }
3071     else { /* If we try to lock this twice, with different sizes, we
3072     are in trouble! */
3073     s=d=rmw(s,4,4);
3074     }
3075     raw_sign_extend_32_rr(d,s);
3076     if (!isrmw) {
3077     unlock2(d);
3078     unlock2(s);
3079     }
3080     else {
3081     unlock2(s);
3082     }
3083     }
3084     MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3085     #endif
3086    
3087 gbeauche 1.1 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3088     {
3089     int isrmw;
3090    
3091     if (isconst(s)) {
3092     set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3093     return;
3094     }
3095    
3096     CLOBBER_SE16;
3097     isrmw=(s==d);
3098     if (!isrmw) {
3099     s=readreg(s,2);
3100     d=writereg(d,4);
3101     }
3102     else { /* If we try to lock this twice, with different sizes, we
3103     are in trouble! */
3104     s=d=rmw(s,4,2);
3105     }
3106     raw_sign_extend_16_rr(d,s);
3107     if (!isrmw) {
3108     unlock2(d);
3109     unlock2(s);
3110     }
3111     else {
3112     unlock2(s);
3113     }
3114     }
3115     MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3116    
3117     MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3118     {
3119     int isrmw;
3120    
3121     if (isconst(s)) {
3122     set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3123     return;
3124     }
3125    
3126     isrmw=(s==d);
3127     CLOBBER_SE8;
3128     if (!isrmw) {
3129     s=readreg(s,1);
3130     d=writereg(d,4);
3131     }
3132     else { /* If we try to lock this twice, with different sizes, we
3133     are in trouble! */
3134     s=d=rmw(s,4,1);
3135     }
3136    
3137     raw_sign_extend_8_rr(d,s);
3138    
3139     if (!isrmw) {
3140     unlock2(d);
3141     unlock2(s);
3142     }
3143     else {
3144     unlock2(s);
3145     }
3146     }
3147     MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3148    
3149    
3150     MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3151     {
3152     int isrmw;
3153    
3154     if (isconst(s)) {
3155     set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3156     return;
3157     }
3158    
3159     isrmw=(s==d);
3160     CLOBBER_ZE16;
3161     if (!isrmw) {
3162     s=readreg(s,2);
3163     d=writereg(d,4);
3164     }
3165     else { /* If we try to lock this twice, with different sizes, we
3166     are in trouble! */
3167     s=d=rmw(s,4,2);
3168     }
3169     raw_zero_extend_16_rr(d,s);
3170     if (!isrmw) {
3171     unlock2(d);
3172     unlock2(s);
3173     }
3174     else {
3175     unlock2(s);
3176     }
3177     }
3178     MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3179    
3180     MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3181     {
3182     int isrmw;
3183     if (isconst(s)) {
3184     set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3185     return;
3186     }
3187    
3188     isrmw=(s==d);
3189     CLOBBER_ZE8;
3190     if (!isrmw) {
3191     s=readreg(s,1);
3192     d=writereg(d,4);
3193     }
3194     else { /* If we try to lock this twice, with different sizes, we
3195     are in trouble! */
3196     s=d=rmw(s,4,1);
3197     }
3198    
3199     raw_zero_extend_8_rr(d,s);
3200    
3201     if (!isrmw) {
3202     unlock2(d);
3203     unlock2(s);
3204     }
3205     else {
3206     unlock2(s);
3207     }
3208     }
3209     MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3210    
3211     MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3212     {
3213     if (d==s)
3214     return;
3215     if (isconst(s)) {
3216     COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3217     return;
3218     }
3219    
3220     CLOBBER_MOV;
3221     s=readreg(s,1);
3222     d=writereg(d,1);
3223     raw_mov_b_rr(d,s);
3224     unlock2(d);
3225     unlock2(s);
3226     }
3227     MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3228    
3229     MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3230     {
3231     if (d==s)
3232     return;
3233     if (isconst(s)) {
3234     COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3235     return;
3236     }
3237    
3238     CLOBBER_MOV;
3239     s=readreg(s,2);
3240     d=writereg(d,2);
3241     raw_mov_w_rr(d,s);
3242     unlock2(d);
3243     unlock2(s);
3244     }
3245     MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3246    
3247    
3248     MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3249     {
3250     CLOBBER_MOV;
3251     baser=readreg(baser,4);
3252     index=readreg(index,4);
3253     d=writereg(d,4);
3254    
3255     raw_mov_l_rrm_indexed(d,baser,index,factor);
3256     unlock2(d);
3257     unlock2(baser);
3258     unlock2(index);
3259     }
3260     MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3261    
3262     MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3263     {
3264     CLOBBER_MOV;
3265     baser=readreg(baser,4);
3266     index=readreg(index,4);
3267     d=writereg(d,2);
3268    
3269     raw_mov_w_rrm_indexed(d,baser,index,factor);
3270     unlock2(d);
3271     unlock2(baser);
3272     unlock2(index);
3273     }
3274     MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3275    
3276     MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3277     {
3278     CLOBBER_MOV;
3279     baser=readreg(baser,4);
3280     index=readreg(index,4);
3281     d=writereg(d,1);
3282    
3283     raw_mov_b_rrm_indexed(d,baser,index,factor);
3284    
3285     unlock2(d);
3286     unlock2(baser);
3287     unlock2(index);
3288     }
3289     MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3290    
3291    
3292     MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3293     {
3294     CLOBBER_MOV;
3295     baser=readreg(baser,4);
3296     index=readreg(index,4);
3297     s=readreg(s,4);
3298    
3299     Dif (baser==s || index==s)
3300     abort();
3301    
3302    
3303     raw_mov_l_mrr_indexed(baser,index,factor,s);
3304     unlock2(s);
3305     unlock2(baser);
3306     unlock2(index);
3307     }
3308     MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3309    
3310     MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3311     {
3312     CLOBBER_MOV;
3313     baser=readreg(baser,4);
3314     index=readreg(index,4);
3315     s=readreg(s,2);
3316    
3317     raw_mov_w_mrr_indexed(baser,index,factor,s);
3318     unlock2(s);
3319     unlock2(baser);
3320     unlock2(index);
3321     }
3322     MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3323    
3324     MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3325     {
3326     CLOBBER_MOV;
3327     s=readreg(s,1);
3328     baser=readreg(baser,4);
3329     index=readreg(index,4);
3330    
3331     raw_mov_b_mrr_indexed(baser,index,factor,s);
3332     unlock2(s);
3333     unlock2(baser);
3334     unlock2(index);
3335     }
3336     MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3337    
3338    
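/* In the *_bmrr_indexed family below, constant offsets pending on
   baser/index (see USE_OFFSET) are fetched with get_offset() and folded
   into the immediate displacement instead of being applied to the
   registers themselves. */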
3339     MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3340     {
3341     int basereg=baser;
3342     int indexreg=index;
3343    
3344     CLOBBER_MOV;
3345     s=readreg(s,4);
3346     baser=readreg_offset(baser,4);
3347     index=readreg_offset(index,4);
3348    
3349     base+=get_offset(basereg);
3350     base+=factor*get_offset(indexreg);
3351    
3352     raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3353     unlock2(s);
3354     unlock2(baser);
3355     unlock2(index);
3356     }
3357     MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3358    
3359     MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3360     {
3361     int basereg=baser;
3362     int indexreg=index;
3363    
3364     CLOBBER_MOV;
3365     s=readreg(s,2);
3366     baser=readreg_offset(baser,4);
3367     index=readreg_offset(index,4);
3368    
3369     base+=get_offset(basereg);
3370     base+=factor*get_offset(indexreg);
3371    
3372     raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3373     unlock2(s);
3374     unlock2(baser);
3375     unlock2(index);
3376     }
3377     MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3378    
3379     MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3380     {
3381     int basereg=baser;
3382     int indexreg=index;
3383    
3384     CLOBBER_MOV;
3385     s=readreg(s,1);
3386     baser=readreg_offset(baser,4);
3387     index=readreg_offset(index,4);
3388    
3389     base+=get_offset(basereg);
3390     base+=factor*get_offset(indexreg);
3391    
3392     raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3393     unlock2(s);
3394     unlock2(baser);
3395     unlock2(index);
3396     }
3397     MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3398    
3399    
3400    
3401     /* Read a long from base+baser+factor*index */
3402     MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3403     {
3404     int basereg=baser;
3405     int indexreg=index;
3406    
3407     CLOBBER_MOV;
3408     baser=readreg_offset(baser,4);
3409     index=readreg_offset(index,4);
3410     base+=get_offset(basereg);
3411     base+=factor*get_offset(indexreg);
3412     d=writereg(d,4);
3413     raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3414     unlock2(d);
3415     unlock2(baser);
3416     unlock2(index);
3417     }
3418     MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3419    
3420    
3421     MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3422     {
3423     int basereg=baser;
3424     int indexreg=index;
3425    
3426     CLOBBER_MOV;
3427     remove_offset(d,-1);
3428     baser=readreg_offset(baser,4);
3429     index=readreg_offset(index,4);
3430     base+=get_offset(basereg);
3431     base+=factor*get_offset(indexreg);
3432     d=writereg(d,2);
3433     raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3434     unlock2(d);
3435     unlock2(baser);
3436     unlock2(index);
3437     }
3438     MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3439    
3440    
3441     MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3442     {
3443     int basereg=baser;
3444     int indexreg=index;
3445    
3446     CLOBBER_MOV;
3447     remove_offset(d,-1);
3448     baser=readreg_offset(baser,4);
3449     index=readreg_offset(index,4);
3450     base+=get_offset(basereg);
3451     base+=factor*get_offset(indexreg);
3452     d=writereg(d,1);
3453     raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3454     unlock2(d);
3455     unlock2(baser);
3456     unlock2(index);
3457     }
3458     MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3459    
3460     /* Read a long from base+factor*index */
3461     MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3462     {
3463     int indexreg=index;
3464    
3465     if (isconst(index)) {
3466     COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3467     return;
3468     }
3469    
3470     CLOBBER_MOV;
3471     index=readreg_offset(index,4);
3472     base+=get_offset(indexreg)*factor;
3473     d=writereg(d,4);
3474    
3475     raw_mov_l_rm_indexed(d,base,index,factor);
3476     unlock2(index);
3477     unlock2(d);
3478     }
3479     MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3480    
3481    
3482     /* read the long at the address contained in s+offset and store in d */
3483     MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3484     {
3485     if (isconst(s)) {
3486     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3487     return;
3488     }
3489     CLOBBER_MOV;
3490     s=readreg(s,4);
3491     d=writereg(d,4);
3492    
3493     raw_mov_l_rR(d,s,offset);
3494     unlock2(d);
3495     unlock2(s);
3496     }
3497     MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3498    
3499     /* read the word at the address contained in s+offset and store in d */
3500     MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3501     {
3502     if (isconst(s)) {
3503     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3504     return;
3505     }
3506     CLOBBER_MOV;
3507     s=readreg(s,4);
3508     d=writereg(d,2);
3509    
3510     raw_mov_w_rR(d,s,offset);
3511     unlock2(d);
3512     unlock2(s);
3513     }
3514     MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3515    
3516     /* read the byte at the address contained in s+offset and store in d */
3517     MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3518     {
3519     if (isconst(s)) {
3520     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3521     return;
3522     }
3523     CLOBBER_MOV;
3524     s=readreg(s,4);
3525     d=writereg(d,1);
3526    
3527     raw_mov_b_rR(d,s,offset);
3528     unlock2(d);
3529     unlock2(s);
3530     }
3531     MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3532    
3533     /* read the long at the address contained in s+offset and store in d */
3534     MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3535     {
3536     int sreg=s;
3537     if (isconst(s)) {
3538     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3539     return;
3540     }
3541     CLOBBER_MOV;
3542     s=readreg_offset(s,4);
3543     offset+=get_offset(sreg);
3544     d=writereg(d,4);
3545    
3546     raw_mov_l_brR(d,s,offset);
3547     unlock2(d);
3548     unlock2(s);
3549     }
3550     MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3551    
3552     /* read the word at the address contained in s+offset and store in d */
3553     MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3554     {
3555     int sreg=s;
3556     if (isconst(s)) {
3557     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3558     return;
3559     }
3560     CLOBBER_MOV;
3561     remove_offset(d,-1);
3562     s=readreg_offset(s,4);
3563     offset+=get_offset(sreg);
3564     d=writereg(d,2);
3565    
3566     raw_mov_w_brR(d,s,offset);
3567     unlock2(d);
3568     unlock2(s);
3569     }
3570     MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3571    
3572     /* read the byte at the address contained in s+offset and store in d */
3573     MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3574     {
3575     int sreg=s;
3576     if (isconst(s)) {
3577     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3578     return;
3579     }
3580     CLOBBER_MOV;
3581     remove_offset(d,-1);
3582     s=readreg_offset(s,4);
3583     offset+=get_offset(sreg);
3584     d=writereg(d,1);
3585    
3586     raw_mov_b_brR(d,s,offset);
3587     unlock2(d);
3588     unlock2(s);
3589     }
3590     MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3591    
3592     MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3593     {
3594     int dreg=d;
3595     if (isconst(d)) {
3596     COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3597     return;
3598     }
3599    
3600     CLOBBER_MOV;
3601     d=readreg_offset(d,4);
3602     offset+=get_offset(dreg);
3603     raw_mov_l_Ri(d,i,offset);
3604     unlock2(d);
3605     }
3606     MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3607    
3608     MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3609     {
3610     int dreg=d;
3611     if (isconst(d)) {
3612     COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3613     return;
3614     }
3615    
3616     CLOBBER_MOV;
3617     d=readreg_offset(d,4);
3618     offset+=get_offset(dreg);
3619     raw_mov_w_Ri(d,i,offset);
3620     unlock2(d);
3621     }
3622     MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3623    
3624     MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3625     {
3626     int dreg=d;
3627     if (isconst(d)) {
3628     COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3629     return;
3630     }
3631    
3632     CLOBBER_MOV;
3633     d=readreg_offset(d,4);
3634     offset+=get_offset(dreg);
3635     raw_mov_b_Ri(d,i,offset);
3636     unlock2(d);
3637     }
3638     MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3639    
3640     /* Warning! OFFSET is byte sized only! */
3641     MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3642     {
3643     if (isconst(d)) {
3644     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3645     return;
3646     }
3647     if (isconst(s)) {
3648     COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3649     return;
3650     }
3651    
3652     CLOBBER_MOV;
3653     s=readreg(s,4);
3654     d=readreg(d,4);
3655    
3656     raw_mov_l_Rr(d,s,offset);
3657     unlock2(d);
3658     unlock2(s);
3659     }
3660     MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3661    
3662     MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3663     {
3664     if (isconst(d)) {
3665     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3666     return;
3667     }
3668     if (isconst(s)) {
3669     COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3670     return;
3671     }
3672    
3673     CLOBBER_MOV;
3674     s=readreg(s,2);
3675     d=readreg(d,4);
3676     raw_mov_w_Rr(d,s,offset);
3677     unlock2(d);
3678     unlock2(s);
3679     }
3680     MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3681    
3682     MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3683     {
3684     if (isconst(d)) {
3685     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3686     return;
3687     }
3688     if (isconst(s)) {
3689     COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3690     return;
3691     }
3692    
3693     CLOBBER_MOV;
3694     s=readreg(s,1);
3695     d=readreg(d,4);
3696     raw_mov_b_Rr(d,s,offset);
3697     unlock2(d);
3698     unlock2(s);
3699     }
3700     MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3701    
3702     MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3703     {
3704     if (isconst(s)) {
3705     COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3706     return;
3707     }
3708     #if USE_OFFSET
3709     if (d==s) {
3710     add_offset(d,offset);
3711     return;
3712     }
3713     #endif
3714     CLOBBER_LEA;
3715     s=readreg(s,4);
3716     d=writereg(d,4);
3717     raw_lea_l_brr(d,s,offset);
3718     unlock2(d);
3719     unlock2(s);
3720     }
3721     MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3722    
3723     MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3724     {
3725     if (!offset) {
3726     COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3727     return;
3728     }
3729     CLOBBER_LEA;
3730     s=readreg(s,4);
3731     index=readreg(index,4);
3732     d=writereg(d,4);
3733    
3734     raw_lea_l_brr_indexed(d,s,index,factor,offset);
3735     unlock2(d);
3736     unlock2(index);
3737     unlock2(s);
3738     }
3739     MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3740    
3741     MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3742     {
3743     CLOBBER_LEA;
3744     s=readreg(s,4);
3745     index=readreg(index,4);
3746     d=writereg(d,4);
3747    
3748     raw_lea_l_rr_indexed(d,s,index,factor);
3749     unlock2(d);
3750     unlock2(index);
3751     unlock2(s);
3752     }
3753     MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3754    
3755     /* write the long in s to the address contained in d+offset */
3756     MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3757     {
3758     int dreg=d;
3759     if (isconst(d)) {
3760     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3761     return;
3762     }
3763    
3764     CLOBBER_MOV;
3765     s=readreg(s,4);
3766     d=readreg_offset(d,4);
3767     offset+=get_offset(dreg);
3768    
3769     raw_mov_l_bRr(d,s,offset);
3770     unlock2(d);
3771     unlock2(s);
3772     }
3773     MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3774    
3775     /* write the word in s to the address contained in d+offset */
3776     MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3777     {
3778     int dreg=d;
3779    
3780     if (isconst(d)) {
3781     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3782     return;
3783     }
3784    
3785     CLOBBER_MOV;
3786     s=readreg(s,2);
3787     d=readreg_offset(d,4);
3788     offset+=get_offset(dreg);
3789     raw_mov_w_bRr(d,s,offset);
3790     unlock2(d);
3791     unlock2(s);
3792     }
3793     MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3794    
3795     MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3796     {
3797     int dreg=d;
3798     if (isconst(d)) {
3799     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3800     return;
3801     }
3802    
3803     CLOBBER_MOV;
3804     s=readreg(s,1);
3805     d=readreg_offset(d,4);
3806     offset+=get_offset(dreg);
3807     raw_mov_b_bRr(d,s,offset);
3808     unlock2(d);
3809     unlock2(s);
3810     }
3811     MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3812    
3813     MIDFUNC(1,bswap_32,(RW4 r))
3814     {
3815     int reg=r;
3816    
3817     if (isconst(r)) {
3818     uae_u32 oldv=live.state[r].val;
3819     live.state[r].val=reverse32(oldv);
3820     return;
3821     }
3822    
3823     CLOBBER_SW32;
3824     r=rmw(r,4,4);
3825     raw_bswap_32(r);
3826     unlock2(r);
3827     }
3828     MENDFUNC(1,bswap_32,(RW4 r))
3829    
3830     MIDFUNC(1,bswap_16,(RW2 r))
3831     {
3832     if (isconst(r)) {
3833     uae_u32 oldv=live.state[r].val;
3834     live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3835     (oldv&0xffff0000);
3836     return;
3837     }
3838    
3839     CLOBBER_SW16;
3840     r=rmw(r,2,2);
3841    
3842     raw_bswap_16(r);
3843     unlock2(r);
3844     }
3845     MENDFUNC(1,bswap_16,(RW2 r))
3846    
3847    
3848    
3849     MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3850     {
3851     int olds;
3852    
3853     if (d==s) { /* How pointless! */
3854     return;
3855     }
3856     if (isconst(s)) {
3857     COMPCALL(mov_l_ri)(d,live.state[s].val);
3858     return;
3859     }
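    /* No host move is emitted: d is attached as another holder of the
       native register that already contains s, and the two are split
       lazily (via make_exclusive) only when one of them is written. */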
3860     olds=s;
3861     disassociate(d);
3862     s=readreg_offset(s,4);
3863     live.state[d].realreg=s;
3864     live.state[d].realind=live.nat[s].nholds;
3865     live.state[d].val=live.state[olds].val;
3866     live.state[d].validsize=4;
3867     live.state[d].dirtysize=4;
3868     set_status(d,DIRTY);
3869    
3870     live.nat[s].holds[live.nat[s].nholds]=d;
3871     live.nat[s].nholds++;
3872     log_clobberreg(d);
3873     /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3874     d,s,live.state[d].realind,live.nat[s].nholds); */
3875     unlock2(s);
3876     }
3877     MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3878    
3879     MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3880     {
3881     if (isconst(s)) {
3882     COMPCALL(mov_l_mi)(d,live.state[s].val);
3883     return;
3884     }
3885     CLOBBER_MOV;
3886     s=readreg(s,4);
3887    
3888     raw_mov_l_mr(d,s);
3889     unlock2(s);
3890     }
3891     MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3892    
3893    
3894     MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3895     {
3896     if (isconst(s)) {
3897     COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3898     return;
3899     }
3900     CLOBBER_MOV;
3901     s=readreg(s,2);
3902    
3903     raw_mov_w_mr(d,s);
3904     unlock2(s);
3905     }
3906     MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3907    
3908     MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3909     {
3910     CLOBBER_MOV;
3911     d=writereg(d,2);
3912    
3913     raw_mov_w_rm(d,s);
3914     unlock2(d);
3915     }
3916     MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3917    
3918     MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3919     {
3920     if (isconst(s)) {
3921     COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3922     return;
3923     }
3924    
3925     CLOBBER_MOV;
3926     s=readreg(s,1);
3927    
3928     raw_mov_b_mr(d,s);
3929     unlock2(s);
3930     }
3931     MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3932    
3933     MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3934     {
3935     CLOBBER_MOV;
3936     d=writereg(d,1);
3937    
3938     raw_mov_b_rm(d,s);
3939     unlock2(d);
3940     }
3941     MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3942    
3943     MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3944     {
3945     set_const(d,s);
3946     return;
3947     }
3948     MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3949    
3950     MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3951     {
3952     CLOBBER_MOV;
3953     d=writereg(d,2);
3954    
3955     raw_mov_w_ri(d,s);
3956     unlock2(d);
3957     }
3958     MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3959    
3960     MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3961     {
3962     CLOBBER_MOV;
3963     d=writereg(d,1);
3964    
3965     raw_mov_b_ri(d,s);
3966     unlock2(d);
3967     }
3968     MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3969    
3970    
3971     MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3972     {
3973     CLOBBER_ADD;
3974     raw_add_l_mi(d,s);
3975     }
3976     MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3977    
3978     MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3979     {
3980     CLOBBER_ADD;
3981     raw_add_w_mi(d,s);
3982     }
3983     MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3984    
3985     MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3986     {
3987     CLOBBER_ADD;
3988     raw_add_b_mi(d,s);
3989     }
3990     MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3991    
3992    
3993     MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3994     {
3995     CLOBBER_TEST;
3996     d=readreg(d,4);
3997    
3998     raw_test_l_ri(d,i);
3999     unlock2(d);
4000     }
4001     MENDFUNC(2,test_l_ri,(R4 d, IMM i))
4002    
4003     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
4004     {
4005     CLOBBER_TEST;
4006     d=readreg(d,4);
4007     s=readreg(s,4);
4008    
4009     raw_test_l_rr(d,s);
4010     unlock2(d);
4011     unlock2(s);
4012     }
4013     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4014    
4015     MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4016     {
4017     CLOBBER_TEST;
4018     d=readreg(d,2);
4019     s=readreg(s,2);
4020    
4021     raw_test_w_rr(d,s);
4022     unlock2(d);
4023     unlock2(s);
4024     }
4025     MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4026    
4027     MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4028     {
4029     CLOBBER_TEST;
4030     d=readreg(d,1);
4031     s=readreg(s,1);
4032    
4033     raw_test_b_rr(d,s);
4034     unlock2(d);
4035     unlock2(s);
4036     }
4037     MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4038    
4039    
4040     MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
4041     {
4042     if (isconst(d) && !needflags) {
4043     live.state[d].val &= i;
4044     return;
4045     }
4046    
4047     CLOBBER_AND;
4048     d=rmw(d,4,4);
4049    
4050     raw_and_l_ri(d,i);
4051     unlock2(d);
4052     }
4053     MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4054    
4055     MIDFUNC(2,and_l,(RW4 d, R4 s))
4056     {
4057     CLOBBER_AND;
4058     s=readreg(s,4);
4059     d=rmw(d,4,4);
4060    
4061     raw_and_l(d,s);
4062     unlock2(d);
4063     unlock2(s);
4064     }
4065     MENDFUNC(2,and_l,(RW4 d, R4 s))
4066    
4067     MIDFUNC(2,and_w,(RW2 d, R2 s))
4068     {
4069     CLOBBER_AND;
4070     s=readreg(s,2);
4071     d=rmw(d,2,2);
4072    
4073     raw_and_w(d,s);
4074     unlock2(d);
4075     unlock2(s);
4076     }
4077     MENDFUNC(2,and_w,(RW2 d, R2 s))
4078    
4079     MIDFUNC(2,and_b,(RW1 d, R1 s))
4080     {
4081     CLOBBER_AND;
4082     s=readreg(s,1);
4083     d=rmw(d,1,1);
4084    
4085     raw_and_b(d,s);
4086     unlock2(d);
4087     unlock2(s);
4088     }
4089     MENDFUNC(2,and_b,(RW1 d, R1 s))
4090    
4091     // gb-- used for making an fpcr value in compemu_fpp.cpp
4092     MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4093     {
4094     CLOBBER_OR;
4095     d=rmw(d,4,4);
4096    
4097     raw_or_l_rm(d,s);
4098     unlock2(d);
4099     }
4100     MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4101    
4102     MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4103     {
4104     if (isconst(d) && !needflags) {
4105     live.state[d].val|=i;
4106     return;
4107     }
4108     CLOBBER_OR;
4109     d=rmw(d,4,4);
4110    
4111     raw_or_l_ri(d,i);
4112     unlock2(d);
4113     }
4114     MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4115    
4116     MIDFUNC(2,or_l,(RW4 d, R4 s))
4117     {
4118     if (isconst(d) && isconst(s) && !needflags) {
4119     live.state[d].val|=live.state[s].val;
4120     return;
4121     }
4122     CLOBBER_OR;
4123     s=readreg(s,4);
4124     d=rmw(d,4,4);
4125    
4126     raw_or_l(d,s);
4127     unlock2(d);
4128     unlock2(s);
4129     }
4130     MENDFUNC(2,or_l,(RW4 d, R4 s))
4131    
4132     MIDFUNC(2,or_w,(RW2 d, R2 s))
4133     {
4134     CLOBBER_OR;
4135     s=readreg(s,2);
4136     d=rmw(d,2,2);
4137    
4138     raw_or_w(d,s);
4139     unlock2(d);
4140     unlock2(s);
4141     }
4142     MENDFUNC(2,or_w,(RW2 d, R2 s))
4143    
4144     MIDFUNC(2,or_b,(RW1 d, R1 s))
4145     {
4146     CLOBBER_OR;
4147     s=readreg(s,1);
4148     d=rmw(d,1,1);
4149    
4150     raw_or_b(d,s);
4151     unlock2(d);
4152     unlock2(s);
4153     }
4154     MENDFUNC(2,or_b,(RW1 d, R1 s))
4155    
4156     MIDFUNC(2,adc_l,(RW4 d, R4 s))
4157     {
4158     CLOBBER_ADC;
4159     s=readreg(s,4);
4160     d=rmw(d,4,4);
4161    
4162     raw_adc_l(d,s);
4163    
4164     unlock2(d);
4165     unlock2(s);
4166     }
4167     MENDFUNC(2,adc_l,(RW4 d, R4 s))
4168    
4169     MIDFUNC(2,adc_w,(RW2 d, R2 s))
4170     {
4171     CLOBBER_ADC;
4172     s=readreg(s,2);
4173     d=rmw(d,2,2);
4174    
4175     raw_adc_w(d,s);
4176     unlock2(d);
4177     unlock2(s);
4178     }
4179     MENDFUNC(2,adc_w,(RW2 d, R2 s))
4180    
4181     MIDFUNC(2,adc_b,(RW1 d, R1 s))
4182     {
4183     CLOBBER_ADC;
4184     s=readreg(s,1);
4185     d=rmw(d,1,1);
4186    
4187     raw_adc_b(d,s);
4188     unlock2(d);
4189     unlock2(s);
4190     }
4191     MENDFUNC(2,adc_b,(RW1 d, R1 s))
4192    
4193     MIDFUNC(2,add_l,(RW4 d, R4 s))
4194     {
4195     if (isconst(s)) {
4196     COMPCALL(add_l_ri)(d,live.state[s].val);
4197     return;
4198     }
4199    
4200     CLOBBER_ADD;
4201     s=readreg(s,4);
4202     d=rmw(d,4,4);
4203    
4204     raw_add_l(d,s);
4205    
4206     unlock2(d);
4207     unlock2(s);
4208     }
4209     MENDFUNC(2,add_l,(RW4 d, R4 s))
4210    
4211     MIDFUNC(2,add_w,(RW2 d, R2 s))
4212     {
4213     if (isconst(s)) {
4214     COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4215     return;
4216     }
4217    
4218     CLOBBER_ADD;
4219     s=readreg(s,2);
4220     d=rmw(d,2,2);
4221    
4222     raw_add_w(d,s);
4223     unlock2(d);
4224     unlock2(s);
4225     }
4226     MENDFUNC(2,add_w,(RW2 d, R2 s))
4227    
4228     MIDFUNC(2,add_b,(RW1 d, R1 s))
4229     {
4230     if (isconst(s)) {
4231     COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4232     return;
4233     }
4234    
4235     CLOBBER_ADD;
4236     s=readreg(s,1);
4237     d=rmw(d,1,1);
4238    
4239     raw_add_b(d,s);
4240     unlock2(d);
4241     unlock2(s);
4242     }
4243     MENDFUNC(2,add_b,(RW1 d, R1 s))
4244    
4245     MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4246     {
4247     if (!i && !needflags)
4248     return;
4249     if (isconst(d) && !needflags) {
4250     live.state[d].val-=i;
4251     return;
4252     }
4253     #if USE_OFFSET
4254     if (!needflags) {
4255     add_offset(d,-i);
4256     return;
4257     }
4258     #endif
4259    
4260     CLOBBER_SUB;
4261     d=rmw(d,4,4);
4262    
4263     raw_sub_l_ri(d,i);
4264     unlock2(d);
4265     }
4266     MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4267    
4268     MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4269     {
4270     if (!i && !needflags)
4271     return;
4272    
4273     CLOBBER_SUB;
4274     d=rmw(d,2,2);
4275    
4276     raw_sub_w_ri(d,i);
4277     unlock2(d);
4278     }
4279     MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4280    
4281     MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4282     {
4283     if (!i && !needflags)
4284     return;
4285    
4286     CLOBBER_SUB;
4287     d=rmw(d,1,1);
4288    
4289     raw_sub_b_ri(d,i);
4290    
4291     unlock2(d);
4292     }
4293     MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4294    
4295     MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4296     {
4297     if (!i && !needflags)
4298     return;
4299     if (isconst(d) && !needflags) {
4300     live.state[d].val+=i;
4301     return;
4302     }
4303     #if USE_OFFSET
4304     if (!needflags) {
4305     add_offset(d,i);
4306     return;
4307     }
4308     #endif
4309     CLOBBER_ADD;
4310     d=rmw(d,4,4);
4311     raw_add_l_ri(d,i);
4312     unlock2(d);
4313     }
4314     MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
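/* With USE_OFFSET and no pending flag consumers, add_l_ri/sub_l_ri above
   emit no code at all: the constant is accumulated in the vreg's offset
   and materialized once by remove_offset()/flush() (see the INMEM case in
   flush(), which emits a single raw_add_l_mi of the accumulated total).
   Conceptual effect, assuming needflags stays false throughout:
*/
#if 0
add_l_ri(r, 2);   /* offset += 2, nothing emitted */
add_l_ri(r, 2);   /* offset += 2, nothing emitted */
add_l_ri(r, 2);   /* offset += 2 -> one "add $6" when r is materialized */
#endif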
4315    
4316     MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4317     {
4318     if (!i && !needflags)
4319     return;
4320    
4321     CLOBBER_ADD;
4322     d=rmw(d,2,2);
4323    
4324     raw_add_w_ri(d,i);
4325     unlock2(d);
4326     }
4327     MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4328    
4329     MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4330     {
4331     if (!i && !needflags)
4332     return;
4333    
4334     CLOBBER_ADD;
4335     d=rmw(d,1,1);
4336    
4337     raw_add_b_ri(d,i);
4338    
4339     unlock2(d);
4340     }
4341     MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4342    
4343     MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4344     {
4345     CLOBBER_SBB;
4346     s=readreg(s,4);
4347     d=rmw(d,4,4);
4348    
4349     raw_sbb_l(d,s);
4350     unlock2(d);
4351     unlock2(s);
4352     }
4353     MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4354    
4355     MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4356     {
4357     CLOBBER_SBB;
4358     s=readreg(s,2);
4359     d=rmw(d,2,2);
4360    
4361     raw_sbb_w(d,s);
4362     unlock2(d);
4363     unlock2(s);
4364     }
4365     MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4366    
4367     MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4368     {
4369     CLOBBER_SBB;
4370     s=readreg(s,1);
4371     d=rmw(d,1,1);
4372    
4373     raw_sbb_b(d,s);
4374     unlock2(d);
4375     unlock2(s);
4376     }
4377     MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4378    
4379     MIDFUNC(2,sub_l,(RW4 d, R4 s))
4380     {
4381     if (isconst(s)) {
4382     COMPCALL(sub_l_ri)(d,live.state[s].val);
4383     return;
4384     }
4385    
4386     CLOBBER_SUB;
4387     s=readreg(s,4);
4388     d=rmw(d,4,4);
4389    
4390     raw_sub_l(d,s);
4391     unlock2(d);
4392     unlock2(s);
4393     }
4394     MENDFUNC(2,sub_l,(RW4 d, R4 s))
4395    
4396     MIDFUNC(2,sub_w,(RW2 d, R2 s))
4397     {
4398     if (isconst(s)) {
4399     COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4400     return;
4401     }
4402    
4403     CLOBBER_SUB;
4404     s=readreg(s,2);
4405     d=rmw(d,2,2);
4406    
4407     raw_sub_w(d,s);
4408     unlock2(d);
4409     unlock2(s);
4410     }
4411     MENDFUNC(2,sub_w,(RW2 d, R2 s))
4412    
4413     MIDFUNC(2,sub_b,(RW1 d, R1 s))
4414     {
4415     if (isconst(s)) {
4416     COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4417     return;
4418     }
4419    
4420     CLOBBER_SUB;
4421     s=readreg(s,1);
4422     d=rmw(d,1,1);
4423    
4424     raw_sub_b(d,s);
4425     unlock2(d);
4426     unlock2(s);
4427     }
4428     MENDFUNC(2,sub_b,(RW1 d, R1 s))
4429    
4430     MIDFUNC(2,cmp_l,(R4 d, R4 s))
4431     {
4432     CLOBBER_CMP;
4433     s=readreg(s,4);
4434     d=readreg(d,4);
4435    
4436     raw_cmp_l(d,s);
4437     unlock2(d);
4438     unlock2(s);
4439     }
4440     MENDFUNC(2,cmp_l,(R4 d, R4 s))
4441    
4442     MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4443     {
4444     CLOBBER_CMP;
4445     r=readreg(r,4);
4446    
4447     raw_cmp_l_ri(r,i);
4448     unlock2(r);
4449     }
4450     MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4451    
4452     MIDFUNC(2,cmp_w,(R2 d, R2 s))
4453     {
4454     CLOBBER_CMP;
4455     s=readreg(s,2);
4456     d=readreg(d,2);
4457    
4458     raw_cmp_w(d,s);
4459     unlock2(d);
4460     unlock2(s);
4461     }
4462     MENDFUNC(2,cmp_w,(R2 d, R2 s))
4463    
4464     MIDFUNC(2,cmp_b,(R1 d, R1 s))
4465     {
4466     CLOBBER_CMP;
4467     s=readreg(s,1);
4468     d=readreg(d,1);
4469    
4470     raw_cmp_b(d,s);
4471     unlock2(d);
4472     unlock2(s);
4473     }
4474     MENDFUNC(2,cmp_b,(R1 d, R1 s))
4475    
4476    
4477     MIDFUNC(2,xor_l,(RW4 d, R4 s))
4478     {
4479     CLOBBER_XOR;
4480     s=readreg(s,4);
4481     d=rmw(d,4,4);
4482    
4483     raw_xor_l(d,s);
4484     unlock2(d);
4485     unlock2(s);
4486     }
4487     MENDFUNC(2,xor_l,(RW4 d, R4 s))
4488    
4489     MIDFUNC(2,xor_w,(RW2 d, R2 s))
4490     {
4491     CLOBBER_XOR;
4492     s=readreg(s,2);
4493     d=rmw(d,2,2);
4494    
4495     raw_xor_w(d,s);
4496     unlock2(d);
4497     unlock2(s);
4498     }
4499     MENDFUNC(2,xor_w,(RW2 d, R2 s))
4500    
4501     MIDFUNC(2,xor_b,(RW1 d, R1 s))
4502     {
4503     CLOBBER_XOR;
4504     s=readreg(s,1);
4505     d=rmw(d,1,1);
4506    
4507     raw_xor_b(d,s);
4508     unlock2(d);
4509     unlock2(s);
4510     }
4511     MENDFUNC(2,xor_b,(RW1 d, R1 s))
4512    
4513     MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4514     {
4515     clobber_flags();
4516     remove_all_offsets();
4517     if (osize==4) {
4518     if (out1!=in1 && out1!=r) {
4519     COMPCALL(forget_about)(out1);
4520     }
4521     }
4522     else {
4523     tomem_c(out1);
4524     }
4525    
4526     in1=readreg_specific(in1,isize,REG_PAR1);
4527     r=readreg(r,4);
4528     prepare_for_call_1(); /* This should ensure that there won't be
4529     any need for swapping nregs in prepare_for_call_2
4530     */
4531     #if USE_NORMAL_CALLING_CONVENTION
4532     raw_push_l_r(in1);
4533     #endif
4534     unlock2(in1);
4535     unlock2(r);
4536    
4537     prepare_for_call_2();
4538     raw_call_r(r);
4539    
4540     #if USE_NORMAL_CALLING_CONVENTION
4541     raw_inc_sp(4);
4542     #endif
4543    
4544    
4545     live.nat[REG_RESULT].holds[0]=out1;
4546     live.nat[REG_RESULT].nholds=1;
4547     live.nat[REG_RESULT].touched=touchcnt++;
4548    
4549     live.state[out1].realreg=REG_RESULT;
4550     live.state[out1].realind=0;
4551     live.state[out1].val=0;
4552     live.state[out1].validsize=osize;
4553     live.state[out1].dirtysize=osize;
4554     set_status(out1,DIRTY);
4555     }
4556     MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4557    
4558     MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4559     {
4560     clobber_flags();
4561     remove_all_offsets();
4562     in1=readreg_specific(in1,isize1,REG_PAR1);
4563     in2=readreg_specific(in2,isize2,REG_PAR2);
4564     r=readreg(r,4);
4565     prepare_for_call_1(); /* This should ensure that there won't be
4566     any need for swapping nregs in prepare_for_call_2
4567     */
4568     #if USE_NORMAL_CALLING_CONVENTION
4569     raw_push_l_r(in2);
4570     raw_push_l_r(in1);
4571     #endif
4572     unlock2(r);
4573     unlock2(in1);
4574     unlock2(in2);
4575     prepare_for_call_2();
4576     raw_call_r(r);
4577     #if USE_NORMAL_CALLING_CONVENTION
4578     raw_inc_sp(8);
4579     #endif
4580     }
4581     MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
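/* Shape of the code generated by the call_r helpers above under
   USE_NORMAL_CALLING_CONVENTION (classic cdecl, caller pops the stack):

       push in2          ; arguments pushed right to left
       push in1
       call *r
       add  $8,%esp      ; raw_inc_sp(8), two longword arguments

   Without it, in1/in2 are instead bound to REG_PAR1/REG_PAR2 via
   readreg_specific() before the call and no stack adjustment is emitted. */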
4582    
4583     /* forget_about() takes a mid-layer register */
4584     MIDFUNC(1,forget_about,(W4 r))
4585     {
4586     if (isinreg(r))
4587     disassociate(r);
4588     live.state[r].val=0;
4589     set_status(r,UNDEF);
4590     }
4591     MENDFUNC(1,forget_about,(W4 r))
4592    
4593     MIDFUNC(0,nop,(void))
4594     {
4595     raw_nop();
4596     }
4597     MENDFUNC(0,nop,(void))
4598    
4599    
4600     MIDFUNC(1,f_forget_about,(FW r))
4601     {
4602     if (f_isinreg(r))
4603     f_disassociate(r);
4604     live.fate[r].status=UNDEF;
4605     }
4606     MENDFUNC(1,f_forget_about,(FW r))
4607    
4608     MIDFUNC(1,fmov_pi,(FW r))
4609     {
4610     r=f_writereg(r);
4611     raw_fmov_pi(r);
4612     f_unlock(r);
4613     }
4614     MENDFUNC(1,fmov_pi,(FW r))
4615    
4616     MIDFUNC(1,fmov_log10_2,(FW r))
4617     {
4618     r=f_writereg(r);
4619     raw_fmov_log10_2(r);
4620     f_unlock(r);
4621     }
4622     MENDFUNC(1,fmov_log10_2,(FW r))
4623    
4624     MIDFUNC(1,fmov_log2_e,(FW r))
4625     {
4626     r=f_writereg(r);
4627     raw_fmov_log2_e(r);
4628     f_unlock(r);
4629     }
4630     MENDFUNC(1,fmov_log2_e,(FW r))
4631    
4632     MIDFUNC(1,fmov_loge_2,(FW r))
4633     {
4634     r=f_writereg(r);
4635     raw_fmov_loge_2(r);
4636     f_unlock(r);
4637     }
4638     MENDFUNC(1,fmov_loge_2,(FW r))
4639    
4640     MIDFUNC(1,fmov_1,(FW r))
4641     {
4642     r=f_writereg(r);
4643     raw_fmov_1(r);
4644     f_unlock(r);
4645     }
4646     MENDFUNC(1,fmov_1,(FW r))
4647    
4648     MIDFUNC(1,fmov_0,(FW r))
4649     {
4650     r=f_writereg(r);
4651     raw_fmov_0(r);
4652     f_unlock(r);
4653     }
4654     MENDFUNC(1,fmov_0,(FW r))
4655    
4656     MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4657     {
4658     r=f_writereg(r);
4659     raw_fmov_rm(r,m);
4660     f_unlock(r);
4661     }
4662     MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4663    
4664     MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4665     {
4666     r=f_writereg(r);
4667     raw_fmovi_rm(r,m);
4668     f_unlock(r);
4669     }
4670     MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4671    
4672     MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4673     {
4674     r=f_readreg(r);
4675     raw_fmovi_mr(m,r);
4676     f_unlock(r);
4677     }
4678     MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4679    
4680     MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4681     {
4682     r=f_writereg(r);
4683     raw_fmovs_rm(r,m);
4684     f_unlock(r);
4685     }
4686     MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4687    
4688     MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4689     {
4690     r=f_readreg(r);
4691     raw_fmovs_mr(m,r);
4692     f_unlock(r);
4693     }
4694     MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4695    
4696     MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4697     {
4698     r=f_readreg(r);
4699     raw_fmov_ext_mr(m,r);
4700     f_unlock(r);
4701     }
4702     MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4703    
4704     MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4705     {
4706     r=f_readreg(r);
4707     raw_fmov_mr(m,r);
4708     f_unlock(r);
4709     }
4710     MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4711    
4712     MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4713     {
4714     r=f_writereg(r);
4715     raw_fmov_ext_rm(r,m);
4716     f_unlock(r);
4717     }
4718     MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4719    
4720     MIDFUNC(2,fmov_rr,(FW d, FR s))
4721     {
4722     if (d==s) { /* How pointless! */
4723     return;
4724     }
4725     #if USE_F_ALIAS
4726     f_disassociate(d);
4727     s=f_readreg(s);
4728     live.fate[d].realreg=s;
4729     live.fate[d].realind=live.fat[s].nholds;
4730     live.fate[d].status=DIRTY;
4731     live.fat[s].holds[live.fat[s].nholds]=d;
4732     live.fat[s].nholds++;
4733     f_unlock(s);
4734     #else
4735     s=f_readreg(s);
4736     d=f_writereg(d);
4737     raw_fmov_rr(d,s);
4738     f_unlock(s);
4739     f_unlock(d);
4740     #endif
4741     }
4742     MENDFUNC(2,fmov_rr,(FW d, FR s))
4743    
4744     MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4745     {
4746     index=readreg(index,4);
4747    
4748     raw_fldcw_m_indexed(index,base);
4749     unlock2(index);
4750     }
4751     MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4752    
4753     MIDFUNC(1,ftst_r,(FR r))
4754     {
4755     r=f_readreg(r);
4756     raw_ftst_r(r);
4757     f_unlock(r);
4758     }
4759     MENDFUNC(1,ftst_r,(FR r))
4760    
4761     MIDFUNC(0,dont_care_fflags,(void))
4762     {
4763     f_disassociate(FP_RESULT);
4764     }
4765     MENDFUNC(0,dont_care_fflags,(void))
4766    
4767     MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4768     {
4769     s=f_readreg(s);
4770     d=f_writereg(d);
4771     raw_fsqrt_rr(d,s);
4772     f_unlock(s);
4773     f_unlock(d);
4774     }
4775     MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4776    
4777     MIDFUNC(2,fabs_rr,(FW d, FR s))
4778     {
4779     s=f_readreg(s);
4780     d=f_writereg(d);
4781     raw_fabs_rr(d,s);
4782     f_unlock(s);
4783     f_unlock(d);
4784     }
4785     MENDFUNC(2,fabs_rr,(FW d, FR s))
4786    
4787     MIDFUNC(2,fsin_rr,(FW d, FR s))
4788     {
4789     s=f_readreg(s);
4790     d=f_writereg(d);
4791     raw_fsin_rr(d,s);
4792     f_unlock(s);
4793     f_unlock(d);
4794     }
4795     MENDFUNC(2,fsin_rr,(FW d, FR s))
4796    
4797     MIDFUNC(2,fcos_rr,(FW d, FR s))
4798     {
4799     s=f_readreg(s);
4800     d=f_writereg(d);
4801     raw_fcos_rr(d,s);
4802     f_unlock(s);
4803     f_unlock(d);
4804     }
4805     MENDFUNC(2,fcos_rr,(FW d, FR s))
4806    
4807     MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4808     {
4809     s=f_readreg(s);
4810     d=f_writereg(d);
4811     raw_ftwotox_rr(d,s);
4812     f_unlock(s);
4813     f_unlock(d);
4814     }
4815     MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4816    
4817     MIDFUNC(2,fetox_rr,(FW d, FR s))
4818     {
4819     s=f_readreg(s);
4820     d=f_writereg(d);
4821     raw_fetox_rr(d,s);
4822     f_unlock(s);
4823     f_unlock(d);
4824     }
4825     MENDFUNC(2,fetox_rr,(FW d, FR s))
4826    
4827     MIDFUNC(2,frndint_rr,(FW d, FR s))
4828     {
4829     s=f_readreg(s);
4830     d=f_writereg(d);
4831     raw_frndint_rr(d,s);
4832     f_unlock(s);
4833     f_unlock(d);
4834     }
4835     MENDFUNC(2,frndint_rr,(FW d, FR s))
4836    
4837     MIDFUNC(2,flog2_rr,(FW d, FR s))
4838     {
4839     s=f_readreg(s);
4840     d=f_writereg(d);
4841     raw_flog2_rr(d,s);
4842     f_unlock(s);
4843     f_unlock(d);
4844     }
4845     MENDFUNC(2,flog2_rr,(FW d, FR s))
4846    
4847     MIDFUNC(2,fneg_rr,(FW d, FR s))
4848     {
4849     s=f_readreg(s);
4850     d=f_writereg(d);
4851     raw_fneg_rr(d,s);
4852     f_unlock(s);
4853     f_unlock(d);
4854     }
4855     MENDFUNC(2,fneg_rr,(FW d, FR s))
4856    
4857     MIDFUNC(2,fadd_rr,(FRW d, FR s))
4858     {
4859     s=f_readreg(s);
4860     d=f_rmw(d);
4861     raw_fadd_rr(d,s);
4862     f_unlock(s);
4863     f_unlock(d);
4864     }
4865     MENDFUNC(2,fadd_rr,(FRW d, FR s))
4866    
4867     MIDFUNC(2,fsub_rr,(FRW d, FR s))
4868     {
4869     s=f_readreg(s);
4870     d=f_rmw(d);
4871     raw_fsub_rr(d,s);
4872     f_unlock(s);
4873     f_unlock(d);
4874     }
4875     MENDFUNC(2,fsub_rr,(FRW d, FR s))
4876    
4877     MIDFUNC(2,fcmp_rr,(FR d, FR s))
4878     {
4879     d=f_readreg(d);
4880     s=f_readreg(s);
4881     raw_fcmp_rr(d,s);
4882     f_unlock(s);
4883     f_unlock(d);
4884     }
4885     MENDFUNC(2,fcmp_rr,(FR d, FR s))
4886    
4887     MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4888     {
4889     s=f_readreg(s);
4890     d=f_rmw(d);
4891     raw_fdiv_rr(d,s);
4892     f_unlock(s);
4893     f_unlock(d);
4894     }
4895     MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4896    
4897     MIDFUNC(2,frem_rr,(FRW d, FR s))
4898     {
4899     s=f_readreg(s);
4900     d=f_rmw(d);
4901     raw_frem_rr(d,s);
4902     f_unlock(s);
4903     f_unlock(d);
4904     }
4905     MENDFUNC(2,frem_rr,(FRW d, FR s))
4906    
4907     MIDFUNC(2,frem1_rr,(FRW d, FR s))
4908     {
4909     s=f_readreg(s);
4910     d=f_rmw(d);
4911     raw_frem1_rr(d,s);
4912     f_unlock(s);
4913     f_unlock(d);
4914     }
4915     MENDFUNC(2,frem1_rr,(FRW d, FR s))
4916    
4917     MIDFUNC(2,fmul_rr,(FRW d, FR s))
4918     {
4919     s=f_readreg(s);
4920     d=f_rmw(d);
4921     raw_fmul_rr(d,s);
4922     f_unlock(s);
4923     f_unlock(d);
4924     }
4925     MENDFUNC(2,fmul_rr,(FRW d, FR s))
4926    
4927     /********************************************************************
4928     * Support functions exposed to gencomp. CREATE time *
4929     ********************************************************************/
4930    
4931 gbeauche 1.26 void set_zero(int r, int tmp)
4932     {
4933     if (setzflg_uses_bsf)
4934     bsf_l_rr(r,r);
4935     else
4936     simulate_bsf(tmp,r);
4937     }
4938    
4939 gbeauche 1.1 int kill_rodent(int r)
4940     {
4941     return KILLTHERAT &&
4942     have_rat_stall &&
4943     (live.state[r].status==INMEM ||
4944     live.state[r].status==CLEAN ||
4945     live.state[r].status==ISCONST ||
4946     live.state[r].dirtysize==4);
4947     }
4948    
4949     uae_u32 get_const(int r)
4950     {
4951     Dif (!isconst(r)) {
4952     write_log("Register %d should be constant, but isn't\n",r);
4953     abort();
4954     }
4955     return live.state[r].val;
4956     }
4957    
4958     void sync_m68k_pc(void)
4959     {
4960     if (m68k_pc_offset) {
4961     add_l_ri(PC_P,m68k_pc_offset);
4962     comp_pc_p+=m68k_pc_offset;
4963     m68k_pc_offset=0;
4964     }
4965     }
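/* Example: after translating a 4-byte and a 2-byte 68k instruction with
   no intervening sync point, m68k_pc_offset is 6; sync_m68k_pc() then
   issues one add_l_ri(PC_P,6) -- typically folded into PC_P's constant
   offset, as described above -- advances comp_pc_p by 6 and resets the
   counter, so the emulated PC is only brought up to date when needed. */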
4966    
4967     /********************************************************************
4968     * Scratch registers management *
4969     ********************************************************************/
4970    
4971     struct scratch_t {
4972     uae_u32 regs[VREGS];
4973     fpu_register fregs[VFREGS];
4974     };
4975    
4976     static scratch_t scratch;
4977    
4978     /********************************************************************
4979     * Support functions exposed to newcpu *
4980     ********************************************************************/
4981    
4982     static inline const char *str_on_off(bool b)
4983     {
4984     return b ? "on" : "off";
4985     }
4986    
4987     void compiler_init(void)
4988     {
4989     static bool initialized = false;
4990     if (initialized)
4991     return;
4992 gbeauche 1.24
4993 gbeauche 1.1 #if JIT_DEBUG
4994     // JIT debug mode ?
4995     JITDebug = PrefsFindBool("jitdebug");
4996     #endif
4997     write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4998    
4999     #ifdef USE_JIT_FPU
5000     // Use JIT compiler for FPU instructions ?
5001     avoid_fpu = !PrefsFindBool("jitfpu");
5002     #else
5003     // JIT FPU is always disabled
5004     avoid_fpu = true;
5005     #endif
5006     write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
5007    
5008     // Get size of the translation cache (in KB)
5009     cache_size = PrefsFindInt32("jitcachesize");
5010     write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
5011    
5012     // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
5013     raw_init_cpu();
5014 gbeauche 1.15 setzflg_uses_bsf = target_check_bsf();
5015 gbeauche 1.1 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
5016     write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
5017 gbeauche 1.5 write_log("<JIT compiler> : alignment for loops, jumps : %d, %d\n", align_loops, align_jumps);
5018 gbeauche 1.1
5019     // Translation cache flush mechanism
5020     lazy_flush = PrefsFindBool("jitlazyflush");
5021     write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
5022     flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
5023    
5024     // Compiler features
5025     write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
5026     write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
5027     write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
5028 gbeauche 1.33 #if USE_INLINING
5029     follow_const_jumps = PrefsFindBool("jitinline");
5030     #endif
5031     write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
5032 gbeauche 1.1 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
5033    
5034     // Build compiler tables
5035     build_comp();
5036    
5037     initialized = true;
5038    
5039 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
5040     write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
5041     #endif
5042    
5043 gbeauche 1.1 #if PROFILE_COMPILE_TIME
5044     write_log("<JIT compiler> : gather statistics on translation time\n");
5045     emul_start_time = clock();
5046     #endif
5047     }
5048    
5049     void compiler_exit(void)
5050     {
5051     #if PROFILE_COMPILE_TIME
5052     emul_end_time = clock();
5053     #endif
5054    
5055     // Deallocate translation cache
5056     if (compiled_code) {
5057 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5058 gbeauche 1.1 compiled_code = 0;
5059     }
5060 gbeauche 1.24
5061     // Deallocate popallspace
5062     if (popallspace) {
5063     vm_release(popallspace, POPALLSPACE_SIZE);
5064     popallspace = 0;
5065     }
5066 gbeauche 1.1
5067     #if PROFILE_COMPILE_TIME
5068     write_log("### Compile Block statistics\n");
5069     write_log("Number of calls to compile_block : %d\n", compile_count);
5070     uae_u32 emul_time = emul_end_time - emul_start_time;
5071     write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5072     write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5073     100.0*double(compile_time)/double(emul_time));
5074     write_log("\n");
5075     #endif
5076 gbeauche 1.9
5077     #if PROFILE_UNTRANSLATED_INSNS
5078     uae_u64 untranslated_count = 0;
5079     for (int i = 0; i < 65536; i++) {
5080     opcode_nums[i] = i;
5081     untranslated_count += raw_cputbl_count[i];
5082     }
5083     write_log("Sorting untranslated instruction counts...\n");
5084     qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5085     write_log("\nRank Opc Count Name\n");
5086     for (int i = 0; i < untranslated_top_ten; i++) {
5087     uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5088     struct instr *dp;
5089     struct mnemolookup *lookup;
5090     if (!count)
5091     break;
5092     dp = table68k + opcode_nums[i];
5093     for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5094     ;
5095     write_log("%03d: %04x %10u %s\n", i, opcode_nums[i], count, lookup->name);
5096     }
5097     #endif
5098 gbeauche 1.28
5099     #if RECORD_REGISTER_USAGE
5100     int reg_count_ids[16];
5101     uint64 tot_reg_count = 0;
5102     for (int i = 0; i < 16; i++) {
5103     reg_count_ids[i] = i;
5104     tot_reg_count += reg_count[i];
5105     }
5106     qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
5107     uint64 cum_reg_count = 0;
5108     for (int i = 0; i < 16; i++) {
5109     int r = reg_count_ids[i];
5110     cum_reg_count += reg_count[r];
5111     printf("%c%d : %16llu %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
5112     (unsigned long long)reg_count[r],
5113     100.0*double(reg_count[r])/double(tot_reg_count),
5114     100.0*double(cum_reg_count)/double(tot_reg_count));
5115     }
5116     #endif
5117 gbeauche 1.1 }
5118    
5119     bool compiler_use_jit(void)
5120     {
5121     // Check for the "jit" prefs item
5122     if (!PrefsFindBool("jit"))
5123     return false;
5124    
5125     // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5126     if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5127     write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5128     return false;
5129     }
5130    
5131     // FIXME: there are currently problems with JIT compilation and anything below a 68040
5132     if (CPUType < 4) {
5133     write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
5134     return false;
5135     }
5136    
5137     return true;
5138     }
5139    
5140     void init_comp(void)
5141     {
5142     int i;
5143     uae_s8* cb=can_byte;
5144     uae_s8* cw=can_word;
5145     uae_s8* au=always_used;
5146    
5147 gbeauche 1.28 #if RECORD_REGISTER_USAGE
5148     for (i=0;i<16;i++)
5149     reg_count_local[i] = 0;
5150     #endif
5151    
5152 gbeauche 1.1 for (i=0;i<VREGS;i++) {
5153     live.state[i].realreg=-1;
5154     live.state[i].needflush=NF_SCRATCH;
5155     live.state[i].val=0;
5156     set_status(i,UNDEF);
5157     }
5158    
5159     for (i=0;i<VFREGS;i++) {
5160     live.fate[i].status=UNDEF;
5161     live.fate[i].realreg=-1;
5162     live.fate[i].needflush=NF_SCRATCH;
5163     }
5164    
5165     for (i=0;i<VREGS;i++) {
5166     if (i<16) { /* First 16 registers map to 68k registers */
5167     live.state[i].mem=((uae_u32*)&regs)+i;
5168     live.state[i].needflush=NF_TOMEM;
5169     set_status(i,INMEM);
5170     }
5171     else
5172     live.state[i].mem=scratch.regs+i;
5173     }
5174     live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5175     live.state[PC_P].needflush=NF_TOMEM;
5176 gbeauche 1.24 set_const(PC_P,(uintptr)comp_pc_p);
5177 gbeauche 1.1
5178 gbeauche 1.24 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5179 gbeauche 1.1 live.state[FLAGX].needflush=NF_TOMEM;
5180     set_status(FLAGX,INMEM);
5181    
5182 gbeauche 1.24 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5183 gbeauche 1.1 live.state[FLAGTMP].needflush=NF_TOMEM;
5184     set_status(FLAGTMP,INMEM);
5185    
5186     live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5187     set_status(NEXT_HANDLER,UNDEF);
5188    
5189     for (i=0;i<VFREGS;i++) {
5190     if (i<8) { /* First 8 registers map to 68k FPU registers */
5191     live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5192     live.fate[i].needflush=NF_TOMEM;
5193     live.fate[i].status=INMEM;
5194     }
5195     else if (i==FP_RESULT) {
5196     live.fate[i].mem=(uae_u32*)(&fpu.result);
5197     live.fate[i].needflush=NF_TOMEM;
5198     live.fate[i].status=INMEM;
5199     }
5200     else
5201 gbeauche 1.25 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5202 gbeauche 1.1 }
5203    
5204    
5205     for (i=0;i<N_REGS;i++) {
5206     live.nat[i].touched=0;
5207     live.nat[i].nholds=0;
5208     live.nat[i].locked=0;
5209     if (*cb==i) {
5210     live.nat[i].canbyte=1; cb++;
5211     } else live.nat[i].canbyte=0;
5212     if (*cw==i) {
5213     live.nat[i].canword=1; cw++;
5214     } else live.nat[i].canword=0;
5215     if (*au==i) {
5216     live.nat[i].locked=1; au++;
5217     }
5218     }
5219    
5220     for (i=0;i<N_FREGS;i++) {
5221     live.fat[i].touched=0;
5222     live.fat[i].nholds=0;
5223     live.fat[i].locked=0;
5224     }
5225    
5226     touchcnt=1;
5227     m68k_pc_offset=0;
5228     live.flags_in_flags=TRASH;
5229     live.flags_on_stack=VALID;
5230     live.flags_are_important=1;
5231    
5232     raw_fp_init();
5233     }
5234    
5235     /* Only do this if you really mean it! The next call should be to init!*/
5236     void flush(int save_regs)
5237     {
5238     int i;
5239    
5240     log_flush();
5241     flush_flags(); /* low level */
5242     sync_m68k_pc(); /* mid level */
5243    
5244     if (save_regs) {
5245     for (i=0;i<VFREGS;i++) {
5246     if (live.fate[i].needflush==NF_SCRATCH ||
5247     live.fate[i].status==CLEAN) {
5248     f_disassociate(i);
5249     }
5250     }
5251     for (i=0;i<VREGS;i++) {
5252     if (live.state[i].needflush==NF_TOMEM) {
5253     switch(live.state[i].status) {
5254     case INMEM:
5255     if (live.state[i].val) {
5256 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5257 gbeauche 1.1 log_vwrite(i);
5258     live.state[i].val=0;
5259     }
5260     break;
5261     case CLEAN:
5262     case DIRTY:
5263     remove_offset(i,-1); tomem(i); break;
5264     case ISCONST:
5265     if (i!=PC_P)
5266     writeback_const(i);
5267     break;
5268     default: break;
5269     }
5270     Dif (live.state[i].val && i!=PC_P) {
5271     write_log("Register %d still has val %x\n",
5272     i,live.state[i].val);
5273     }
5274     }
5275     }
5276     for (i=0;i<VFREGS;i++) {
5277     if (live.fate[i].needflush==NF_TOMEM &&
5278     live.fate[i].status==DIRTY) {
5279     f_evict(i);
5280     }
5281     }
5282     raw_fp_cleanup_drop();
5283     }
5284     if (needflags) {
5285     write_log("Warning! flush with needflags=1!\n");
5286     }
5287     }
5288    
5289     static void flush_keepflags(void)
5290     {
5291     int i;
5292    
5293     for (i=0;i<VFREGS;i++) {
5294     if (live.fate[i].needflush==NF_SCRATCH ||
5295     live.fate[i].status==CLEAN) {
5296     f_disassociate(i);
5297     }
5298     }
5299     for (i=0;i<VREGS;i++) {
5300     if (live.state[i].needflush==NF_TOMEM) {
5301     switch(live.state[i].status) {
5302     case INMEM:
5303     /* Can't adjust the offset here --- that needs "add" */
5304     break;
5305     case CLEAN:
5306     case DIRTY:
5307     remove_offset(i,-1); tomem(i); break;
5308     case ISCONST:
5309     if (i!=PC_P)
5310     writeback_const(i);
5311     break;
5312     default: break;
5313     }
5314     }
5315     }
5316     for (i=0;i<VFREGS;i++) {
5317     if (live.fate[i].needflush==NF_TOMEM &&
5318     live.fate[i].status==DIRTY) {
5319     f_evict(i);
5320     }
5321     }
5322     raw_fp_cleanup_drop();
5323     }
5324    
5325     void freescratch(void)
5326     {
5327     int i;
5328     for (i=0;i<N_REGS;i++)
5329     if (live.nat[i].locked && i!=4)
5330     write_log("Warning! %d is locked\n",i);
5331    
5332     for (i=0;i<VREGS;i++)
5333     if (live.state[i].needflush==NF_SCRATCH) {
5334     forget_about(i);
5335     }
5336    
5337     for (i=0;i<VFREGS;i++)
5338     if (live.fate[i].needflush==NF_SCRATCH) {
5339     f_forget_about(i);
5340     }
5341     }
5342    
5343     /********************************************************************
5344     * Support functions, internal *
5345     ********************************************************************/
5346    
5347    
5348     static void align_target(uae_u32 a)
5349     {
5350 gbeauche 1.14 if (!a)
5351     return;
5352    
5353 gbeauche 1.12 if (tune_nop_fillers)
5354 gbeauche 1.24 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5355 gbeauche 1.12 else {
5356     /* Fill with NOPs --- makes debugging with gdb easier */
5357 gbeauche 1.24 while ((uintptr)target&(a-1))
5358 gbeauche 1.12 *target++=0x90;
5359     }
5360 gbeauche 1.1 }
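/* Example: with target ending in ...7 and a == 16, (target & 15) == 7,
   so 16 - 7 == 9 filler bytes are emitted -- either one optimal
   multi-byte NOP sequence (tune_nop_fillers) or nine single 0x90 NOPs. */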
5361    
5362     static __inline__ int isinrom(uintptr addr)
5363     {
5364     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5365     }
5366    
5367     static void flush_all(void)
5368     {
5369     int i;
5370    
5371     log_flush();
5372     for (i=0;i<VREGS;i++)
5373     if (live.state[i].status==DIRTY) {
5374     if (!call_saved[live.state[i].realreg]) {
5375     tomem(i);
5376     }
5377     }
5378     for (i=0;i<VFREGS;i++)
5379     if (f_isinreg(i))
5380     f_evict(i);
5381     raw_fp_cleanup_drop();
5382     }
5383    
5384     /* Make sure all registers that will get clobbered by a call are
5385     safe and sound in memory */
5386     static void prepare_for_call_1(void)
5387     {
5388     flush_all(); /* If there are registers that don't get clobbered,
5389     * we should be a bit more selective here */
5390     }
5391    
5392     /* We will call a C routine in a moment. That will clobber all registers,
5393     so we need to disassociate everything */
5394     static void prepare_for_call_2(void)
5395     {
5396     int i;
5397     for (i=0;i<N_REGS;i++)
5398     if (!call_saved[i] && live.nat[i].nholds>0)
5399     free_nreg(i);
5400    
5401     for (i=0;i<N_FREGS;i++)
5402     if (live.fat[i].nholds>0)
5403     f_free_nreg(i);
5404    
5405     live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5406     flags at the very start of the call_r
5407     functions! */
5408     }
5409    
5410     /********************************************************************
5411     * Memory access and related functions, CREATE time *
5412     ********************************************************************/
5413    
5414     void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5415     {
5416     next_pc_p=not_taken;
5417     taken_pc_p=taken;
5418     branch_cc=cond;
5419     }
5420    
5421    
5422     static uae_u32 get_handler_address(uae_u32 addr)
5423     {
5424     uae_u32 cl=cacheline(addr);
5425 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5426     return (uintptr)&(bi->direct_handler_to_use);
5427 gbeauche 1.1 }
5428    
5429     static uae_u32 get_handler(uae_u32 addr)
5430     {
5431     uae_u32 cl=cacheline(addr);
5432 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5433     return (uintptr)bi->direct_handler_to_use;
5434 gbeauche 1.1 }
5435    
5436     static void load_handler(int reg, uae_u32 addr)
5437     {
5438     mov_l_rm(reg,get_handler_address(addr));
5439     }
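/* get_handler_address() returns the address *of* the blockinfo field,
   while get_handler() returns its current value. Generated code that must
   survive later re-patching loads through the field (load_handler above),
   so updating direct_handler_to_use (via set_dhtu) redirects every such
   call site at once, without touching already-translated blocks. */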
5440    
5441     /* This version assumes that it is writing *real* memory, and *will* fail
5442     * if that assumption is wrong! No branches, no second chances, just
5443     * straight go-for-it attitude */
5444    
5445 gbeauche 1.24 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5446 gbeauche 1.1 {
5447     int f=tmp;
5448    
5449     if (clobber)
5450     f=source;
5451 gbeauche 1.24
5452     #if SIZEOF_VOID_P == 8
5453 gbeauche 1.26 if (!ThirtyThreeBitAddressing)
5454     sign_extend_32_rr(address, address);
5455 gbeauche 1.24 #endif
5456    
5457 gbeauche 1.1 switch(size) {
5458     case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5459     case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5460     case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5461     }
5462     forget_about(tmp);
5463     forget_about(f);
5464     }
5465    
5466     void writebyte(int address, int source, int tmp)
5467     {
5468 gbeauche 1.24 writemem_real(address,source,1,tmp,0);
5469 gbeauche 1.1 }
5470    
5471     static __inline__ void writeword_general(int address, int source, int tmp,
5472     int clobber)
5473     {
5474 gbeauche 1.24 writemem_real(address,source,2,tmp,clobber);
5475 gbeauche 1.1 }
5476    
5477     void writeword_clobber(int address, int source, int tmp)
5478     {
5479     writeword_general(address,source,tmp,1);
5480     }
5481    
5482     void writeword(int address, int source, int tmp)
5483     {
5484     writeword_general(address,source,tmp,0);
5485     }
5486    
5487     static __inline__ void writelong_general(int address, int source, int tmp,
5488     int clobber)
5489     {
5490 gbeauche 1.24 writemem_real(address,source,4,tmp,clobber);
5491 gbeauche 1.1 }
5492    
5493     void writelong_clobber(int address, int source, int tmp)
5494     {
5495     writelong_general(address,source,tmp,1);
5496     }
5497    
5498     void writelong(int address, int source, int tmp)
5499     {
5500     writelong_general(address,source,tmp,0);
5501     }
5502    
5503    
5504    
5505     /* This version assumes that it is reading *real* memory, and *will* fail
5506     * if that assumption is wrong! No branches, no second chances, just
5507     * straight go-for-it attitude */
5508    
5509 gbeauche 1.24 static void readmem_real(int address, int dest, int size, int tmp)
5510 gbeauche 1.1 {
5511     int f=tmp;
5512    
5513     if (size==4 && address!=dest)
5514     f=dest;
5515    
5516 gbeauche 1.24 #if SIZEOF_VOID_P == 8
5517 gbeauche 1.26 if (!ThirtyThreeBitAddressing)
5518     sign_extend_32_rr(address, address);
5519 gbeauche 1.24 #endif
5520    
5521 gbeauche 1.1 switch(size) {
5522     case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5523     case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5524     case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5525     }
5526     forget_about(tmp);
5527     }
5528    
5529     void readbyte(int address, int dest, int tmp)
5530     {
5531 gbeauche 1.24 readmem_real(address,dest,1,tmp);
5532 gbeauche 1.1 }
5533    
5534     void readword(int address, int dest, int tmp)
5535     {
5536 gbeauche 1.24 readmem_real(address,dest,2,tmp);
5537 gbeauche 1.1 }
5538    
5539     void readlong(int address, int dest, int tmp)
5540     {
5541 gbeauche 1.24 readmem_real(address,dest,4,tmp);
5542 gbeauche 1.1 }
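/* Under DIRECT_ADDRESSING each access above compiles down to one host
   access at (guest address + MEMBaseDiff), plus a byte swap because the
   68k is big-endian and the host little-endian; on 64-bit hosts the guest
   address is first sign-extended unless 33-bit addressing applies. The
   4-byte read is equivalent to this sketch (assumes <cstring>'s memcpy;
   bswap32_host stands in for whatever host byteswap is available, e.g.
   __builtin_bswap32):
*/
#if 0
static uae_u32 readlong_equiv(uae_u32 address)
{
    uae_u32 v;
    memcpy(&v, (uae_u8 *)MEMBaseDiff + address, 4);  /* host load */
    return bswap32_host(v);                          /* to 68k byte order */
}
#endif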
5543    
5544     void get_n_addr(int address, int dest, int tmp)
5545     {
5546     // a is the register containing the virtual address
5547     // after the offset had been fetched
5548     // after the offset has been fetched
5549    
5550     // f is the register that will contain the offset
5551     int f=tmp;
5552    
5553     // a == f == tmp if (address == dest)
5554     if (address!=dest) {
5555     a=address;
5556     f=dest;
5557     }
5558    
5559     #if REAL_ADDRESSING
5560     mov_l_rr(dest, address);
5561     #elif DIRECT_ADDRESSING
5562     lea_l_brr(dest,address,MEMBaseDiff);
5563     #endif
5564     forget_about(tmp);
5565     }
5566    
5567     void get_n_addr_jmp(int address, int dest, int tmp)
5568     {
5569     /* For this, we need to get the same address as the rest of UAE
5570     would --- otherwise we end up translating everything twice */
5571     get_n_addr(address,dest,tmp);
5572     }
5573    
5574    
5575     /* base is a register, but dp is an actual value.
5576     target is a register, as is tmp */
5577     void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5578     {
5579     int reg = (dp >> 12) & 15;
5580     int regd_shift=(dp >> 9) & 3;
5581    
5582     if (dp & 0x100) {
5583     int ignorebase=(dp&0x80);
5584     int ignorereg=(dp&0x40);
5585     int addbase=0;
5586     int outer=0;
5587    
5588     if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5589     if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5590    
5591     if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5592     if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5593    
5594     if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5595     if (!ignorereg) {
5596     if ((dp & 0x800) == 0)
5597     sign_extend_16_rr(target,reg);
5598     else
5599     mov_l_rr(target,reg);
5600     shll_l_ri(target,regd_shift);
5601     }
5602     else
5603     mov_l_ri(target,0);
5604    
5605     /* target is now regd */
5606     if (!ignorebase)
5607     add_l(target,base);
5608     add_l_ri(target,addbase);
5609     if (dp&0x03) readlong(target,target,tmp);
5610     } else { /* do the getlong first, then add regd */
5611     if (!ignorebase) {
5612     mov_l_rr(target,base);
5613     add_l_ri(target,addbase);
5614     }
5615     else
5616     mov_l_ri(target,addbase);
5617     if (dp&0x03) readlong(target,target,tmp);
5618    
5619     if (!ignorereg) {
5620     if ((dp & 0x800) == 0)
5621     sign_extend_16_rr(tmp,reg);
5622     else
5623     mov_l_rr(tmp,reg);
5624     shll_l_ri(tmp,regd_shift);
5625     /* tmp is now regd */
5626     add_l(target,tmp);
5627     }
5628     }
5629     add_l_ri(target,outer);
5630     }
5631     else { /* 68000 version */
5632     if ((dp & 0x800) == 0) { /* Sign extend */
5633     sign_extend_16_rr(target,reg);
5634     lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5635     }
5636     else {
5637     lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5638     }
5639     }
5640     forget_about(tmp);
5641     }
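/* Bit layout of the 68020 extension word dp decoded above:
     15-12  index register number (reg)
     11     index size: 0 = sign-extended word, 1 = long
     10-9   index scale, applied as 1 << regd_shift
     8      1 = full format (the memory-indirect path), 0 = brief format
     7      BS, base register suppress  (ignorebase)
     6      IS, index register suppress (ignorereg)
     5-4    base displacement size: 10 = word, 11 = long (addbase)
     2      0 = pre-indexed (add regd before the fetch), 1 = post-indexed
     1-0    outer displacement size: 10 = word, 11 = long (outer);
            nonzero also selects the memory-indirect readlong()
   The brief format instead uses bits 7-0 as an 8-bit signed displacement. */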
5642    
5643    
5644    
5645    
5646    
5647     void set_cache_state(int enabled)
5648     {
5649     if (enabled!=letit)
5650     flush_icache_hard(77);
5651     letit=enabled;
5652     }
5653    
5654     int get_cache_state(void)
5655     {
5656     return letit;
5657     }
5658    
5659     uae_u32 get_jitted_size(void)
5660     {
5661     if (compiled_code)
5662     return current_compile_p-compiled_code;
5663     return 0;
5664     }
5665    
5666 gbeauche 1.20 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5667     const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5668    
5669     static uint8 *do_alloc_code(uint32 size, int depth)
5670     {
5671     #if defined(__linux__) && 0
5672     /*
5673     This is a really awful hack that is known to work on Linux at
5674     least.
5675    
5676     The trick here is to make sure the allocated cache is near the
5677     code segment, and more precisely in the positive half of the
5678     32-bit address space, i.e. addr < 0x80000000. It turned out
5679     that a 32-bit binary run on AMD64 could get its cache allocated
5680     around 0xa0000000, causing trouble when translating addresses
5681     from m68k to x86.
5682     */
5683     static uint8 * code_base = NULL;
5684     if (code_base == NULL) {
5685     uintptr page_size = getpagesize();
5686     uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5687     if (boundaries < page_size)
5688     boundaries = page_size;
5689     code_base = (uint8 *)sbrk(0);
5690     for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5691     if (vm_acquire_fixed(code_base, size) == 0) {
5692     uint8 *code = code_base;
5693     code_base += size;
5694     return code;
5695     }
5696     code_base += boundaries;
5697     }
5698     return NULL;
5699     }
5700    
5701     if (vm_acquire_fixed(code_base, size) == 0) {
5702     uint8 *code = code_base;
5703     code_base += size;
5704     return code;
5705     }
5706    
5707     if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5708     return NULL;
5709    
5710     return do_alloc_code(size, depth + 1);
5711     #else
5712     uint8 *code = (uint8 *)vm_acquire(size);
5713     return code == VM_MAP_FAILED ? NULL : code;
5714     #endif
5715     }
5716    
5717     static inline uint8 *alloc_code(uint32 size)
5718     {
5719 gbeauche 1.31 uint8 *ptr = do_alloc_code(size, 0);
5720     /* allocated code must fit in 32-bit boundaries */
5721     assert((uintptr)ptr <= 0xffffffff);
5722     return ptr;
5723 gbeauche 1.20 }
5724    
5725 gbeauche 1.1 void alloc_cache(void)
5726     {
5727     if (compiled_code) {
5728     flush_icache_hard(6);
5729 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5730 gbeauche 1.1 compiled_code = 0;
5731     }
5732    
5733     if (cache_size == 0)
5734     return;
5735    
5736     while (!compiled_code && cache_size) {
5737 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5738 gbeauche 1.1 compiled_code = 0;
5739     cache_size /= 2;
5740     }
5741     }
5742 gbeauche 1.25 if (compiled_code) vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5743 gbeauche 1.1
5744     if (compiled_code) {
5745     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5746     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5747     current_compile_p = compiled_code;
5748     current_cache_size = 0;
5749     }
5750     }
5751    
5752    
5753    
5754 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5755 gbeauche 1.1
5756 gbeauche 1.8 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5757 gbeauche 1.1 {
5758 gbeauche 1.8 uae_u32 k1 = 0;
5759     uae_u32 k2 = 0;
5760    
5761     #if USE_CHECKSUM_INFO
5762     checksum_info *csi = bi->csi;
5763     Dif(!csi) abort();
5764     while (csi) {
5765     uae_s32 len = csi->length;
5766 gbeauche 1.24 uintptr tmp = (uintptr)csi->start_p;
5767 gbeauche 1.8 #else
5768     uae_s32 len = bi->len;
5769 gbeauche 1.24 uintptr tmp = (uintptr)bi->min_pcp;
5770 gbeauche 1.8 #endif
5771     uae_u32*pos;
5772 gbeauche 1.1
5773 gbeauche 1.8 len += (tmp & 3);
5774 gbeauche 1.24 tmp &= ~((uintptr)3);
5775 gbeauche 1.8 pos = (uae_u32 *)tmp;
5776    
5777     if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5778     while (len > 0) {
5779     k1 += *pos;
5780     k2 ^= *pos;
5781     pos++;
5782     len -= 4;
5783     }
5784     }
5785 gbeauche 1.1
5786 gbeauche 1.8 #if USE_CHECKSUM_INFO
5787     csi = csi->next;
5788 gbeauche 1.1 }
5789 gbeauche 1.8 #endif
5790    
5791     *c1 = k1;
5792     *c2 = k2;
5793 gbeauche 1.1 }
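/* The block checksum is a (sum, xor) pair over the 32-bit words covering
   the translated 68k source: the start is rounded down to a 4-byte
   boundary and len widened to match, so the last word may take in up to
   3 bytes past the nominal end. A standalone sketch of the same pair:
*/
#if 0
static void checksum_pair(const uae_u32 *p, uae_s32 len, uae_u32 *c1, uae_u32 *c2)
{
    uae_u32 k1 = 0, k2 = 0;
    for (; len > 0; len -= 4, p++) {
        k1 += *p;    /* additive half */
        k2 ^= *p;    /* xor half; two independent mixes reduce collisions */
    }
    *c1 = k1; *c2 = k2;
}
#endif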
5794    
5795 gbeauche 1.8 #if 0
5796 gbeauche 1.7 static void show_checksum(CSI_TYPE* csi)
5797 gbeauche 1.1 {
5798     uae_u32 k1=0;
5799     uae_u32 k2=0;
5800 gbeauche 1.7 uae_s32 len=CSI_LENGTH(csi);
5801 gbeauche 1.24 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5802 gbeauche 1.1 uae_u32* pos;
5803    
5804     len+=(tmp&3);
5805     tmp&=(~3);
5806     pos=(uae_u32*)tmp;
5807    
5808     if (len<0 || len>MAX_CHECKSUM_LEN) {
5809     return;
5810     }
5811     else {
5812     while (len>0) {
5813     write_log("%08x ",*pos);
5814     pos++;
5815     len-=4;
5816     }
5817     write_log(" bla\n");
5818     }
5819     }
5820 gbeauche 1.8 #endif
5821 gbeauche 1.1
5822    
5823     int check_for_cache_miss(void)
5824     {
5825     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5826    
5827     if (bi) {
5828     int cl=cacheline(regs.pc_p);
5829     if (bi!=cache_tags[cl+1].bi) {
5830     raise_in_cl_list(bi);
5831     return 1;
5832     }
5833     }
5834     return 0;
5835     }
5836    
5837    
5838     static void recompile_block(void)
5839     {
5840     /* An existing block's countdown code has expired. We need to make
5841     sure that execute_normal doesn't refuse to recompile due to a
5842     perceived cache miss... */
5843     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5844    
5845     Dif (!bi)
5846     abort();
5847     raise_in_cl_list(bi);
5848     execute_normal();
5849     return;
5850     }
5851     static void cache_miss(void)
5852     {
5853     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5854     uae_u32 cl=cacheline(regs.pc_p);
5855     blockinfo* bi2=get_blockinfo(cl);
5856    
5857     if (!bi) {
5858     execute_normal(); /* Compile this block now */
5859     return;
5860     }
5861     Dif (!bi2 || bi==bi2) {
5862     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5863     abort();
5864     }
5865     raise_in_cl_list(bi);
5866     return;
5867     }
5868    
5869     static int called_check_checksum(blockinfo* bi);
5870    
5871     static inline int block_check_checksum(blockinfo* bi)
5872     {
5873     uae_u32 c1,c2;
5874 gbeauche 1.7 bool isgood;
5875 gbeauche 1.1
5876     if (bi->status!=BI_NEED_CHECK)
5877     return 1; /* This block is in a checked state */
5878    
5879     checksum_count++;
5880 gbeauche 1.7
5881 gbeauche 1.1 if (bi->c1 || bi->c2)
5882     calc_checksum(bi,&c1,&c2);
5883     else {
5884     c1=c2=1; /* Make sure it doesn't match */
5885 gbeauche 1.7 }
5886 gbeauche 1.1
5887     isgood=(c1==bi->c1 && c2==bi->c2);
5888 gbeauche 1.7
5889 gbeauche 1.1 if (isgood) {
5890     /* This block is still OK. So we reactivate. Of course, that
5891     means we have to move it into the needs-to-be-flushed list */
5892     bi->handler_to_use=bi->handler;
5893     set_dhtu(bi,bi->direct_handler);
5894     bi->status=BI_CHECKING;
5895     isgood=called_check_checksum(bi);
5896     }
5897     if (isgood) {
5898     /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5899     c1,c2,bi->c1,bi->c2);*/
5900     remove_from_list(bi);
5901     add_to_active(bi);
5902     raise_in_cl_list(bi);
5903     bi->status=BI_ACTIVE;
5904     }
5905     else {
5906     /* This block actually changed. We need to invalidate it,
5907     and set it up to be recompiled */
5908     /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5909     c1,c2,bi->c1,bi->c2); */
5910     invalidate_block(bi);
5911     raise_in_cl_list(bi);
5912     }
5913     return isgood;
5914     }
5915    
5916     static int called_check_checksum(blockinfo* bi)
5917     {
5919     int isgood=1;
5920     int i;
5921    
5922     for (i=0;i<2 && isgood;i++) {
5923     if (bi->dep[i].jmp_off) {
5924     isgood=block_check_checksum(bi->dep[i].target);
5925     }
5926     }
5927     return isgood;
5928     }
5929    
5930     static void check_checksum(void)
5931     {
5932     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5933     uae_u32 cl=cacheline(regs.pc_p);
5934     blockinfo* bi2=get_blockinfo(cl);
5935    
5936     /* These are not the droids you are looking for... */
5937     if (!bi) {
5938     /* Whoever is the primary target is in a dormant state, but
5939     calling it was accidental, and we should just compile this
5940     new block */
5941     execute_normal();
5942     return;
5943     }
5944     if (bi!=bi2) {
5945     /* The block was hit accidentally, but it does exist. Cache miss */
5946     cache_miss();
5947     return;
5948     }
5949    
5950     if (!block_check_checksum(bi))
5951     execute_normal();
5952     }
5953    
5954     static __inline__ void match_states(blockinfo* bi)
5955     {
5956     int i;
5957     smallstate* s=&(bi->env);
5958    
5959     if (bi->status==BI_NEED_CHECK) {
5960     block_check_checksum(bi);
5961     }
5962     if (bi->status==BI_ACTIVE ||
5963     bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5964     block makes (about not using
5965     certain vregs) */
5966     for (i=0;i<16;i++) {
5967     if (s->virt[i]==L_UNNEEDED) {
5968     // write_log("unneeded reg %d at %p\n",i,target);
5969     COMPCALL(forget_about)(i); // FIXME
5970     }
5971     }
5972     }
5973     flush(1);
5974    
5975     /* And now deal with the *demands* the block makes */
5976     for (i=0;i<N_REGS;i++) {
5977     int v=s->nat[i];
5978     if (v>=0) {
5979     // printf("Loading reg %d into %d at %p\n",v,i,target);
5980     readreg_specific(v,4,i);
5981     // do_load_reg(i,v);
5982     // setlock(i);
5983     }
5984     }
5985     for (i=0;i<N_REGS;i++) {
5986     int v=s->nat[i];
5987     if (v>=0) {
5988     unlock2(i);
5989     }
5990     }
5991     }
5992    
5993     static __inline__ void create_popalls(void)
5994     {
5995     int i,r;
5996    
5997 gbeauche 1.24 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
5998     write_log("FATAL: Could not allocate popallspace!\n");
5999     abort();
6000     }
6001     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
6002    
6003 gbeauche 1.35 int stack_space = STACK_OFFSET;
6004     for (i=0;i<N_REGS;i++) {
6005     if (need_to_preserve[i])
6006     stack_space += sizeof(void *);
6007     }
6008     stack_space %= STACK_ALIGN;
6009     if (stack_space)
6010     stack_space = STACK_ALIGN - stack_space;
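         /* Worked example, with illustrative numbers only (the real
            constants depend on the host ABI): assuming STACK_OFFSET == 0,
            STACK_ALIGN == 16 and three 4-byte callee-saved registers,
            the pushes consume 12 bytes; 12 % 16 == 12, so another
            16 - 12 == 4 bytes are reserved to keep the stack frame of
            the generated code aligned. */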
6011    
6012 gbeauche 1.1 current_compile_p=popallspace;
6013     set_target(current_compile_p);
6014     #if USE_PUSH_POP
6015     /* If we can't use gcc inline assembly, we need to pop some
6016     registers before jumping back to the various get-out routines.
6017     This generates the code for it.
6018     */
6019 gbeauche 1.5 align_target(align_jumps);
6020     popall_do_nothing=get_target();
6021 gbeauche 1.35 raw_inc_sp(stack_space);
6022 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6023     if (need_to_preserve[i])
6024     raw_pop_l_r(i);
6025     }
6026 gbeauche 1.24 raw_jmp((uintptr)do_nothing);
6027 gbeauche 1.1
6028 gbeauche 1.5 align_target(align_jumps);
6029 gbeauche 1.1 popall_execute_normal=get_target();
6030 gbeauche 1.35 raw_inc_sp(stack_space);
6031 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6032     if (need_to_preserve[i])
6033     raw_pop_l_r(i);
6034     }
6035 gbeauche 1.24 raw_jmp((uintptr)execute_normal);
6036 gbeauche 1.1
6037 gbeauche 1.5 align_target(align_jumps);
6038 gbeauche 1.1 popall_cache_miss=get_target();
6039 gbeauche 1.35 raw_inc_sp(stack_space);
6040 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6041     if (need_to_preserve[i])
6042     raw_pop_l_r(i);
6043     }
6044 gbeauche 1.24 raw_jmp((uintptr)cache_miss);
6045 gbeauche 1.1
6046 gbeauche 1.5 align_target(align_jumps);
6047 gbeauche 1.1 popall_recompile_block=get_target();
6048 gbeauche 1.35 raw_inc_sp(stack_space);
6049 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6050     if (need_to_preserve[i])
6051     raw_pop_l_r(i);
6052     }
6053 gbeauche 1.24 raw_jmp((uintptr)recompile_block);
6054 gbeauche 1.5
6055     align_target(align_jumps);
6056 gbeauche 1.1 popall_exec_nostats=get_target();
6057 gbeauche 1.35 raw_inc_sp(stack_space);
6058 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6059     if (need_to_preserve[i])
6060     raw_pop_l_r(i);
6061     }
6062 gbeauche 1.24 raw_jmp((uintptr)exec_nostats);
6063 gbeauche 1.5
6064     align_target(align_jumps);
6065 gbeauche 1.1 popall_check_checksum=get_target();
6066 gbeauche 1.35 raw_inc_sp(stack_space);
6067 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6068     if (need_to_preserve[i])
6069     raw_pop_l_r(i);
6070     }
6071 gbeauche 1.24 raw_jmp((uintptr)check_checksum);
6072 gbeauche 1.5
6073     align_target(align_jumps);
6074 gbeauche 1.1 current_compile_p=get_target();
6075     #else
6076     popall_exec_nostats=(void *)exec_nostats;
6077     popall_execute_normal=(void *)execute_normal;
6078     popall_cache_miss=(void *)cache_miss;
6079     popall_recompile_block=(void *)recompile_block;
6080     popall_do_nothing=(void *)do_nothing;
6081     popall_check_checksum=(void *)check_checksum;
6082     #endif
6083    
6084     /* And now, the code to do the matching pushes and then jump
6085     into a handler routine */
6086     pushall_call_handler=get_target();
6087     #if USE_PUSH_POP
6088     for (i=N_REGS;i--;) {
6089     if (need_to_preserve[i])
6090     raw_push_l_r(i);
6091     }
6092     #endif
6093 gbeauche 1.35 raw_dec_sp(stack_space);
6094 gbeauche 1.1 r=REG_PC_TMP;
6095 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6096 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6097 gbeauche 1.24 raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
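         /* Roughly, the dispatch emitted above behaves like this C sketch
            (illustration only; the real code is emitted into popallspace):
              ((compiled_handler)cache_tags[cacheline(regs.pc_p)].handler)();
            i.e. the low bits of the 68k PC select a cache_tags slot and
            control jumps straight through its handler pointer. */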
6098 gbeauche 1.6
6099 gbeauche 1.35 #if ! USE_PUSH_POP
6100 gbeauche 1.6 align_target(align_jumps);
6101 gbeauche 1.35 m68k_do_compile_execute = (void (*)(void))get_target();
6102 gbeauche 1.6 for (i=N_REGS;i--;) {
6103     if (need_to_preserve[i])
6104     raw_push_l_r(i);
6105     }
6106 gbeauche 1.35 raw_dec_sp(stack_space);
6107 gbeauche 1.6 align_target(align_loops);
6108 gbeauche 1.24 uintptr dispatch_loop = (uintptr)get_target();
6109 gbeauche 1.6 r=REG_PC_TMP;
6110 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6111 gbeauche 1.6 raw_and_l_ri(r,TAGMASK);
6112 gbeauche 1.24 raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6113     raw_cmp_l_mi((uintptr)&regs.spcflags,0);
6114 gbeauche 1.6 raw_jcc_b_oponly(NATIVE_CC_EQ);
6115 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6116     raw_call((uintptr)m68k_do_specialties);
6117 gbeauche 1.6 raw_test_l_rr(REG_RESULT,REG_RESULT);
6118     raw_jcc_b_oponly(NATIVE_CC_EQ);
6119 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6120     raw_cmp_b_mi((uintptr)&quit_program,0);
6121 gbeauche 1.6 raw_jcc_b_oponly(NATIVE_CC_EQ);
6122 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6123 gbeauche 1.35 raw_inc_sp(stack_space);
6124 gbeauche 1.6 for (i=0;i<N_REGS;i++) {
6125     if (need_to_preserve[i])
6126     raw_pop_l_r(i);
6127     }
6128     raw_ret();
6129     #endif
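         /* The dispatch loop generated above is the counterpart of the
            plain C m68k_do_compile_execute() near the end of this file,
            which is used when USE_PUSH_POP is defined. */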
6130 gbeauche 1.24
6131     // no need to further write into popallspace
6132     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6133 gbeauche 1.1 }
6134    
6135     static __inline__ void reset_lists(void)
6136     {
6137     int i;
6138    
6139     for (i=0;i<MAX_HOLD_BI;i++)
6140     hold_bi[i]=NULL;
6141     active=NULL;
6142     dormant=NULL;
6143     }
6144    
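         /* Emit the two per-block stub handlers: direct_pen reloads
            regs.pc_p from the block's pc_p and jumps to
            popall_execute_normal, while direct_pcc does the same but
            routes through popall_check_checksum first. */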
6145     static void prepare_block(blockinfo* bi)
6146     {
6147     int i;
6148    
6149     set_target(current_compile_p);
6150 gbeauche 1.5 align_target(align_jumps);
6151 gbeauche 1.1 bi->direct_pen=(cpuop_func *)get_target();
6152 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6153     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6154     raw_jmp((uintptr)popall_execute_normal);
6155 gbeauche 1.1
6156 gbeauche 1.5 align_target(align_jumps);
6157 gbeauche 1.1 bi->direct_pcc=(cpuop_func *)get_target();
6158 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6159     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6160     raw_jmp((uintptr)popall_check_checksum);
6161 gbeauche 1.1 current_compile_p=get_target();
6162    
6163     bi->deplist=NULL;
6164     for (i=0;i<2;i++) {
6165     bi->dep[i].prev_p=NULL;
6166     bi->dep[i].next=NULL;
6167     }
6168     bi->env=default_ss;
6169     bi->status=BI_INVALID;
6170     bi->havestate=0;
6171     //bi->env=empty_ss;
6172     }
6173    
6174 gbeauche 1.21 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6175     static inline void reset_compop(int opcode)
6176 gbeauche 1.17 {
6177 gbeauche 1.21 compfunctbl[opcode] = NULL;
6178     nfcompfunctbl[opcode] = NULL;
6179     }
6180    
6181     static int read_opcode(const char *p)
6182     {
6183     int opcode = 0;
6184     for (int i = 0; i < 4; i++) {
6185     int op = p[i];
6186     switch (op) {
6187     case '0': case '1': case '2': case '3': case '4':
6188     case '5': case '6': case '7': case '8': case '9':
6189     opcode = (opcode << 4) | (op - '0');
6190     break;
6191     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6192     opcode = (opcode << 4) | ((op - 'a') + 10);
6193     break;
6194     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6195     opcode = (opcode << 4) | ((op - 'A') + 10);
6196     break;
6197     default:
6198     return -1;
6199     }
6200     }
6201     return opcode;
6202     }
6203    
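         /* Merge the user-supplied opcode blacklist into the compiler
            tables. The "jitblacklist" pref is a ';'-separated list of
            4-digit hex opcodes or opcode ranges; for instance, a
            hypothetical prefs line
              jitblacklist f200-f2ff;4afc
            would force those opcodes to always go through the
            interpreter instead of the compiler. */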
6204     static bool merge_blacklist()
6205     {
6206     const char *blacklist = PrefsFindString("jitblacklist");
6207     if (blacklist) {
6208     const char *p = blacklist;
6209     for (;;) {
6210     if (*p == 0)
6211     return true;
6212    
6213     int opcode1 = read_opcode(p);
6214     if (opcode1 < 0)
6215     return false;
6216     p += 4;
6217    
6218     int opcode2 = opcode1;
6219     if (*p == '-') {
6220     p++;
6221     opcode2 = read_opcode(p);
6222     if (opcode2 < 0)
6223     return false;
6224     p += 4;
6225     }
6226    
6227     if (*p == 0 || *p == ';') {
6228     write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6229     for (int opcode = opcode1; opcode <= opcode2; opcode++)
6230     reset_compop(cft_map(opcode));
6231    
6232     if (*p++ == ';')
6233     continue;
6234    
6235     return true;
6236     }
6237    
6238     return false;
6239     }
6240     }
6241     return true;
6242 gbeauche 1.17 }
6243    
6244 gbeauche 1.1 void build_comp(void)
6245     {
6246     int i;
6247     int jumpcount=0;
6248     unsigned long opcode;
6249     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6250     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6251     int count;
6252     int cpu_level = 0; // 68000 (default)
6253     if (CPUType == 4)
6254     cpu_level = 4; // 68040 with FPU
6255     else {
6256     if (FPUType)
6257     cpu_level = 3; // 68020 with FPU
6258     else if (CPUType >= 2)
6259     cpu_level = 2; // 68020
6260     else if (CPUType == 1)
6261     cpu_level = 1; // 68010
6262     }
6263     struct cputbl *nfctbl = (
6264     cpu_level == 4 ? op_smalltbl_0_nf
6265     : cpu_level == 3 ? op_smalltbl_1_nf
6266     : cpu_level == 2 ? op_smalltbl_2_nf
6267     : cpu_level == 1 ? op_smalltbl_3_nf
6268     : op_smalltbl_4_nf);
6269    
6270     write_log ("<JIT compiler> : building compiler function tables\n");
6271    
6272     for (opcode = 0; opcode < 65536; opcode++) {
6273 gbeauche 1.21 reset_compop(opcode);
6274 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6275     prop[opcode].use_flags = 0x1f;
6276     prop[opcode].set_flags = 0x1f;
6277     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6278     }
6279    
6280     for (i = 0; tbl[i].opcode < 65536; i++) {
6281     int cflow = table68k[tbl[i].opcode].cflow;
6282 gbeauche 1.33 if (follow_const_jumps && (tbl[i].specific & 16))
6283 gbeauche 1.10 cflow = fl_const_jump;
6284 gbeauche 1.8 else
6285 gbeauche 1.10 cflow &= ~fl_const_jump;
6286     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6287 gbeauche 1.1
6288     int uses_fpu = tbl[i].specific & 32;
6289 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6290 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6291     else
6292     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6293     }
6294 gbeauche 1.8
6295 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6296     int uses_fpu = nftbl[i].specific & 32;
6297 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6298 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6299     else
6300     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6301    
6302     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6303     }
6304    
6305     for (i = 0; nfctbl[i].handler; i++) {
6306     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6307     }
6308    
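         /* Second pass: table68k entries whose handler field is not -1
            borrow the compile/execute functions and the cflow/is_addx
            properties of that parent opcode, so every variant of an
            instruction ends up with consistent tables. */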
6309     for (opcode = 0; opcode < 65536; opcode++) {
6310     compop_func *f;
6311     compop_func *nff;
6312     cpuop_func *nfcf;
6313     int isaddx,cflow;
6314    
6315     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6316     continue;
6317    
6318     if (table68k[opcode].handler != -1) {
6319     f = compfunctbl[cft_map(table68k[opcode].handler)];
6320     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6321     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6322     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6323     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6324     prop[cft_map(opcode)].cflow = cflow;
6325     prop[cft_map(opcode)].is_addx = isaddx;
6326     compfunctbl[cft_map(opcode)] = f;
6327     nfcompfunctbl[cft_map(opcode)] = nff;
6328     Dif (nfcf == op_illg_1)
6329     abort();
6330     nfcpufunctbl[cft_map(opcode)] = nfcf;
6331     }
6332     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6333     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6334 gbeauche 1.33 /* Unconditional jumps don't evaluate condition codes, so they
6335     * don't actually use any flags themselves */
6336     if (prop[cft_map(opcode)].cflow & fl_const_jump)
6337     prop[cft_map(opcode)].use_flags = 0;
6338 gbeauche 1.1 }
6339     for (i = 0; nfctbl[i].handler != NULL; i++) {
6340     if (nfctbl[i].specific)
6341     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6342     }
6343 gbeauche 1.21
6344     /* Merge in blacklist */
6345     if (!merge_blacklist())
6346     write_log("<JIT compiler> : blacklist merge failure!\n");
6347 gbeauche 1.1
6348     count=0;
6349     for (opcode = 0; opcode < 65536; opcode++) {
6350     if (compfunctbl[cft_map(opcode)])
6351     count++;
6352     }
6353     write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6354    
6355     /* Initialise state */
6356     create_popalls();
6357     alloc_cache();
6358     reset_lists();
6359    
6360     for (i=0;i<TAGSIZE;i+=2) {
6361     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6362     cache_tags[i+1].bi=NULL;
6363     }
6364    
6365     #if 0
6366     for (i=0;i<N_REGS;i++) {
6367     empty_ss.nat[i].holds=-1;
6368     empty_ss.nat[i].validsize=0;
6369     empty_ss.nat[i].dirtysize=0;
6370     }
6371     #endif
6372     for (i=0;i<VREGS;i++) {
6373     empty_ss.virt[i]=L_NEEDED;
6374     }
6375     for (i=0;i<N_REGS;i++) {
6376     empty_ss.nat[i]=L_UNKNOWN;
6377     }
6378     default_ss=empty_ss;
6379     }
6380    
6381    
6382     static void flush_icache_none(int n)
6383     {
6384     /* Nothing to do. */
6385     }
6386    
6387     static void flush_icache_hard(int n)
6388     {
6390     blockinfo* bi, *dbi;
6391    
6392     hard_flush_count++;
6393     #if 0
6394     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6395     n,regs.pc,regs.pc_p,current_cache_size/1024);
6396     current_cache_size = 0;
6397     #endif
6398     bi=active;
6399     while(bi) {
6400     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6401     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6402     dbi=bi; bi=bi->next;
6403     free_blockinfo(dbi);
6404     }
6405     bi=dormant;
6406     while(bi) {
6407     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6408     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6409     dbi=bi; bi=bi->next;
6410     free_blockinfo(dbi);
6411     }
6412    
6413     reset_lists();
6414     if (!compiled_code)
6415     return;
6416     current_compile_p=compiled_code;
6417     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6418     }
6419    
6420    
6421     /* "Soft flushing" --- instead of actually throwing everything away,
6422     we simply mark everything as "needs to be checked".
6423     */
6424    
6425     static inline void flush_icache_lazy(int n)
6426     {
6428     blockinfo* bi;
6429     blockinfo* bi2;
6430    
6431     soft_flush_count++;
6432     if (!active)
6433     return;
6434    
6435     bi=active;
6436     while (bi) {
6437     uae_u32 cl=cacheline(bi->pc_p);
6438     if (bi->status==BI_INVALID ||
6439     bi->status==BI_NEED_RECOMP) {
6440     if (bi==cache_tags[cl+1].bi)
6441     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6442     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6443     set_dhtu(bi,bi->direct_pen);
6444     bi->status=BI_INVALID;
6445     }
6446     else {
6447     if (bi==cache_tags[cl+1].bi)
6448     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6449     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6450     set_dhtu(bi,bi->direct_pcc);
6451     bi->status=BI_NEED_CHECK;
6452     }
6453     bi2=bi;
6454     bi=bi->next;
6455     }
6456     /* bi2 is now the last entry in the active list */
6457     bi2->next=dormant;
6458     if (dormant)
6459     dormant->prev_p=&(bi2->next);
6460    
6461     dormant=active;
6462     active->prev_p=&dormant;
6463     active=NULL;
6464 gbeauche 1.22 }
6465    
6466     void flush_icache_range(uae_u32 start, uae_u32 length)
6467     {
6468     if (!active)
6469     return;
6470    
6471     #if LAZY_FLUSH_ICACHE_RANGE
6472     uae_u8 *start_p = get_real_address(start);
6473     blockinfo *bi = active;
6474     while (bi) {
6475     #if USE_CHECKSUM_INFO
6476     bool invalidate = false;
6477     for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
6478     invalidate = (((start_p - csi->start_p) < csi->length) ||
6479     ((csi->start_p - start_p) < length));
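         /* The two difference tests above implement an interval-overlap
            check: whichever of the two ranges starts first, its start-to-
            start distance must be smaller than that range's length for
            the ranges to intersect (negative distances are meant to wrap
            around in the unsigned comparison and fail the test). */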
6480     #else
6481     // Assume system is consistent and would invalidate the right range
6482     const bool invalidate = (bi->pc_p - start_p) < length;
6483     #endif
6484     if (invalidate) {
6485     uae_u32 cl = cacheline(bi->pc_p);
6486     if (bi == cache_tags[cl + 1].bi)
6487     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6488     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
6489     set_dhtu(bi, bi->direct_pen);
6490     bi->status = BI_NEED_RECOMP;
6491     }
6492     bi = bi->next;
6493     }
6494     return;
6495     #endif
6496     flush_icache(-1);
6497 gbeauche 1.1 }
6498    
6499     static void catastrophe(void)
6500     {
6501     abort();
6502     }
6503    
6504     int failure;
6505    
6506     #define TARGET_M68K 0
6507     #define TARGET_POWERPC 1
6508     #define TARGET_X86 2
6509 gbeauche 1.24 #define TARGET_X86_64 3
6510 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6511     #define TARGET_NATIVE TARGET_X86
6512     #endif
6513     #if defined(powerpc) || defined(__powerpc__)
6514     #define TARGET_NATIVE TARGET_POWERPC
6515     #endif
6516 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6517     #define TARGET_NATIVE TARGET_X86_64
6518     #endif
6519 gbeauche 1.1
6520     #ifdef ENABLE_MON
6521 gbeauche 1.24 static uae_u32 mon_read_byte_jit(uintptr addr)
6522 gbeauche 1.1 {
6523     uae_u8 *m = (uae_u8 *)addr;
6524 gbeauche 1.24 return (uintptr)(*m);
6525 gbeauche 1.1 }
6526    
6527 gbeauche 1.24 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6528 gbeauche 1.1 {
6529     uae_u8 *m = (uae_u8 *)addr;
6530     *m = b;
6531     }
6532     #endif
6533    
6534     void disasm_block(int target, uint8 * start, size_t length)
6535     {
6536     if (!JITDebug)
6537     return;
6538    
6539     #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6540     char disasm_str[200];
6541     sprintf(disasm_str, "%s $%lx $%lx",
6542     target == TARGET_M68K ? "d68" :
6543     target == TARGET_X86 ? "d86" :
6544 gbeauche 1.24 target == TARGET_X86_64 ? "d8664" :
6545 gbeauche 1.1 target == TARGET_POWERPC ? "d" : "x",
6546     (unsigned long)(uintptr)start, (unsigned long)(uintptr)(start + length - 1));
6547    
6548 gbeauche 1.24 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6549     void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6550 gbeauche 1.1
6551     mon_read_byte = mon_read_byte_jit;
6552     mon_write_byte = mon_write_byte_jit;
6553    
6554     char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6555     mon(4, arg);
6556    
6557     mon_read_byte = old_mon_read_byte;
6558     mon_write_byte = old_mon_write_byte;
6559     #endif
6560     }
6561    
6562 gbeauche 1.24 static void disasm_native_block(uint8 *start, size_t length)
6563 gbeauche 1.1 {
6564     disasm_block(TARGET_NATIVE, start, length);
6565     }
6566    
6567 gbeauche 1.24 static void disasm_m68k_block(uint8 *start, size_t length)
6568 gbeauche 1.1 {
6569     disasm_block(TARGET_M68K, start, length);
6570     }
6571    
6572     #ifdef HAVE_GET_WORD_UNSWAPPED
6573     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6574     #else
6575     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6576     #endif
6577    
6578     #if JIT_DEBUG
6579     static uae_u8 *last_regs_pc_p = 0;
6580     static uae_u8 *last_compiled_block_addr = 0;
6581    
6582     void compiler_dumpstate(void)
6583     {
6584     if (!JITDebug)
6585     return;
6586    
6587     write_log("### Host addresses\n");
6588     write_log("MEM_BASE : %x\n", MEMBaseDiff);
6589     write_log("PC_P : %p\n", &regs.pc_p);
6590     write_log("SPCFLAGS : %p\n", &regs.spcflags);
6591     write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6592     write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6593     write_log("\n");
6594    
6595     write_log("### M68k processor state\n");
6596     m68k_dumpstate(0);
6597     write_log("\n");
6598    
6599     write_log("### Block in Mac address space\n");
6600     write_log("M68K block : %p\n",
6601 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6602 gbeauche 1.1 write_log("Native block : %p (%d bytes)\n",
6603 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6604 gbeauche 1.1 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6605     write_log("\n");
6606     }
6607     #endif
6608    
6609     static void compile_block(cpu_history* pc_hist, int blocklen)
6610     {
6611     if (letit && compiled_code) {
6612     #if PROFILE_COMPILE_TIME
6613     compile_count++;
6614     clock_t start_time = clock();
6615     #endif
6616     #if JIT_DEBUG
6617     bool disasm_block = false;
6618     #endif
6619    
6620     /* OK, here we need to 'compile' a block */
6621     int i;
6622     int r;
6623     int was_comp=0;
6624     uae_u8 liveflags[MAXRUN+1];
6625 gbeauche 1.8 #if USE_CHECKSUM_INFO
6626     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6627 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6628     uintptr min_pcp=max_pcp;
6629 gbeauche 1.8 #else
6630 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[0].location;
6631     uintptr min_pcp=max_pcp;
6632 gbeauche 1.8 #endif
6633 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6634     void* specflags=(void*)&regs.spcflags;
6635     blockinfo* bi=NULL;
6636     blockinfo* bi2;
6637     int extra_len=0;
6638    
6639     redo_current_block=0;
6640     if (current_compile_p>=max_compile_start)
6641     flush_icache_hard(7);
6642    
6643     alloc_blockinfos();
6644    
6645     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6646     bi2=get_blockinfo(cl);
6647    
6648     optlev=bi->optlevel;
6649     if (bi->status!=BI_INVALID) {
6650     Dif (bi!=bi2) {
6651     /* I don't think it can happen anymore. Shouldn't, in
6652     any case. So let's make sure... */
6653     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6654     bi->count,bi->optlevel,bi->handler_to_use,
6655     cache_tags[cl].handler);
6656     abort();
6657     }
6658    
6659     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6660     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6661     /* What the heck? We are not supposed to be here! */
6662     abort();
6663     }
6664     }
6665     if (bi->count==-1) {
6666     optlev++;
6667     while (!optcount[optlev])
6668     optlev++;
6669     bi->count=optcount[optlev]-1;
6670     }
6671 gbeauche 1.24 current_block_pc_p=(uintptr)pc_hist[0].location;
6672 gbeauche 1.1
6673     remove_deps(bi); /* We are about to create new code */
6674     bi->optlevel=optlev;
6675     bi->pc_p=(uae_u8*)pc_hist[0].location;
6676 gbeauche 1.8 #if USE_CHECKSUM_INFO
6677     free_checksum_info_chain(bi->csi);
6678     bi->csi = NULL;
6679     #endif
6680 gbeauche 1.1
6681     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6682     i=blocklen;
6683     while (i--) {
6684     uae_u16* currpcp=pc_hist[i].location;
6685     uae_u32 op=DO_GET_OPCODE(currpcp);
6686    
6687 gbeauche 1.8 #if USE_CHECKSUM_INFO
6688     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6689 gbeauche 1.33 if (follow_const_jumps && is_const_jump(op)) {
6690 gbeauche 1.8 checksum_info *csi = alloc_checksum_info();
6691     csi->start_p = (uae_u8 *)min_pcp;
6692     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6693     csi->next = bi->csi;
6694     bi->csi = csi;
6695 gbeauche 1.24 max_pcp = (uintptr)currpcp;
6696 gbeauche 1.8 }
6697 gbeauche 1.24 min_pcp = (uintptr)currpcp;
6698 gbeauche 1.8 #else
6699 gbeauche 1.24 if ((uintptr)currpcp<min_pcp)
6700     min_pcp=(uintptr)currpcp;
6701     if ((uintptr)currpcp>max_pcp)
6702     max_pcp=(uintptr)currpcp;
6703 gbeauche 1.8 #endif
6704 gbeauche 1.1
6705     liveflags[i]=((liveflags[i+1]&
6706     (~prop[op].set_flags))|
6707     prop[op].use_flags);
6708     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6709     liveflags[i]&= ~FLAG_Z;
6710     }
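         /* liveflags[] is built by a backward liveness scan over the
            block: a flag is live before an instruction if that
            instruction uses it, or if it is live afterwards and the
            instruction does not set it, i.e.
              live_in = (live_out & ~set_flags) | use_flags
            The is_addx special case kills Z early: ADDX-style
            instructions merge Z into their result, so Z need only be
            computed before them when it is still live afterwards. */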
6711    
6712 gbeauche 1.8 #if USE_CHECKSUM_INFO
6713     checksum_info *csi = alloc_checksum_info();
6714     csi->start_p = (uae_u8 *)min_pcp;
6715     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6716     csi->next = bi->csi;
6717     bi->csi = csi;
6718     #endif
6719    
6720 gbeauche 1.1 bi->needed_flags=liveflags[0];
6721    
6722 gbeauche 1.5 align_target(align_loops);
6723 gbeauche 1.1 was_comp=0;
6724    
6725     bi->direct_handler=(cpuop_func *)get_target();
6726     set_dhtu(bi,bi->direct_handler);
6727     bi->status=BI_COMPILING;
6728 gbeauche 1.24 current_block_start_target=(uintptr)get_target();
6729 gbeauche 1.1
6730     log_startblock();
6731    
6732     if (bi->count>=0) { /* Need to generate countdown code */
6733 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6734     raw_sub_l_mi((uintptr)&(bi->count),1);
6735     raw_jl((uintptr)popall_recompile_block);
6736 gbeauche 1.1 }
6737     if (optlev==0) { /* No need to actually translate */
6738     /* Execute normally without keeping stats */
6739 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6740     raw_jmp((uintptr)popall_exec_nostats);
6741 gbeauche 1.1 }
6742     else {
6743     reg_alloc_run=0;
6744     next_pc_p=0;
6745     taken_pc_p=0;
6746     branch_cc=0;
6747    
6748     comp_pc_p=(uae_u8*)pc_hist[0].location;
6749     init_comp();
6750     was_comp=1;
6751    
6752 gbeauche 1.34 #ifdef USE_CPU_EMUL_SERVICES
6753     raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
6754     raw_jcc_b_oponly(NATIVE_CC_GT);
6755     uae_s8 *branchadd=(uae_s8*)get_target();
6756     emit_byte(0);
6757     raw_call((uintptr)cpu_do_check_ticks);
6758     *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
6759     #endif
6760    
6761 gbeauche 1.1 #if JIT_DEBUG
6762     if (JITDebug) {
6763 gbeauche 1.24 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6764     raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6765 gbeauche 1.1 }
6766     #endif
6767    
6768     for (i=0;i<blocklen &&
6769     get_target_noopt()<max_compile_start;i++) {
6770     cpuop_func **cputbl;
6771     compop_func **comptbl;
6772     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6773     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6774     if (!needed_flags) {
6775     cputbl=nfcpufunctbl;
6776     comptbl=nfcompfunctbl;
6777     }
6778     else {
6779     cputbl=cpufunctbl;
6780     comptbl=compfunctbl;
6781     }
6782 gbeauche 1.32
6783     #if FLIGHT_RECORDER
6784     {
6785     mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
6786     clobber_flags();
6787     remove_all_offsets();
6788     int arg = readreg_specific(S1,4,REG_PAR1);
6789     prepare_for_call_1();
6790     unlock2(arg);
6791     prepare_for_call_2();
6792     raw_call((uintptr)m68k_record_step);
6793     }
6794     #endif
6795 gbeauche 1.1
6796     failure = 1; // gb-- defaults to failure state
6797     if (comptbl[opcode] && optlev>1) {
6798     failure=0;
6799     if (!was_comp) {
6800     comp_pc_p=(uae_u8*)pc_hist[i].location;
6801     init_comp();
6802     }
6803 gbeauche 1.18 was_comp=1;
6804 gbeauche 1.1
6805     comptbl[opcode](opcode);
6806     freescratch();
6807     if (!(liveflags[i+1] & FLAG_CZNV)) {
6808     /* We can forget about flags */
6809     dont_care_flags();
6810     }
6811     #if INDIVIDUAL_INST
6812     flush(1);
6813     nop();
6814     flush(1);
6815     was_comp=0;
6816     #endif
6817     }
6818    
6819     if (failure) {
6820     if (was_comp) {
6821     flush(1);
6822     was_comp=0;
6823     }
6824     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6825     #if USE_NORMAL_CALLING_CONVENTION
6826     raw_push_l_r(REG_PAR1);
6827     #endif
6828 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,
6829     (uintptr)pc_hist[i].location);
6830     raw_call((uintptr)cputbl[opcode]);
6831 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6832     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6833 gbeauche 1.24 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6834 gbeauche 1.9 #endif
6835 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6836     raw_inc_sp(4);
6837     #endif
6838    
6839     if (i < blocklen - 1) {
6840     uae_s8* branchadd;
6841    
6842 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)specflags);
6843 gbeauche 1.1 raw_test_l_rr(0,0);
6844     raw_jz_b_oponly();
6845     branchadd=(uae_s8 *)get_target();
6846     emit_byte(0);
6847 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6848     *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6849 gbeauche 1.1 }
6850     }
6851     }
6852     #if 1 /* This isn't completely kosher yet; it really needs to
6853     be integrated into a general inter-block-dependency scheme */
6854     if (next_pc_p && taken_pc_p &&
6855     was_comp && taken_pc_p==current_block_pc_p) {
6856     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6857     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6858     uae_u8 x=bi1->needed_flags;
6859    
6860     if (x==0xff || 1) { /* To be on the safe side */
6861     uae_u16* next=(uae_u16*)next_pc_p;
6862     uae_u32 op=DO_GET_OPCODE(next);
6863    
6864     x=0x1f;
6865     x&=(~prop[op].set_flags);
6866     x|=prop[op].use_flags;
6867     }
6868    
6869     x|=bi2->needed_flags;
6870     if (!(x & FLAG_CZNV)) {
6871     /* We can forget about flags */
6872     dont_care_flags();
6873     extra_len+=2; /* The next instruction now is part of this
6874     block */
6875     }
6876    
6877     }
6878     #endif
6879     log_flush();
6880    
6881     if (next_pc_p) { /* A branch was registered */
6882 gbeauche 1.24 uintptr t1=next_pc_p;
6883     uintptr t2=taken_pc_p;
6884 gbeauche 1.1 int cc=branch_cc;
6885    
6886     uae_u32* branchadd;
6887     uae_u32* tba;
6888     bigstate tmp;
6889     blockinfo* tbi;
6890    
6891     if (taken_pc_p<next_pc_p) {
6892     /* backward branch. Optimize for the "taken" case ---
6893     which means the raw_jcc should fall through when
6894     the 68k branch is taken. */
6895     t1=taken_pc_p;
6896     t2=next_pc_p;
6897     cc=branch_cc^1;
6898     }
6899    
6900     tmp=live; /* ouch! This is big... */
6901     raw_jcc_l_oponly(cc);
6902     branchadd=(uae_u32*)get_target();
6903     emit_long(0);
6904    
6905     /* predicted outcome */
6906     tbi=get_blockinfo_addr_new((void*)t1,1);
6907     match_states(tbi);
6908 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6909 gbeauche 1.1 raw_jcc_l_oponly(4);
6910     tba=(uae_u32*)get_target();
6911 gbeauche 1.24 emit_long(get_handler(t1)-((uintptr)tba+4));
6912     raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6913 gbeauche 1.28 flush_reg_count();
6914 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6915 gbeauche 1.1 create_jmpdep(bi,0,tba,t1);
6916    
6917 gbeauche 1.5 align_target(align_jumps);
6918 gbeauche 1.1 /* not-predicted outcome */
6919 gbeauche 1.24 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6920 gbeauche 1.1 live=tmp; /* Ouch again */
6921     tbi=get_blockinfo_addr_new((void*)t2,1);
6922     match_states(tbi);
6923    
6924     //flush(1); /* Can only get here if was_comp==1 */
6925 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6926 gbeauche 1.1 raw_jcc_l_oponly(4);
6927     tba=(uae_u32*)get_target();
6928 gbeauche 1.24 emit_long(get_handler(t2)-((uintptr)tba+4));
6929     raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6930 gbeauche 1.28 flush_reg_count();
6931 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6932 gbeauche 1.1 create_jmpdep(bi,1,tba,t2);
6933     }
6934     else
6935     {
6936     if (was_comp) {
6937     flush(1);
6938     }
6939 gbeauche 1.28 flush_reg_count();
6940 gbeauche 1.1
6941     /* Let's find out where next_handler is... */
6942     if (was_comp && isinreg(PC_P)) {
6943     r=live.state[PC_P].realreg;
6944     raw_and_l_ri(r,TAGMASK);
6945     int r2 = (r==0) ? 1 : 0;
6946 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6947     raw_cmp_l_mi((uintptr)specflags,0);
6948 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6949 gbeauche 1.1 raw_jmp_r(r2);
6950     }
6951     else if (was_comp && isconst(PC_P)) {
6952     uae_u32 v=live.state[PC_P].val;
6953     uae_u32* tba;
6954     blockinfo* tbi;
6955    
6956 gbeauche 1.24 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6957 gbeauche 1.1 match_states(tbi);
6958    
6959 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6960 gbeauche 1.1 raw_jcc_l_oponly(4);
6961     tba=(uae_u32*)get_target();
6962 gbeauche 1.24 emit_long(get_handler(v)-((uintptr)tba+4));
6963     raw_mov_l_mi((uintptr)&regs.pc_p,v);
6964     raw_jmp((uintptr)popall_do_nothing);
6965 gbeauche 1.1 create_jmpdep(bi,0,tba,v);
6966     }
6967     else {
6968     r=REG_PC_TMP;
6969 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6970 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6971     int r2 = (r==0) ? 1 : 0;
6972 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6973     raw_cmp_l_mi((uintptr)specflags,0);
6974 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6975 gbeauche 1.1 raw_jmp_r(r2);
6976     }
6977     }
6978     }
6979    
6980     #if USE_MATCH
6981     if (callers_need_recompile(&live,&(bi->env))) {
6982     mark_callers_recompile(bi);
6983     }
6984    
6985     big_to_small_state(&live,&(bi->env));
6986     #endif
6987    
6988 gbeauche 1.8 #if USE_CHECKSUM_INFO
6989     remove_from_list(bi);
6990     if (trace_in_rom) {
6991     // No need to checksum that block trace on cache invalidation
6992     free_checksum_info_chain(bi->csi);
6993     bi->csi = NULL;
6994     add_to_dormant(bi);
6995     }
6996     else {
6997     calc_checksum(bi,&(bi->c1),&(bi->c2));
6998     add_to_active(bi);
6999     }
7000     #else
7001 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
7002     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
7003     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
7004     else
7005     max_pcp+=LONGEST_68K_INST;
7006 gbeauche 1.7
7007 gbeauche 1.1 bi->len=max_pcp-min_pcp;
7008     bi->min_pcp=min_pcp;
7009 gbeauche 1.7
7010 gbeauche 1.1 remove_from_list(bi);
7011     if (isinrom(min_pcp) && isinrom(max_pcp)) {
7012     add_to_dormant(bi); /* No need to checksum it on cache flush.
7013     Please don't start changing ROMs in
7014     flight! */
7015     }
7016     else {
7017     calc_checksum(bi,&(bi->c1),&(bi->c2));
7018     add_to_active(bi);
7019     }
7020 gbeauche 1.8 #endif
7021 gbeauche 1.1
7022     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
7023    
7024     #if JIT_DEBUG
7025     if (JITDebug)
7026     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
7027    
7028     if (JITDebug && disasm_block) {
7029     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
7030     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
7031     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
7032     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
7033     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
7034     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
7035     getchar();
7036     }
7037     #endif
7038    
7039     log_dump();
7040 gbeauche 1.5 align_target(align_jumps);
7041 gbeauche 1.1
7042     /* This is the non-direct handler */
7043     bi->handler=
7044     bi->handler_to_use=(cpuop_func *)get_target();
7045 gbeauche 1.24 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
7046     raw_jnz((uintptr)popall_cache_miss);
7047 gbeauche 1.1 comp_pc_p=(uae_u8*)pc_hist[0].location;
7048    
7049     bi->status=BI_FINALIZING;
7050     init_comp();
7051     match_states(bi);
7052     flush(1);
7053    
7054 gbeauche 1.24 raw_jmp((uintptr)bi->direct_handler);
7055 gbeauche 1.1
7056     current_compile_p=get_target();
7057     raise_in_cl_list(bi);
7058    
7059     /* We will flush soon, anyway, so let's do it now */
7060     if (current_compile_p>=max_compile_start)
7061     flush_icache_hard(7);
7062    
7063     bi->status=BI_ACTIVE;
7064     if (redo_current_block)
7065     block_need_recompile(bi);
7066    
7067     #if PROFILE_COMPILE_TIME
7068     compile_time += (clock() - start_time);
7069     #endif
7070     }
7071 gbeauche 1.34
7072     /* Account for compilation time */
7073     cpu_do_check_ticks();
7074 gbeauche 1.1 }
7075    
7076     void do_nothing(void)
7077     {
7078     /* What did you expect this to do? */
7079     }
7080    
7081     void exec_nostats(void)
7082     {
7083     for (;;) {
7084     uae_u32 opcode = GET_OPCODE;
7085 gbeauche 1.32 #if FLIGHT_RECORDER
7086     m68k_record_step(m68k_getpc());
7087     #endif
7088 gbeauche 1.1 (*cpufunctbl[opcode])(opcode);
7089 gbeauche 1.34 cpu_check_ticks();
7090 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7091     return; /* We will deal with the spcflags in the caller */
7092     }
7093     }
7094     }
7095    
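         /* Unlike exec_nostats(), this variant records each executed
            instruction into pc_hist[] and hands the trace to
            compile_block() once the block ends, so subsequent runs of
            this code path can execute translated. */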
7096     void execute_normal(void)
7097     {
7098     if (!check_for_cache_miss()) {
7099     cpu_history pc_hist[MAXRUN];
7100     int blocklen = 0;
7101     #if REAL_ADDRESSING || DIRECT_ADDRESSING
7102     start_pc_p = regs.pc_p;
7103     start_pc = get_virtual_address(regs.pc_p);
7104     #else
7105     start_pc_p = regs.pc_oldp;
7106     start_pc = regs.pc;
7107     #endif
7108     for (;;) { /* Take note: This is the do-it-normal loop */
7109     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7110     uae_u32 opcode = GET_OPCODE;
7111     #if FLIGHT_RECORDER
7112     m68k_record_step(m68k_getpc());
7113     #endif
7114     (*cpufunctbl[opcode])(opcode);
7115 gbeauche 1.34 cpu_check_ticks();
7116 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7117     compile_block(pc_hist, blocklen);
7118     return; /* We will deal with the spcflags in the caller */
7119     }
7120     /* No need to check regs.spcflags, because if they were set,
7121     we'd have ended up inside that "if" */
7122     }
7123     }
7124     }
7125    
7126     typedef void (*compiled_handler)(void);
7127    
7128 gbeauche 1.35 #if USE_PUSH_POP
7129 gbeauche 1.1 void m68k_do_compile_execute(void)
7130     {
7131     for (;;) {
7132     ((compiled_handler)(pushall_call_handler))();
7133     /* Whenever we return from that, we should check spcflags */
7134     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7135     if (m68k_do_specialties ())
7136     return;
7137     }
7138     }
7139     }
7140 gbeauche 1.6 #endif
7141 gbeauche 1.35
7142     void m68k_compile_execute (void)
7143     {
7144     for (;;) {
7145     if (quit_program)
7146     break;
7147     m68k_do_compile_execute();
7148     }
7149     }