ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.39
Committed: 2007-01-13T18:21:30Z (17 years, 8 months ago) by gbeauche
Branch: MAIN
Changes since 1.38: +0 -13 lines
Log Message:
Remove the 33-bit addressing hack as it's overly complex for not much gain.
Rather, use an address override prefix (0x67) though Intel Core optimization
reference guide says to avoid LCP prefixes. In practice, the impact on performance
is measurably marginal on e.g. Speedometer tests.

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.29 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.11 * Gwenole Beauchesne
8     *
9 gbeauche 1.29 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.11 *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.24 /* NOTE: support for AMD64 assumes translation cache and other code
35     * buffers are allocated into a 32-bit address space because (i) B2/JIT
36     * code is not 64-bit clean and (ii) it's faster to resolve branches
37     * that way.
38     */
39     #if !defined(__i386__) && !defined(__x86_64__)
40     #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41     #endif
42    
43 gbeauche 1.1 #define USE_MATCH 0
44    
45     /* kludge for Brian, so he can compile under MSVC++ */
46     #define USE_NORMAL_CALLING_CONVENTION 0
47    
48     #ifndef WIN32
49 gbeauche 1.20 #include <unistd.h>
50 gbeauche 1.1 #include <sys/types.h>
51     #include <sys/mman.h>
52     #endif
53    
54     #include <stdlib.h>
55     #include <fcntl.h>
56     #include <errno.h>
57    
58     #include "sysdeps.h"
59     #include "cpu_emulation.h"
60     #include "main.h"
61     #include "prefs.h"
62     #include "user_strings.h"
63 gbeauche 1.2 #include "vm_alloc.h"
64 gbeauche 1.1
65     #include "m68k.h"
66     #include "memory.h"
67     #include "readcpu.h"
68     #include "newcpu.h"
69     #include "comptbl.h"
70     #include "compiler/compemu.h"
71     #include "fpu/fpu.h"
72     #include "fpu/flags.h"
73    
74     #define DEBUG 1
75     #include "debug.h"
76    
77     #ifdef ENABLE_MON
78     #include "mon.h"
79     #endif
80    
81     #ifndef WIN32
82 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
83     #define PROFILE_UNTRANSLATED_INSNS 1
84 gbeauche 1.1 #endif
85    
86 gbeauche 1.28 #if defined(__x86_64__) && 0
87     #define RECORD_REGISTER_USAGE 1
88     #endif
89    
90 gbeauche 1.1 #ifdef WIN32
91     #undef write_log
92     #define write_log dummy_write_log
93     static void dummy_write_log(const char *, ...) { }
94     #endif
95    
96     #if JIT_DEBUG
97     #undef abort
98     #define abort() do { \
99     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100     exit(EXIT_FAILURE); \
101     } while (0)
102     #endif
103    
104 gbeauche 1.28 #if RECORD_REGISTER_USAGE
105     static uint64 reg_count[16];
106     static int reg_count_local[16];
107    
108     static int reg_count_compare(const void *ap, const void *bp)
109     {
110     const int a = *((int *)ap);
111     const int b = *((int *)bp);
112     return reg_count[b] - reg_count[a];
113     }
114     #endif
115    
116 gbeauche 1.1 #if PROFILE_COMPILE_TIME
117     #include <time.h>
118     static uae_u32 compile_count = 0;
119     static clock_t compile_time = 0;
120     static clock_t emul_start_time = 0;
121     static clock_t emul_end_time = 0;
122     #endif
123    
124 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
125     const int untranslated_top_ten = 20;
126     static uae_u32 raw_cputbl_count[65536] = { 0, };
127     static uae_u16 opcode_nums[65536];
128    
129     static int untranslated_compfn(const void *e1, const void *e2)
130     {
131     return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
132     }
133     #endif
134    
135 gbeauche 1.24 static compop_func *compfunctbl[65536];
136     static compop_func *nfcompfunctbl[65536];
137     static cpuop_func *nfcpufunctbl[65536];
138 gbeauche 1.1 uae_u8* comp_pc_p;
139    
140 gbeauche 1.6 // From newcpu.cpp
141     extern bool quit_program;
142    
143 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
144     #if JIT_DEBUG
145     static bool JITDebug = false; // Enable runtime disassemblers through mon?
146     #else
147     const bool JITDebug = false; // Don't use JIT debug mode at all
148     #endif
149 gbeauche 1.33 #if USE_INLINING
150     static bool follow_const_jumps = true; // Flag: translation through constant jumps
151     #else
152     const bool follow_const_jumps = false;
153     #endif
154 gbeauche 1.1
155 gbeauche 1.22 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
156 gbeauche 1.1 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
157 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
158 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
159     static bool avoid_fpu = true; // Flag: compile FPU instructions ?
160     static bool have_cmov = false; // target has CMOV instructions ?
161 gbeauche 1.30 static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
162 gbeauche 1.1 static bool have_rat_stall = true; // target has partial register stalls ?
163 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
164     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
165 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
166 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
167     static int align_jumps = 32; // Align the start of jumps
168 gbeauche 1.1 static int optcount[10] = {
169     10, // How often a block has to be executed before it is translated
170     0, // How often to use naive translation
171     0, 0, 0, 0,
172     -1, -1, -1, -1
173     };
174    
/* Static per-opcode properties, indexed by the (mapped) 68k opcode.
 * Filled elsewhere in this file; consulted by the helpers below. */
struct op_properties {
	uae_u8 use_flags;	/* mask of 68k flags the instruction reads */
	uae_u8 set_flags;	/* mask of 68k flags the instruction writes */
	uae_u8 is_addx;		/* non-zero for ADDX/SUBX-style X-flag users */
	uae_u8 cflow;		/* control-flow classification (fl_* bits) */
};
static op_properties prop[65536];
182    
/* Non-zero if this opcode must terminate the translated block
 * (fl_end_block set in its control-flow properties). */
static inline int end_block(uae_u32 opcode)
{
	return (prop[opcode].cflow & fl_end_block);
}
187    
/* True if this opcode is a jump to a constant target (exact match on
 * fl_const_jump, not a bit test) — translation may follow it inline. */
static inline bool is_const_jump(uae_u32 opcode)
{
	return (prop[opcode].cflow == fl_const_jump);
}
192    
/* True if this opcode can raise a trap/exception at runtime. */
static inline bool may_trap(uae_u32 opcode)
{
	return (prop[opcode].cflow & fl_trap);
}
197    
/* Map a fetched 68k opcode word to the index used by the compiler
 * function tables.  When opcode words are stored byte-swapped in memory
 * (HAVE_GET_WORD_UNSWAPPED), exchange the two low-order bytes;
 * otherwise pass the value through unchanged. */
static inline unsigned int cft_map (unsigned int f)
{
#ifndef HAVE_GET_WORD_UNSWAPPED
	return f;
#else
	const unsigned int lo = f & 0xff;
	const unsigned int hi = (f >> 8) & 0xff;
	return (lo << 8) | hi;
#endif
}
206    
207 gbeauche 1.1 uae_u8* start_pc_p;
208     uae_u32 start_pc;
209     uae_u32 current_block_pc_p;
210 gbeauche 1.24 static uintptr current_block_start_target;
211 gbeauche 1.1 uae_u32 needed_flags;
212 gbeauche 1.24 static uintptr next_pc_p;
213     static uintptr taken_pc_p;
214 gbeauche 1.1 static int branch_cc;
215     static int redo_current_block;
216    
217     int segvcount=0;
218     int soft_flush_count=0;
219     int hard_flush_count=0;
220     int checksum_count=0;
221     static uae_u8* current_compile_p=NULL;
222     static uae_u8* max_compile_start;
223     static uae_u8* compiled_code=NULL;
224     static uae_s32 reg_alloc_run;
225 gbeauche 1.24 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
226     static uae_u8* popallspace=NULL;
227 gbeauche 1.1
228     void* pushall_call_handler=NULL;
229     static void* popall_do_nothing=NULL;
230     static void* popall_exec_nostats=NULL;
231     static void* popall_execute_normal=NULL;
232     static void* popall_cache_miss=NULL;
233     static void* popall_recompile_block=NULL;
234     static void* popall_check_checksum=NULL;
235    
236     /* The 68k only ever executes from even addresses. So right now, we
237     * waste half the entries in this array
238     * UPDATE: We now use those entries to store the start of the linked
239     * lists that we maintain for each hash result.
240     */
241     cacheline cache_tags[TAGSIZE];
242     int letit=0;
243     blockinfo* hold_bi[MAX_HOLD_BI];
244     blockinfo* active;
245     blockinfo* dormant;
246    
247     /* 68040 */
248     extern struct cputbl op_smalltbl_0_nf[];
249     extern struct comptbl op_smalltbl_0_comp_nf[];
250     extern struct comptbl op_smalltbl_0_comp_ff[];
251    
252     /* 68020 + 68881 */
253     extern struct cputbl op_smalltbl_1_nf[];
254    
255     /* 68020 */
256     extern struct cputbl op_smalltbl_2_nf[];
257    
258     /* 68010 */
259     extern struct cputbl op_smalltbl_3_nf[];
260    
261     /* 68000 */
262     extern struct cputbl op_smalltbl_4_nf[];
263    
264     /* 68000 slow but compatible. */
265     extern struct cputbl op_smalltbl_5_nf[];
266    
267     static void flush_icache_hard(int n);
268     static void flush_icache_lazy(int n);
269     static void flush_icache_none(int n);
270     void (*flush_icache)(int n) = flush_icache_none;
271    
272    
273    
274     bigstate live;
275     smallstate empty_ss;
276     smallstate default_ss;
277     static int optlev;
278    
279     static int writereg(int r, int size);
280     static void unlock2(int r);
281     static void setlock(int r);
282     static int readreg_specific(int r, int size, int spec);
283     static int writereg_specific(int r, int size, int spec);
284     static void prepare_for_call_1(void);
285     static void prepare_for_call_2(void);
286     static void align_target(uae_u32 a);
287    
288     static uae_s32 nextused[VREGS];
289    
290     uae_u32 m68k_pc_offset;
291    
/* Some arithmetic operations can be optimized away if the operands
293     * are known to be constant. But that's only a good idea when the
294     * side effects they would have on the flags are not important. This
295     * variable indicates whether we need the side effects or not
296     */
297     uae_u32 needflags=0;
298    
299     /* Flag handling is complicated.
300     *
301     * x86 instructions create flags, which quite often are exactly what we
302     * want. So at times, the "68k" flags are actually in the x86 flags.
303     *
304     * Then again, sometimes we do x86 instructions that clobber the x86
305     * flags, but don't represent a corresponding m68k instruction. In that
306     * case, we have to save them.
307     *
308     * We used to save them to the stack, but now store them back directly
309     * into the regflags.cznv of the traditional emulation. Thus some odd
310     * names.
311     *
312     * So flags can be in either of two places (used to be three; boy were
313     * things complicated back then!); And either place can contain either
314     * valid flags or invalid trash (and on the stack, there was also the
315     * option of "nothing at all", now gone). A couple of variables keep
316     * track of the respective states.
317     *
318     * To make things worse, we might or might not be interested in the flags.
319     * by default, we are, but a call to dont_care_flags can change that
320     * until the next call to live_flags. If we are not, pretty much whatever
321     * is in the register and/or the native flags is seen as valid.
322     */
323    
/* Return the head of the blockinfo chain for cache line cl.
 * Slot cl+1 of cache_tags holds the list head; slot cl holds the
 * handler (see the comment above the cache_tags declaration). */
static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
	return cache_tags[cl+1].bi;
}
328    
/* Find the blockinfo whose 68k start address (pc_p) equals addr by
 * walking the per-cacheline collision chain; NULL if not present. */
static __inline__ blockinfo* get_blockinfo_addr(void* addr)
{
	blockinfo* bi=get_blockinfo(cacheline(addr));

	while (bi) {
		if (bi->pc_p==addr)
			return bi;
		bi=bi->next_same_cl;
	}
	return NULL;
}
340    
341    
342     /*******************************************************************
343     * All sorts of list related functions for all of the lists *
344     *******************************************************************/
345    
/* Unlink bi from its per-cacheline chain and refresh the dispatch
 * handler slot: the new chain head's handler if one remains, otherwise
 * the "execute normally" fallback. */
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (bi->prev_same_cl_p)
		*(bi->prev_same_cl_p)=bi->next_same_cl;
	if (bi->next_same_cl)
		bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
	if (cache_tags[cl+1].bi)
		cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
	else
		cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}
359    
/* Unlink bi from the active/dormant list it is currently on
 * (intrusive doubly-linked list via prev_p back-pointer). */
static __inline__ void remove_from_list(blockinfo* bi)
{
	if (bi->prev_p)
		*(bi->prev_p)=bi->next;
	if (bi->next)
		bi->next->prev_p=bi->prev_p;
}
367    
/* Remove bi from both the active/dormant list and its cacheline chain. */
static __inline__ void remove_from_lists(blockinfo* bi)
{
	remove_from_list(bi);
	remove_from_cl_list(bi);
}
373    
/* Push bi at the head of its cacheline chain and make its handler the
 * one dispatched for that cache line. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (cache_tags[cl+1].bi)
		cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
	bi->next_same_cl=cache_tags[cl+1].bi;

	cache_tags[cl+1].bi=bi;
	bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

	cache_tags[cl].handler=bi->handler_to_use;
}
387    
/* Move bi to the front of its cacheline chain (simple MRU heuristic:
 * remove, then re-insert at head). */
static __inline__ void raise_in_cl_list(blockinfo* bi)
{
	remove_from_cl_list(bi);
	add_to_cl_list(bi);
}
393    
/* Push bi onto the head of the global list of active (valid) blocks. */
static __inline__ void add_to_active(blockinfo* bi)
{
	if (active)
		active->prev_p=&(bi->next);
	bi->next=active;

	active=bi;
	bi->prev_p=&active;
}
403    
/* Push bi onto the head of the global list of dormant blocks
 * (blocks whose checksum has been verified; see soft flush logic). */
static __inline__ void add_to_dormant(blockinfo* bi)
{
	if (dormant)
		dormant->prev_p=&(bi->next);
	bi->next=dormant;

	dormant=bi;
	bi->prev_p=&dormant;
}
413    
/* Unlink dependency d from its target's dependency list and clear its
 * link fields so a later remove_dep() on it is a no-op. */
static __inline__ void remove_dep(dependency* d)
{
	if (d->prev_p)
		*(d->prev_p)=d->next;
	if (d->next)
		d->next->prev_p=d->prev_p;
	d->prev_p=NULL;
	d->next=NULL;
}
423    
/* This block's code is about to be thrown away, so it no longer
   depends on anything else.  Each block carries at most two outgoing
   jump dependencies (dep[0] and dep[1]). */
static __inline__ void remove_deps(blockinfo* bi)
{
	remove_dep(&(bi->dep[0]));
	remove_dep(&(bi->dep[1]));
}
431    
/* Re-patch the rel32 displacement of the dependent jump so it targets
 * handler a.  The +4 accounts for the 4-byte displacement operand the
 * jump is relative to.  Truncation to 32 bits relies on all code
 * buffers living in a 32-bit address space (see the AMD64 note at the
 * top of this file). */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
	*(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
436    
437     /********************************************************************
438     * Soft flush handling support functions *
439     ********************************************************************/
440    
/* Change the direct handler other blocks should jump to for bi, and
 * re-patch every already-emitted jump in bi's dependency list so it
 * points at the new handler. */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
	//write_log("bi is %p\n",bi);
	if (dh!=bi->direct_handler_to_use) {
		dependency* x=bi->deplist;
		//write_log("bi->deplist=%p\n",bi->deplist);
		while (x) {
			//write_log("x is %p\n",x);
			//write_log("x->next is %p\n",x->next);
			//write_log("x->prev_p is %p\n",x->prev_p);

			if (x->jmp_off) {
				adjust_jmpdep(x,dh);
			}
			x=x->next;
		}
		bi->direct_handler_to_use=dh;
	}
}
460    
/* Reset bi to the "not yet compiled" state: handlers fall back to the
 * interpreter trampolines, the execution countdown is re-armed from
 * optcount[0], and both outgoing jump dependencies are cleared. */
static __inline__ void invalidate_block(blockinfo* bi)
{
	int i;

	bi->optlevel=0;
	bi->count=optcount[0]-1;	/* executions left before (re)translation */
	bi->handler=NULL;
	bi->handler_to_use=(cpuop_func *)popall_execute_normal;
	bi->direct_handler=NULL;
	set_dhtu(bi,bi->direct_pen);
	bi->needed_flags=0xff;		/* conservatively assume all flags live */
	bi->status=BI_INVALID;
	for (i=0;i<2;i++) {
		bi->dep[i].jmp_off=NULL;
		bi->dep[i].target=NULL;
	}
	remove_deps(bi);
}
479    
/* Record that slot i of bi contains a direct jump (patched at jmpaddr)
 * into the block starting at 68k address target, and link the
 * dependency into the target block's deplist so the jump can be
 * re-patched when the target is recompiled.  Aborts if no blockinfo
 * exists for target (caller must have created it). */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
	blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);

	Dif(!tbi) {
		write_log("Could not create jmpdep!\n");
		abort();
	}
	bi->dep[i].jmp_off=jmpaddr;
	bi->dep[i].source=bi;
	bi->dep[i].target=tbi;
	bi->dep[i].next=tbi->deplist;
	if (bi->dep[i].next)
		bi->dep[i].next->prev_p=&(bi->dep[i].next);
	bi->dep[i].prev_p=&(tbi->deplist);
	tbi->deplist=&(bi->dep[i]);
}
497    
/* Mark bi for recompilation: route its handlers back through the
 * "execute normal" trampoline (including the cacheline dispatch slot,
 * if bi is the chain head) and set status BI_NEED_RECOMP. */
static __inline__ void block_need_recompile(blockinfo * bi)
{
	uae_u32 cl = cacheline(bi->pc_p);

	set_dhtu(bi, bi->direct_pen);
	bi->direct_handler = bi->direct_pen;

	bi->handler_to_use = (cpuop_func *)popall_execute_normal;
	bi->handler = (cpuop_func *)popall_execute_normal;
	if (bi == cache_tags[cl + 1].bi)
		cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
	bi->status = BI_NEED_RECOMP;
}
511    
/* Recursively mark every block that jumps directly into bi for
 * recompilation (their emitted jumps may become stale).  A caller that
 * is itself being compiled right now sets redo_current_block instead.
 * next is saved up front because block_need_recompile() unlinks
 * entries from the list being walked. */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
	dependency *x = bi->deplist;

	while (x) {
		dependency *next = x->next;	/* This disappears when we mark for
						 * recompilation and thus remove the
						 * blocks from the lists */
		if (x->jmp_off) {
			blockinfo *cbi = x->source;

			Dif(cbi->status == BI_INVALID) {
				// write_log("invalid block in dependency list\n"); // FIXME?
				// abort();
			}
			if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
				block_need_recompile(cbi);
				mark_callers_recompile(cbi);
			}
			else if (cbi->status == BI_COMPILING) {
				redo_current_block = 1;
			}
			else if (cbi->status == BI_NEED_RECOMP) {
				/* nothing */
			}
			else {
				//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
			}
		}
		x = next;
	}
}
544    
/* Return the blockinfo for 68k address addr, creating one from the
 * hold_bi[] pool if none exists yet.  Aborts if the pool is exhausted
 * (alloc_blockinfos() is expected to keep it topped up).
 * NOTE(review): the setstate parameter is unused in this function —
 * verify against callers before removing it. */
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
	blockinfo* bi=get_blockinfo_addr(addr);
	int i;

	if (!bi) {
		for (i=0;i<MAX_HOLD_BI && !bi;i++) {
			if (hold_bi[i]) {
				uae_u32 cl=cacheline(addr);

				bi=hold_bi[i];
				hold_bi[i]=NULL;
				bi->pc_p=(uae_u8 *)addr;
				invalidate_block(bi);
				add_to_active(bi);
				add_to_cl_list(bi);

			}
		}
	}
	if (!bi) {
		write_log("Looking for blockinfo, can't find free one\n");
		abort();
	}
	return bi;
}
571    
572     static void prepare_block(blockinfo* bi);
573    
/* Management of blockinfos.
575    
576     A blockinfo struct is allocated whenever a new block has to be
577     compiled. If the list of free blockinfos is empty, we allocate a new
578     pool of blockinfos and link the newly created blockinfos altogether
579     into the list of free blockinfos. Otherwise, we simply pop a structure
580 gbeauche 1.7 off the free list.
581 gbeauche 1.1
582     Blockinfo are lazily deallocated, i.e. chained altogether in the
list of free blockinfos whenever a translation cache flush (hard or
584     soft) request occurs.
585     */
586    
/* Pool allocator for blockinfo/checksum_info structures.  Objects are
 * handed out from a free list of chunks; pools of ~4 KB are malloc'ed
 * on demand and only freed when the allocator is destroyed ("lazy"
 * deallocation — release() just returns a chunk to the free list).
 * T must have a T* "next" member, used to thread the free list. */
template< class T >
class LazyBlockAllocator
{
	enum {
		kPoolSize = 1 + 4096 / sizeof(T)	/* chunks per pool */
	};
	struct Pool {
		T chunk[kPoolSize];
		Pool * next;
	};
	Pool * mPools;		/* all pools ever allocated, for teardown */
	T * mChunks;		/* free list of available chunks */
public:
	LazyBlockAllocator() : mPools(0), mChunks(0) { }
	~LazyBlockAllocator();
	T * acquire();
	void release(T * const);
};

/* Free every pool; outstanding chunks become invalid. */
template< class T >
LazyBlockAllocator<T>::~LazyBlockAllocator()
{
	Pool * currentPool = mPools;
	while (currentPool) {
		Pool * deadPool = currentPool;
		currentPool = currentPool->next;
		free(deadPool);
	}
}

/* Pop a chunk off the free list, growing it by one pool if empty.
 * NOTE(review): the malloc() result is not checked; a failed
 * allocation would be dereferenced below — confirm intended policy. */
template< class T >
T * LazyBlockAllocator<T>::acquire()
{
	if (!mChunks) {
		// There is no chunk left, allocate a new pool and link the
		// chunks into the free list
		Pool * newPool = (Pool *)malloc(sizeof(Pool));
		for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
			chunk->next = mChunks;
			mChunks = chunk;
		}
		newPool->next = mPools;
		mPools = newPool;
	}
	T * chunk = mChunks;
	mChunks = chunk->next;
	return chunk;
}

/* Return a chunk to the free list (no memory is released here). */
template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
	chunk->next = mChunks;
	mChunks = chunk;
}
642    
/* Bump allocator that carves objects directly out of the translation
 * cache (current_compile_p).  There is no per-object free: storage is
 * reclaimed wholesale when the cache is flushed/invalidated. */
template< class T >
class HardBlockAllocator
{
public:
	T * acquire() {
		T * data = (T *)current_compile_p;
		current_compile_p += sizeof(T);
		return data;
	}

	void release(T * const chunk) {
		// Deallocated on invalidation
	}
};
657    
658     #if USE_SEPARATE_BIA
659     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
660     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
661 gbeauche 1.1 #else
662 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
663     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
664 gbeauche 1.1 #endif
665    
/* Allocate a checksum_info node with its next pointer cleared. */
static __inline__ checksum_info *alloc_checksum_info(void)
{
	checksum_info *csi = ChecksumInfoAllocator.acquire();
	csi->next = NULL;
	return csi;
}
672    
/* Return a single checksum_info node to its allocator. */
static __inline__ void free_checksum_info(checksum_info *csi)
{
	csi->next = NULL;
	ChecksumInfoAllocator.release(csi);
}
678    
/* Release an entire linked chain of checksum_info nodes. */
static __inline__ void free_checksum_info_chain(checksum_info *csi)
{
	while (csi != NULL) {
		checksum_info *csi2 = csi->next;	/* save before node is recycled */
		free_checksum_info(csi);
		csi = csi2;
	}
}
687 gbeauche 1.7
/* Allocate a blockinfo; its checksum list starts out empty when
 * per-block checksum tracking (USE_CHECKSUM_INFO) is enabled. */
static __inline__ blockinfo *alloc_blockinfo(void)
{
	blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
	bi->csi = NULL;
#endif
	return bi;
}
696    
/* Release a blockinfo, first freeing its checksum chain if any. */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
	free_checksum_info_chain(bi->csi);
	bi->csi = NULL;
#endif
	BlockInfoAllocator.release(bi);
}
705    
/* Top up the hold_bi[] pool of pre-prepared blockinfos.  Returns early
 * as soon as a non-empty slot is found (slots are refilled in order,
 * so a filled slot implies the rest are filled too). */
static __inline__ void alloc_blockinfos(void)
{
	int i;
	blockinfo* bi;

	for (i=0;i<MAX_HOLD_BI;i++) {
		if (hold_bi[i])
			return;
		bi=hold_bi[i]=alloc_blockinfo();
		prepare_block(bi);
	}
}
718    
719     /********************************************************************
720     * Functions to emit data into memory, and other general support *
721     ********************************************************************/
722    
723     static uae_u8* target;
724    
/* Initialize the code emitter.  Currently nothing to do; kept as a
 * hook for symmetry with set_target()/get_target(). */
static void emit_init(void)
{
}
728    
/* Emit one byte at the current output position and advance it. */
static __inline__ void emit_byte(uae_u8 x)
{
	*target++=x;
}
733    
/* Emit a 16-bit value at the current output position (possibly
 * unaligned store — acceptable on the x86/x86-64 targets this JIT
 * supports, per the target check at the top of the file). */
static __inline__ void emit_word(uae_u16 x)
{
	*((uae_u16*)target)=x;
	target+=2;
}
739    
/* Emit a 32-bit value at the current output position (possibly
 * unaligned store — fine on the supported x86 targets). */
static __inline__ void emit_long(uae_u32 x)
{
	*((uae_u32*)target)=x;
	target+=4;
}
745    
/* Emit a 64-bit value at the current output position (possibly
 * unaligned store — fine on the supported x86 targets). */
static __inline__ void emit_quad(uae_u64 x)
{
	*((uae_u64*)target)=x;
	target+=8;
}
751    
/* Copy blocklen raw bytes into the output stream and advance. */
static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
{
	memcpy((uae_u8 *)target,block,blocklen);
	target+=blocklen;
}
757    
/* Byte-swap a 32-bit value (endianness conversion). */
static __inline__ uae_u32 reverse32(uae_u32 v)
{
#if 1
	// gb-- We have specialized byteswapping functions, just use them
	return do_byteswap_32(v);
#else
	return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
#endif
}
767    
768     /********************************************************************
769     * Getting the information about the target CPU *
770     ********************************************************************/
771    
772     #include "codegen_x86.cpp"
773    
/* Set the current code-generation output position. */
void set_target(uae_u8* t)
{
	target=t;
}
778    
/* Return the current output position without any alignment tweaks. */
static __inline__ uae_u8* get_target_noopt(void)
{
	return target;
}
783    
/* Public accessor for the current output position. */
__inline__ uae_u8* get_target(void)
{
	return get_target_noopt();
}
788    
789    
790     /********************************************************************
791     * Flags status handling. EMIT TIME! *
792     ********************************************************************/
793    
794     static void bt_l_ri_noclobber(R4 r, IMM i);
795    
/* Ensure the emulated 68k flags are live in the native x86 flags
 * register, reloading them from regflags.cznv ("the stack" in the
 * historical naming above) if needed.  Aborts if neither location
 * holds valid flags. */
static void make_flags_live_internal(void)
{
	if (live.flags_in_flags==VALID)
		return;
	Dif (live.flags_on_stack==TRASH) {
		write_log("Want flags, got something on stack, but it is TRASH\n");
		abort();
	}
	if (live.flags_on_stack==VALID) {
		int tmp;
		tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
		raw_reg_to_flags(tmp);	/* emit code moving saved flags -> EFLAGS */
		unlock2(tmp);

		live.flags_in_flags=VALID;
		return;
	}
	write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
		  live.flags_in_flags,live.flags_on_stack);
	abort();
}
817    
/* Spill the native flags into the saved location (regflags.cznv).
 * If flags are currently unimportant, just mark the saved copy valid
 * without emitting any code. */
static void flags_to_stack(void)
{
	if (live.flags_on_stack==VALID)
		return;
	if (!live.flags_are_important) {
		live.flags_on_stack=VALID;
		return;
	}
	Dif (live.flags_in_flags!=VALID)
		abort();
	else {
		int tmp;
		tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
		raw_flags_to_reg(tmp);	/* emit code moving EFLAGS -> save area */
		unlock2(tmp);
	}
	live.flags_on_stack=VALID;
}
836    
/* About to emit an instruction that trashes the native flags: spill
 * them first if they are the only valid copy, then mark them TRASH. */
static __inline__ void clobber_flags(void)
{
	if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
		flags_to_stack();
	live.flags_in_flags=TRASH;
}
843    
/* Prepare for leaving the compiled stuff: make sure the saved flag
   copy is up to date before control returns to the interpreter. */
static __inline__ void flush_flags(void)
{
	flags_to_stack();
	return;
}
850    
851     int touchcnt;
852    
853     /********************************************************************
854 gbeauche 1.18 * Partial register flushing for optimized calls *
855     ********************************************************************/
856    
/* Per-instruction 68k register usage: bit r set in rmask/wmask means
 * register r (D0-D7 = bits 0-7, A0-A7 = bits 8-15) is read/written. */
struct regusage {
	uae_u16 rmask;	/* registers read */
	uae_u16 wmask;	/* registers written */
};
861    
/* Set bit reg in a usage mask; compiled out (no-op) unless
 * USE_OPTIMIZED_CALLS tracking is enabled. */
static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	*mask |= 1 << reg;
#endif
}
868    
/* Test bit reg in a usage mask.  Without USE_OPTIMIZED_CALLS the
 * conservative answer "register is both read and written" is returned. */
static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	return (*mask & (1 << reg));
#else
	/* Default: instruction reads & write to register */
	return true;
#endif
}
878    
/* Mark register reg as read by the instruction. */
static inline void ru_set_read(regusage *ru, int reg)
{
	ru_set(&ru->rmask, reg);
}
883    
/* Mark register reg as written by the instruction. */
static inline void ru_set_write(regusage *ru, int reg)
{
	ru_set(&ru->wmask, reg);
}
888    
/* Query whether register reg is read by the instruction. */
static inline bool ru_read_p(const regusage *ru, int reg)
{
	return ru_get(&ru->rmask, reg);
}
893    
/* Query whether register reg is written by the instruction. */
static inline bool ru_write_p(const regusage *ru, int reg)
{
	return ru_get(&ru->wmask, reg);
}
898    
/* Record the register usage implied by one effective-address operand
 * and advance m68k_pc_offset past its extension words.  write_mode
 * selects whether a Dreg/Areg operand counts as a write or a read;
 * memory modes always read the base/index registers. */
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
	wordsizes size, int write_mode)
{
	switch (mode) {
	case Areg:
		reg += 8;	/* address registers occupy mask bits 8-15 */
		/* fall through */
	case Dreg:
		ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
		break;
	case Ad16:
		/* skip displacement */
		m68k_pc_offset += 2;
		/* fall through */
	case Aind:
	case Aipi:
	case Apdi:
		ru_set_read(ru, reg+8);
		break;
	case Ad8r:
		ru_set_read(ru, reg+8);
		/* fall through */
	case PC8r: {
		/* Brief extension word: index register in bits 12-15; a set
		 * bit 8 indicates a full extension word with extra words. */
		uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
		reg = (dp >> 12) & 15;
		ru_set_read(ru, reg);
		if (dp & 0x100)
			m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
		break;
	}
	case PC16:
	case absw:
	case imm0:
	case imm1:
		m68k_pc_offset += 2;
		break;
	case absl:
	case imm2:
		m68k_pc_offset += 4;
		break;
	case immi:
		m68k_pc_offset += (size == sz_long) ? 4 : 2;
		break;
	}
}
943    
944     /* TODO: split into a static initialization part and a dynamic one
945     (instructions depending on extension words) */
946     static void ru_fill(regusage *ru, uae_u32 opcode)
947     {
948     m68k_pc_offset += 2;
949    
950     /* Default: no register is used or written to */
951     ru->rmask = 0;
952     ru->wmask = 0;
953    
954     uae_u32 real_opcode = cft_map(opcode);
955     struct instr *dp = &table68k[real_opcode];
956    
957     bool rw_dest = true;
958     bool handled = false;
959    
960     /* Handle some instructions specifically */
961     uae_u16 reg, ext;
962     switch (dp->mnemo) {
963     case i_BFCHG:
964     case i_BFCLR:
965     case i_BFEXTS:
966     case i_BFEXTU:
967     case i_BFFFO:
968     case i_BFINS:
969     case i_BFSET:
970     case i_BFTST:
971     ext = comp_get_iword((m68k_pc_offset+=2)-2);
972     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
973     if (ext & 0x020) ru_set_read(ru, ext & 7);
974     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
975     if (dp->dmode == Dreg)
976     ru_set_read(ru, dp->dreg);
977     switch (dp->mnemo) {
978     case i_BFEXTS:
979     case i_BFEXTU:
980     case i_BFFFO:
981     ru_set_write(ru, (ext >> 12) & 7);
982     break;
983     case i_BFINS:
984     ru_set_read(ru, (ext >> 12) & 7);
985     /* fall through */
986     case i_BFCHG:
987     case i_BFCLR:
988     case i_BSET:
989     if (dp->dmode == Dreg)
990     ru_set_write(ru, dp->dreg);
991     break;
992     }
993     handled = true;
994     rw_dest = false;
995     break;
996    
997     case i_BTST:
998     rw_dest = false;
999     break;
1000    
1001     case i_CAS:
1002     {
1003     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1004     int Du = ext & 7;
1005     ru_set_read(ru, Du);
1006     int Dc = (ext >> 6) & 7;
1007     ru_set_read(ru, Dc);
1008     ru_set_write(ru, Dc);
1009     break;
1010     }
1011     case i_CAS2:
1012     {
1013     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1014     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1015     Rn1 = (ext >> 12) & 15;
1016     Du1 = (ext >> 6) & 7;
1017     Dc1 = ext & 7;
1018     ru_set_read(ru, Rn1);
1019     ru_set_read(ru, Du1);
1020     ru_set_read(ru, Dc1);
1021     ru_set_write(ru, Dc1);
1022     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1023     Rn2 = (ext >> 12) & 15;
1024     Du2 = (ext >> 6) & 7;
1025     Dc2 = ext & 7;
1026     ru_set_read(ru, Rn2);
1027     ru_set_read(ru, Du2);
1028     ru_set_write(ru, Dc2);
1029     break;
1030     }
1031     case i_DIVL: case i_MULL:
1032     m68k_pc_offset += 2;
1033     break;
1034     case i_LEA:
1035     case i_MOVE: case i_MOVEA: case i_MOVE16:
1036     rw_dest = false;
1037     break;
1038     case i_PACK: case i_UNPK:
1039     rw_dest = false;
1040     m68k_pc_offset += 2;
1041     break;
1042     case i_TRAPcc:
1043     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1044     break;
1045     case i_RTR:
1046     /* do nothing, just for coverage debugging */
1047     break;
1048     /* TODO: handle EXG instruction */
1049     }
1050    
1051     /* Handle A-Traps better */
1052     if ((real_opcode & 0xf000) == 0xa000) {
1053     handled = true;
1054     }
1055    
1056     /* Handle EmulOps better */
1057     if ((real_opcode & 0xff00) == 0x7100) {
1058     handled = true;
1059     ru->rmask = 0xffff;
1060     ru->wmask = 0;
1061     }
1062    
1063     if (dp->suse && !handled)
1064     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1065    
1066     if (dp->duse && !handled)
1067     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1068    
1069     if (rw_dest)
1070     ru->rmask |= ru->wmask;
1071    
1072     handled = handled || dp->suse || dp->duse;
1073    
1074     /* Mark all registers as used/written if the instruction may trap */
1075     if (may_trap(opcode)) {
1076     handled = true;
1077     ru->rmask = 0xffff;
1078     ru->wmask = 0xffff;
1079     }
1080    
1081     if (!handled) {
1082     write_log("ru_fill: %04x = { %04x, %04x }\n",
1083     real_opcode, ru->rmask, ru->wmask);
1084     abort();
1085     }
1086     }
1087    
1088     /********************************************************************
1089 gbeauche 1.1 * register allocation per block logging *
1090     ********************************************************************/
1091    
/* Per-block register logging state: vstate/nstate track what is known
   about each virtual/native register since log_startblock(). */
static uae_s8 vstate[VREGS];
static uae_s8 vwritten[VREGS];	/* 1 once the virtual reg was written in this block */
static uae_s8 nstate[N_REGS];	/* >=0: holds that vreg; else one of the L_* codes */

#define L_UNKNOWN -127	/* not yet observed in this block */
#define L_UNAVAIL -1	/* native reg used as a scratch/temporary */
#define L_NEEDED -2	/* vreg's incoming value is needed */
#define L_UNNEEDED -3	/* vreg is clobbered before ever being read */
1100    
1101     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1102     {
1103     int i;
1104    
1105     for (i = 0; i < VREGS; i++)
1106     s->virt[i] = vstate[i];
1107     for (i = 0; i < N_REGS; i++)
1108     s->nat[i] = nstate[i];
1109     }
1110    
1111     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1112     {
1113     int i;
1114     int reverse = 0;
1115    
1116     for (i = 0; i < VREGS; i++) {
1117     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1118     return 1;
1119     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1120     reverse++;
1121     }
1122     for (i = 0; i < N_REGS; i++) {
1123     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1124     return 1;
1125     if (nstate[i] < 0 && s->nat[i] >= 0)
1126     reverse++;
1127     }
1128     if (reverse >= 2 && USE_MATCH)
1129     return 1; /* In this case, it might be worth recompiling the
1130     * callers */
1131     return 0;
1132     }
1133    
1134     static __inline__ void log_startblock(void)
1135     {
1136     int i;
1137    
1138     for (i = 0; i < VREGS; i++) {
1139     vstate[i] = L_UNKNOWN;
1140     vwritten[i] = 0;
1141     }
1142     for (i = 0; i < N_REGS; i++)
1143     nstate[i] = L_UNKNOWN;
1144     }
1145    
1146     /* Using an n-reg for a temp variable */
1147     static __inline__ void log_isused(int n)
1148     {
1149     if (nstate[n] == L_UNKNOWN)
1150     nstate[n] = L_UNAVAIL;
1151     }
1152    
1153     static __inline__ void log_visused(int r)
1154     {
1155     if (vstate[r] == L_UNKNOWN)
1156     vstate[r] = L_NEEDED;
1157     }
1158    
1159     static __inline__ void do_load_reg(int n, int r)
1160     {
1161     if (r == FLAGTMP)
1162     raw_load_flagreg(n, r);
1163     else if (r == FLAGX)
1164     raw_load_flagx(n, r);
1165     else
1166 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1167 gbeauche 1.1 }
1168    
/* Emit a plain load of vreg r's memory image into native reg n
   (no FLAGTMP/FLAGX special-casing, unlike do_load_reg). */
static __inline__ void check_load_reg(int n, int r)
{
	raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}
1173    
/* Record that virtual register r has been written in this block. */
static __inline__ void log_vwrite(int r)
{
	vwritten[r] = 1;
}
1178    
1179     /* Using an n-reg to hold a v-reg */
1180     static __inline__ void log_isreg(int n, int r)
1181     {
1182     static int count = 0;
1183    
1184     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1185     nstate[n] = r;
1186     else {
1187     do_load_reg(n, r);
1188     if (nstate[n] == L_UNKNOWN)
1189     nstate[n] = L_UNAVAIL;
1190     }
1191     if (vstate[r] == L_UNKNOWN)
1192     vstate[r] = L_NEEDED;
1193     }
1194    
1195     static __inline__ void log_clobberreg(int r)
1196     {
1197     if (vstate[r] == L_UNKNOWN)
1198     vstate[r] = L_UNNEEDED;
1199     }
1200    
1201     /* This ends all possibility of clever register allocation */
1202    
1203     static __inline__ void log_flush(void)
1204     {
1205     int i;
1206    
1207     for (i = 0; i < VREGS; i++)
1208     if (vstate[i] == L_UNKNOWN)
1209     vstate[i] = L_NEEDED;
1210     for (i = 0; i < N_REGS; i++)
1211     if (nstate[i] == L_UNKNOWN)
1212     nstate[i] = L_UNAVAIL;
1213     }
1214    
/* Debug helper: dump the logged register states.  The unconditional
   'return' below deliberately disables the output; remove it to
   re-enable the dump. */
static __inline__ void log_dump(void)
{
	int i;

	return;

	write_log("----------------------\n");
	for (i = 0; i < N_REGS; i++) {
		switch (nstate[i]) {
		case L_UNKNOWN:
			write_log("Nat %d : UNKNOWN\n", i);
			break;
		case L_UNAVAIL:
			write_log("Nat %d : UNAVAIL\n", i);
			break;
		default:
			write_log("Nat %d : %d\n", i, nstate[i]);
			break;
		}
	}
	for (i = 0; i < VREGS; i++) {
		if (vstate[i] == L_UNNEEDED)
			write_log("Virt %d: UNNEEDED\n", i);
	}
}
1240    
1241     /********************************************************************
1242     * register status handling. EMIT TIME! *
1243     ********************************************************************/
1244    
1245     static __inline__ void set_status(int r, int status)
1246     {
1247     if (status == ISCONST)
1248     log_clobberreg(r);
1249     live.state[r].status=status;
1250     }
1251    
1252     static __inline__ int isinreg(int r)
1253     {
1254     return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1255     }
1256    
/* Emit code adding val to native reg r (folds a pending constant offset
   back into the register); no-op when val is 0. */
static __inline__ void adjust_nreg(int r, uae_u32 val)
{
	if (!val)
		return;
	raw_lea_l_brr(r,r,val);
}
1263    
/* Write vreg r back to its memory slot.  If it carries a pending offset
   and exclusively holds its native reg, the offset is folded in first
   (which dirties the full 4 bytes). */
static void tomem(int r)
{
	int rr=live.state[r].realreg;

	if (isinreg(r)) {
		if (live.state[r].val && live.nat[rr].nholds==1
			&& !live.nat[rr].locked) {
			// write_log("RemovingA offset %x from reg %d (%d) at %p\n",
			// live.state[r].val,r,rr,target);
			adjust_nreg(rr,live.state[r].val);
			live.state[r].val=0;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
		}
	}

	/* Only store the bytes that are actually dirty */
	if (live.state[r].status==DIRTY) {
		switch (live.state[r].dirtysize) {
		case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
		case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
		case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
		default: abort();
		}
		log_vwrite(r);
		set_status(r,CLEAN);
		live.state[r].dirtysize=0;
	}
}
1292    
/* True if vreg r is currently represented by a compile-time constant. */
static __inline__ int isconst(int r)
{
	return live.state[r].status==ISCONST;
}
1297    
/* Non-static wrapper around isconst() for use from other modules. */
int is_const(int r)
{
	return isconst(r);
}
1302    
/* Store a constant-valued vreg directly to its memory slot and demote
   it to INMEM.  Constants with an NF_HANDLER flush policy are a bug. */
static __inline__ void writeback_const(int r)
{
	if (!isconst(r))
		return;
	Dif (live.state[r].needflush==NF_HANDLER) {
		write_log("Trying to write back constant NF_HANDLER!\n");
		abort();
	}

	raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
	log_vwrite(r);
	live.state[r].val=0;
	set_status(r,INMEM);
}
1317    
1318     static __inline__ void tomem_c(int r)
1319     {
1320     if (isconst(r)) {
1321     writeback_const(r);
1322     }
1323     else
1324     tomem(r);
1325     }
1326    
/* Evict vreg r from its native register: flush it to memory, then
   remove it from the native reg's holds[] list (swapping in the last
   entry to keep the list dense) and mark it INMEM. */
static void evict(int r)
{
	int rr;

	if (!isinreg(r))
		return;
	tomem(r);
	rr=live.state[r].realreg;

	Dif (live.nat[rr].locked &&
		live.nat[rr].nholds==1) {
		write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
		abort();
	}

	live.nat[rr].nholds--;
	if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
		int topreg=live.nat[rr].holds[live.nat[rr].nholds];
		int thisind=live.state[r].realind;

		live.nat[rr].holds[thisind]=topreg;
		live.state[topreg].realind=thisind;
	}
	live.state[r].realreg=-1;
	set_status(r,INMEM);
}
1353    
1354     static __inline__ void free_nreg(int r)
1355     {
1356     int i=live.nat[r].nholds;
1357    
1358     while (i) {
1359     int vr;
1360    
1361     --i;
1362     vr=live.nat[r].holds[i];
1363     evict(vr);
1364     }
1365     Dif (live.nat[r].nholds!=0) {
1366     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1367     abort();
1368     }
1369     }
1370    
/* Use with care!  Forcibly marks an in-register vreg as CLEAN with a
   full 4 valid bytes and no pending offset, discarding dirty state. */
static __inline__ void isclean(int r)
{
	if (!isinreg(r))
		return;
	live.state[r].validsize=4;
	live.state[r].dirtysize=0;
	live.state[r].val=0;
	set_status(r,CLEAN);
}
1381    
/* Detach vreg r from its native register without writing it back:
   mark clean first so the following evict emits no store. */
static __inline__ void disassociate(int r)
{
	isclean(r);
	evict(r);
}
1387    
/* Make vreg r a compile-time constant with value val, dropping any
   native-register association first. */
static __inline__ void set_const(int r, uae_u32 val)
{
	disassociate(r);
	live.state[r].val=val;
	set_status(r,ISCONST);
}
1394    
/* Return vreg r's pending constant offset (the 'val' field). */
static __inline__ uae_u32 get_offset(int r)
{
	return live.state[r].val;
}
1399    
/* Allocate a native register for vreg r.
 *  - size:        access size needed (1/2/4); byte/word require a capable reg
 *  - willclobber: nonzero if the caller will overwrite the value entirely
 *  - hint:        preferred native register, or -1 for no preference
 * Picks the least-recently-touched eligible register (free regs and the
 * hint are strongly preferred), spills its current holders, loads or
 * initializes r as appropriate, and returns the chosen native reg.
 */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
	int bestreg;
	uae_s32 when;
	int i;
	uae_s32 badness=0; /* to shut up gcc */
	bestreg=-1;
	when=2000000000;

	/* XXX use a regalloc_order table? */
	for (i=0;i<N_REGS;i++) {
		badness=live.nat[i].touched;
		if (live.nat[i].nholds==0)
			badness=0;
		if (i==hint)
			badness-=200000000;	/* bias heavily toward the hint */
		if (!live.nat[i].locked && badness<when) {
			if ((size==1 && live.nat[i].canbyte) ||
				(size==2 && live.nat[i].canword) ||
				(size==4)) {
				bestreg=i;
				when=badness;
				if (live.nat[i].nholds==0 && hint<0)
					break;
				if (i==hint)
					break;
			}
		}
	}
	Dif (bestreg==-1)
		abort();

	if (live.nat[bestreg].nholds>0) {
		free_nreg(bestreg);
	}
	if (isinreg(r)) {
		int rr=live.state[r].realreg;
		/* This will happen if we read a partially dirty register at a
		   bigger size */
		Dif (willclobber || live.state[r].validsize>=size)
			abort();
		Dif (live.nat[rr].nholds!=1)
			abort();
		if (size==4 && live.state[r].validsize==2) {
			/* Widen a valid low word to a full long: merge the upper
			   half from memory (byteswapped layout) with the live word */
			log_isused(bestreg);
			log_visused(r);
			raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
			raw_bswap_32(bestreg);
			raw_zero_extend_16_rr(rr,rr);
			raw_zero_extend_16_rr(bestreg,bestreg);
			raw_bswap_32(bestreg);
			raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
			live.state[r].validsize=4;
			live.nat[rr].touched=touchcnt++;
			return rr;
		}
		if (live.state[r].validsize==1) {
			/* Nothing yet */
		}
		evict(r);
	}

	if (!willclobber) {
		if (live.state[r].status!=UNDEF) {
			if (isconst(r)) {
				/* Materialize the constant; it becomes a dirty value */
				raw_mov_l_ri(bestreg,live.state[r].val);
				live.state[r].val=0;
				live.state[r].dirtysize=4;
				set_status(r,DIRTY);
				log_isused(bestreg);
			}
			else {
				log_isreg(bestreg, r); /* This will also load it! */
				live.state[r].dirtysize=0;
				set_status(r,CLEAN);
			}
		}
		else {
			live.state[r].val=0;
			live.state[r].dirtysize=0;
			set_status(r,CLEAN);
			log_isused(bestreg);
		}
		live.state[r].validsize=4;
	}
	else { /* this is the easiest way, but not optimal. FIXME! */
		/* Now it's trickier, but hopefully still OK */
		if (!isconst(r) || size==4) {
			live.state[r].validsize=size;
			live.state[r].dirtysize=size;
			live.state[r].val=0;
			set_status(r,DIRTY);
			if (size == 4) {
				log_clobberreg(r);
				log_isused(bestreg);
			}
			else {
				log_visused(r);
				log_isused(bestreg);
			}
		}
		else {
			/* Partial clobber of a constant: load it first */
			if (live.state[r].status!=UNDEF)
				raw_mov_l_ri(bestreg,live.state[r].val);
			live.state[r].val=0;
			live.state[r].validsize=4;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
			log_isused(bestreg);
		}
	}
	/* Register the new association */
	live.state[r].realreg=bestreg;
	live.state[r].realind=live.nat[bestreg].nholds;
	live.nat[bestreg].touched=touchcnt++;
	live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
	live.nat[bestreg].nholds++;

	return bestreg;
}
1519    
/* alloc_reg_hinted() without a placement preference. */
static int alloc_reg(int r, int size, int willclobber)
{
	return alloc_reg_hinted(r,size,willclobber,-1);
}
1524    
/* Drop one lock level on native reg r; aborts (debug) if not locked. */
static void unlock2(int r)
{
	Dif (!live.nat[r].locked)
		abort();
	live.nat[r].locked--;
}
1531    
/* Lock native reg r against eviction/reallocation (counted). */
static void setlock(int r)
{
	live.nat[r].locked++;
}
1536    
1537    
1538     static void mov_nregs(int d, int s)
1539     {
1540     int ns=live.nat[s].nholds;
1541     int nd=live.nat[d].nholds;
1542     int i;
1543    
1544     if (s==d)
1545     return;
1546    
1547     if (nd>0)
1548     free_nreg(d);
1549    
1550     log_isused(d);
1551     raw_mov_l_rr(d,s);
1552    
1553     for (i=0;i<live.nat[s].nholds;i++) {
1554     int vs=live.nat[s].holds[i];
1555    
1556     live.state[vs].realreg=d;
1557     live.state[vs].realind=i;
1558     live.nat[d].holds[i]=vs;
1559     }
1560     live.nat[d].nholds=live.nat[s].nholds;
1561    
1562     live.nat[s].nholds=0;
1563     }
1564    
1565    
1566     static __inline__ void make_exclusive(int r, int size, int spec)
1567     {
1568     int clobber;
1569     reg_status oldstate;
1570     int rr=live.state[r].realreg;
1571     int nr;
1572     int nind;
1573     int ndirt=0;
1574     int i;
1575    
1576     if (!isinreg(r))
1577     return;
1578     if (live.nat[rr].nholds==1)
1579     return;
1580     for (i=0;i<live.nat[rr].nholds;i++) {
1581     int vr=live.nat[rr].holds[i];
1582     if (vr!=r &&
1583     (live.state[vr].status==DIRTY || live.state[vr].val))
1584     ndirt++;
1585     }
1586     if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1587     /* Everything else is clean, so let's keep this register */
1588     for (i=0;i<live.nat[rr].nholds;i++) {
1589     int vr=live.nat[rr].holds[i];
1590     if (vr!=r) {
1591     evict(vr);
1592     i--; /* Try that index again! */
1593     }
1594     }
1595     Dif (live.nat[rr].nholds!=1) {
1596     write_log("natreg %d holds %d vregs, %d not exclusive\n",
1597     rr,live.nat[rr].nholds,r);
1598     abort();
1599     }
1600     return;
1601     }
1602    
1603     /* We have to split the register */
1604     oldstate=live.state[r];
1605    
1606     setlock(rr); /* Make sure this doesn't go away */
1607     /* Forget about r being in the register rr */
1608     disassociate(r);
1609     /* Get a new register, that we will clobber completely */
1610     if (oldstate.status==DIRTY) {
1611     /* If dirtysize is <4, we need a register that can handle the
1612     eventual smaller memory store! Thanks to Quake68k for exposing
1613     this detail ;-) */
1614     nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1615     }
1616     else {
1617     nr=alloc_reg_hinted(r,4,1,spec);
1618     }
1619     nind=live.state[r].realind;
1620     live.state[r]=oldstate; /* Keep all the old state info */
1621     live.state[r].realreg=nr;
1622     live.state[r].realind=nind;
1623    
1624     if (size<live.state[r].validsize) {
1625     if (live.state[r].val) {
1626     /* Might as well compensate for the offset now */
1627     raw_lea_l_brr(nr,rr,oldstate.val);
1628     live.state[r].val=0;
1629     live.state[r].dirtysize=4;
1630     set_status(r,DIRTY);
1631     }
1632     else
1633     raw_mov_l_rr(nr,rr); /* Make another copy */
1634     }
1635     unlock2(rr);
1636     }
1637    
/* Accumulate a constant offset on vreg r without emitting any code. */
static __inline__ void add_offset(int r, uae_u32 off)
{
	live.state[r].val+=off;
}
1642    
1643     static __inline__ void remove_offset(int r, int spec)
1644     {
1645     reg_status oldstate;
1646     int rr;
1647    
1648     if (isconst(r))
1649     return;
1650     if (live.state[r].val==0)
1651     return;
1652     if (isinreg(r) && live.state[r].validsize<4)
1653     evict(r);
1654    
1655     if (!isinreg(r))
1656     alloc_reg_hinted(r,4,0,spec);
1657    
1658     Dif (live.state[r].validsize!=4) {
1659     write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1660     abort();
1661     }
1662     make_exclusive(r,0,-1);
1663     /* make_exclusive might have done the job already */
1664     if (live.state[r].val==0)
1665     return;
1666    
1667     rr=live.state[r].realreg;
1668    
1669     if (live.nat[rr].nholds==1) {
1670     //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1671     // live.state[r].val,r,rr,target);
1672     adjust_nreg(rr,live.state[r].val);
1673     live.state[r].dirtysize=4;
1674     live.state[r].val=0;
1675     set_status(r,DIRTY);
1676     return;
1677     }
1678     write_log("Failed in remove_offset\n");
1679     abort();
1680     }
1681    
1682     static __inline__ void remove_all_offsets(void)
1683     {
1684     int i;
1685    
1686     for (i=0;i<VREGS;i++)
1687     remove_offset(i,-1);
1688     }
1689    
1690 gbeauche 1.28 static inline void flush_reg_count(void)
1691     {
1692     #if RECORD_REGISTER_USAGE
1693     for (int r = 0; r < 16; r++)
1694     if (reg_count_local[r])
1695     ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1696     #endif
1697     }
1698    
/* Count one use of 68k register r (profiling builds only). */
static inline void record_register(int r)
{
#if RECORD_REGISTER_USAGE
	if (r < 16)
		reg_count_local[r]++;
#endif
}
1706    
1707 gbeauche 1.1 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1708     {
1709     int n;
1710     int answer=-1;
1711    
1712 gbeauche 1.28 record_register(r);
1713 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1714     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1715     }
1716     if (!can_offset)
1717     remove_offset(r,spec);
1718    
1719     if (isinreg(r) && live.state[r].validsize>=size) {
1720     n=live.state[r].realreg;
1721     switch(size) {
1722     case 1:
1723     if (live.nat[n].canbyte || spec>=0) {
1724     answer=n;
1725     }
1726     break;
1727     case 2:
1728     if (live.nat[n].canword || spec>=0) {
1729     answer=n;
1730     }
1731     break;
1732     case 4:
1733     answer=n;
1734     break;
1735     default: abort();
1736     }
1737     if (answer<0)
1738     evict(r);
1739     }
1740     /* either the value was in memory to start with, or it was evicted and
1741     is in memory now */
1742     if (answer<0) {
1743     answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1744     }
1745    
1746     if (spec>=0 && spec!=answer) {
1747     /* Too bad */
1748     mov_nregs(spec,answer);
1749     answer=spec;
1750     }
1751     live.nat[answer].locked++;
1752     live.nat[answer].touched=touchcnt++;
1753     return answer;
1754     }
1755    
1756    
1757    
/* Read access, any native register, offsets resolved. */
static int readreg(int r, int size)
{
	return readreg_general(r,size,-1,0);
}
1762    
/* Read access into a specific native register. */
static int readreg_specific(int r, int size, int spec)
{
	return readreg_general(r,size,spec,0);
}
1767    
/* Read access where a pending constant offset may remain unresolved. */
static int readreg_offset(int r, int size)
{
	return readreg_general(r,size,-1,1);
}
1772    
1773     /* writereg_general(r, size, spec)
1774     *
1775     * INPUT
1776     * - r : mid-layer register
1777     * - size : requested size (1/2/4)
1778     * - spec : -1 if find or make a register free, otherwise specifies
1779     * the physical register to use in any case
1780     *
1781     * OUTPUT
1782     * - hard (physical, x86 here) register allocated to virtual register r
1783     */
static __inline__ int writereg_general(int r, int size, int spec)
{
	int n;
	int answer=-1;

	record_register(r);
	/* Partial writes must first fold any pending offset in */
	if (size<4) {
		remove_offset(r,spec);
	}

	/* Writing requires sole ownership of the native register */
	make_exclusive(r,size,spec);
	if (isinreg(r)) {
		int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
		int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
		n=live.state[r].realreg;

		Dif (live.nat[n].nholds!=1)
			abort();
		switch(size) {
		case 1:
			if (live.nat[n].canbyte || spec>=0) {
				live.state[r].dirtysize=ndsize;
				live.state[r].validsize=nvsize;
				answer=n;
			}
			break;
		case 2:
			if (live.nat[n].canword || spec>=0) {
				live.state[r].dirtysize=ndsize;
				live.state[r].validsize=nvsize;
				answer=n;
			}
			break;
		case 4:
			live.state[r].dirtysize=ndsize;
			live.state[r].validsize=nvsize;
			answer=n;
			break;
		default: abort();
		}
		if (answer<0)
			evict(r);
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0) {
		answer=alloc_reg_hinted(r,size,1,spec);
	}
	if (spec>=0 && spec!=answer) {
		mov_nregs(spec,answer);
		answer=spec;
	}
	/* An undefined register becomes fully valid once written */
	if (live.state[r].status==UNDEF)
		live.state[r].validsize=4;
	live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
	live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

	live.nat[answer].locked++;
	live.nat[answer].touched=touchcnt++;
	if (size==4) {
		live.state[r].val=0;
	}
	else {
		Dif (live.state[r].val) {
			write_log("Problem with val\n");
			abort();
		}
	}
	set_status(r,DIRTY);
	return answer;
}
1855    
/* Write access, any native register. */
static int writereg(int r, int size)
{
	return writereg_general(r,size,-1);
}
1860    
/* Write access into a specific native register. */
static int writereg_specific(int r, int size, int spec)
{
	return writereg_general(r,size,spec);
}
1865    
/* Get vreg r into a native register for read-modify-write access.
 *  - wsize: bytes written (must be >= rsize)
 *  - rsize: bytes read
 *  - spec:  required native register, or -1 for any
 * Returns the (locked) native register; r is marked DIRTY.
 */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
	int n;
	int answer=-1;

	record_register(r);
	if (live.state[r].status==UNDEF) {
		write_log("WARNING: Unexpected read of undefined register %d\n",r);
	}
	remove_offset(r,spec);
	make_exclusive(r,0,spec);

	Dif (wsize<rsize) {
		write_log("Cannot handle wsize<rsize in rmw_general()\n");
		abort();
	}
	if (isinreg(r) && live.state[r].validsize>=rsize) {
		n=live.state[r].realreg;
		Dif (live.nat[n].nholds!=1)
			abort();

		switch(rsize) {
		case 1:
			if (live.nat[n].canbyte || spec>=0) {
				answer=n;
			}
			break;
		case 2:
			if (live.nat[n].canword || spec>=0) {
				answer=n;
			}
			break;
		case 4:
			answer=n;
			break;
		default: abort();
		}
		if (answer<0)
			evict(r);
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0) {
		answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
	}

	if (spec>=0 && spec!=answer) {
		/* Too bad */
		mov_nregs(spec,answer);
		answer=spec;
	}
	if (wsize>live.state[r].dirtysize)
		live.state[r].dirtysize=wsize;
	if (wsize>live.state[r].validsize)
		live.state[r].validsize=wsize;
	set_status(r,DIRTY);

	live.nat[answer].locked++;
	live.nat[answer].touched=touchcnt++;

	Dif (live.state[r].val) {
		write_log("Problem with val(rmw)\n");
		abort();
	}
	return answer;
}
1932    
/* Read-modify-write access, any native register. */
static int rmw(int r, int wsize, int rsize)
{
	return rmw_general(r,wsize,rsize,-1);
}
1937    
/* Read-modify-write access into a specific native register. */
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
	return rmw_general(r,wsize,rsize,spec);
}
1942    
1943    
/* needed for restoring the carry flag on non-P6 cores */
/* Emit a bit test of bit i in vreg r; a word-sized read suffices for
   bit numbers below 16. */
static void bt_l_ri_noclobber(R4 r, IMM i)
{
	int size=4;
	if (i<16)
		size=2;
	r=readreg(r,size);
	raw_bt_l_ri(r,i);
	unlock2(r);
}
1954    
1955     /********************************************************************
1956     * FPU register status handling. EMIT TIME! *
1957     ********************************************************************/
1958    
/* Store FPU vreg r to its memory slot if dirty, keeping it in the
   register (status becomes CLEAN). */
static void f_tomem(int r)
{
	if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
		raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
		raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
		live.fate[r].status=CLEAN;
	}
}
1970    
/* Store FPU vreg r to memory if dirty, using the popping store variant
   (the x87 stack slot is dropped); status becomes INMEM. */
static void f_tomem_drop(int r)
{
	if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
		raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
		raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
		live.fate[r].status=INMEM;
	}
}
1982    
1983    
1984     static __inline__ int f_isinreg(int r)
1985     {
1986     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1987     }
1988    
/* Evict FPU vreg r from its native register: flush to memory (dropping
   the slot when r is the sole holder), then unhook it from the native
   reg's holds[] list (swapping in the last entry) and mark it INMEM. */
static void f_evict(int r)
{
	int rr;

	if (!f_isinreg(r))
		return;
	rr=live.fate[r].realreg;
	if (live.fat[rr].nholds==1)
		f_tomem_drop(r);
	else
		f_tomem(r);

	Dif (live.fat[rr].locked &&
		live.fat[rr].nholds==1) {
		write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
		abort();
	}

	live.fat[rr].nholds--;
	if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
		int topreg=live.fat[rr].holds[live.fat[rr].nholds];
		int thisind=live.fate[r].realind;
		live.fat[rr].holds[thisind]=topreg;
		live.fate[topreg].realind=thisind;
	}
	live.fate[r].status=INMEM;
	live.fate[r].realreg=-1;
}
2017    
2018     static __inline__ void f_free_nreg(int r)
2019     {
2020     int i=live.fat[r].nholds;
2021    
2022     while (i) {
2023     int vr;
2024    
2025     --i;
2026     vr=live.fat[r].holds[i];
2027     f_evict(vr);
2028     }
2029     Dif (live.fat[r].nholds!=0) {
2030     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2031     abort();
2032     }
2033     }
2034    
2035    
2036     /* Use with care! */
2037     static __inline__ void f_isclean(int r)
2038     {
2039     if (!f_isinreg(r))
2040     return;
2041     live.fate[r].status=CLEAN;
2042     }
2043    
/* Detach FPU vreg r from its native register without writing it back:
   mark clean first so the following evict emits no store. */
static __inline__ void f_disassociate(int r)
{
	f_isclean(r);
	f_evict(r);
}
2049    
2050    
2051    
/* Allocate a native FP register for FPU vreg r.  Picks the least-
 * recently-touched unlocked register (free ones win immediately),
 * spills its holders, loads r from memory unless willclobber, and
 * returns the chosen register.
 */
static int f_alloc_reg(int r, int willclobber)
{
	int bestreg;
	uae_s32 when;
	int i;
	uae_s32 badness;
	bestreg=-1;
	when=2000000000;
	for (i=N_FREGS;i--;) {
		badness=live.fat[i].touched;
		if (live.fat[i].nholds==0)
			badness=0;

		if (!live.fat[i].locked && badness<when) {
			bestreg=i;
			when=badness;
			if (live.fat[i].nholds==0)
				break;
		}
	}
	Dif (bestreg==-1)
		abort();

	if (live.fat[bestreg].nholds>0) {
		f_free_nreg(bestreg);
	}
	if (f_isinreg(r)) {
		f_evict(r);
	}

	if (!willclobber) {
		if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
			raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
#else
			raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
#endif
		}
		live.fate[r].status=CLEAN;
	}
	else {
		live.fate[r].status=DIRTY;
	}
	/* Register the new association */
	live.fate[r].realreg=bestreg;
	live.fate[r].realind=live.fat[bestreg].nholds;
	live.fat[bestreg].touched=touchcnt++;
	live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
	live.fat[bestreg].nholds++;

	return bestreg;
}
2103    
/* Drop one lock level on native FP reg r; aborts (debug) if not locked. */
static void f_unlock(int r)
{
	Dif (!live.fat[r].locked)
		abort();
	live.fat[r].locked--;
}
2110    
/* Lock native FP reg r against eviction/reallocation (counted). */
static void f_setlock(int r)
{
	live.fat[r].locked++;
}
2115    
2116     static __inline__ int f_readreg(int r)
2117     {
2118     int n;
2119     int answer=-1;
2120    
2121     if (f_isinreg(r)) {
2122     n=live.fate[r].realreg;
2123     answer=n;
2124     }
2125     /* either the value was in memory to start with, or it was evicted and
2126     is in memory now */
2127     if (answer<0)
2128     answer=f_alloc_reg(r,0);
2129    
2130     live.fat[answer].locked++;
2131     live.fat[answer].touched=touchcnt++;
2132     return answer;
2133     }
2134    
/* Ensure virtual register r is the sole occupant of its native FPU
   register, so it can be modified without corrupting the other virtual
   registers cached there.  If clobber is nonzero the old value need not
   be preserved into the new register. */
static __inline__ void f_make_exclusive(int r, int clobber)
{
    freg_status oldstate;
    int rr=live.fate[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!f_isinreg(r))
	return;
    if (live.fat[rr].nholds==1)
	return; /* Already exclusive */
    /* Count dirty co-residents — those would need writing back. */
    for (i=0;i<live.fat[rr].nholds;i++) {
	int vr=live.fat[rr].holds[i];
	if (vr!=r && live.fate[vr].status==DIRTY)
	    ndirt++;
    }
    if (!ndirt && !live.fat[rr].locked) {
	/* Everything else is clean, so let's keep this register */
	for (i=0;i<live.fat[rr].nholds;i++) {
	    int vr=live.fat[rr].holds[i];
	    if (vr!=r) {
		f_evict(vr);
		i--; /* Try that index again! (holds[] was compacted) */
	    }
	}
	Dif (live.fat[rr].nholds!=1) {
	    write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
	    for (i=0;i<live.fat[rr].nholds;i++) {
		write_log(" %d(%d,%d)",live.fat[rr].holds[i],
			  live.fate[live.fat[rr].holds[i]].realreg,
			  live.fate[live.fat[rr].holds[i]].realind);
	    }
	    write_log("\n");
	    abort();
	}
	return;
    }

    /* We have to split the register */
    oldstate=live.fate[r];

    f_setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    f_disassociate(r);
    /* Get a new register, that we will clobber completely */
    nr=f_alloc_reg(r,1);
    nind=live.fate[r].realind;
    if (!clobber)
	raw_fmov_rr(nr,rr); /* Make another copy */
    live.fate[r]=oldstate; /* Keep all the old state info */
    live.fate[r].realreg=nr;
    live.fate[r].realind=nind;
    f_unlock(rr);
}
2191    
2192    
2193     static __inline__ int f_writereg(int r)
2194     {
2195     int n;
2196     int answer=-1;
2197    
2198     f_make_exclusive(r,1);
2199     if (f_isinreg(r)) {
2200     n=live.fate[r].realreg;
2201     answer=n;
2202     }
2203     if (answer<0) {
2204     answer=f_alloc_reg(r,1);
2205     }
2206     live.fate[r].status=DIRTY;
2207     live.fat[answer].locked++;
2208     live.fat[answer].touched=touchcnt++;
2209     return answer;
2210     }
2211    
2212     static int f_rmw(int r)
2213     {
2214     int n;
2215    
2216     f_make_exclusive(r,0);
2217     if (f_isinreg(r)) {
2218     n=live.fate[r].realreg;
2219     }
2220     else
2221     n=f_alloc_reg(r,0);
2222     live.fate[r].status=DIRTY;
2223     live.fat[n].locked++;
2224     live.fat[n].touched=touchcnt++;
2225     return n;
2226     }
2227    
/* Transfer the FPU condition codes (cached in virtual register FP_RESULT)
   into the native integer flags.  tmp is a scratch virtual register, used
   only when the extraction sequence clobbers FFLAG_NREG. */
static void fflags_into_flags_internal(uae_u32 tmp)
{
    int r;

    clobber_flags();
    r=f_readreg(FP_RESULT);
    if (FFLAG_NREG_CLOBBER_CONDITION) {
	/* Park tmp in FFLAG_NREG so it is free to be clobbered, then
	   forget tmp's contents afterwards. */
	int tmp2=tmp;
	tmp=writereg_specific(tmp,4,FFLAG_NREG);
	raw_fflags_into_flags(r);
	unlock2(tmp);
	forget_about(tmp2);
    }
    else
	raw_fflags_into_flags(r);
    f_unlock(r);
    live_flags();
}
2246    
2247    
2248    
2249    
2250     /********************************************************************
2251     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2252     ********************************************************************/
2253    
2254     /*
2255     * RULES FOR HANDLING REGISTERS:
2256     *
2257     * * In the function headers, order the parameters
2258     * - 1st registers written to
2259     * - 2nd read/modify/write registers
2260     * - 3rd registers read from
2261     * * Before calling raw_*, you must call readreg, writereg or rmw for
2262     * each register
2263     * * The order for this is
2264     * - 1st call remove_offset for all registers written to with size<4
2265     * - 2nd call readreg for all registers read without offset
2266     * - 3rd call rmw for all rmw registers
2267     * - 4th call readreg_offset for all registers that can handle offsets
2268     * - 5th call get_offset for all the registers from the previous step
2269     * - 6th call writereg for all written-to registers
2270     * - 7th call raw_*
2271     * - 8th unlock2 all registers that were locked
2272     */
2273    
/* Mark the native CPU flags as live: they now hold the authoritative copy
   of the emulated condition codes, and the stacked copy is stale. */
MIDFUNC(0,live_flags,(void))
{
    live.flags_on_stack=TRASH;
    live.flags_in_flags=VALID;
    live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))

/* Tell the register allocator the current flags need not be preserved. */
MIDFUNC(0,dont_care_flags,(void))
{
    live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))


/* Copy the carry flag into the X-flag shadow (FLAGX) memory image. */
MIDFUNC(0,duplicate_carry,(void))
{
    evict(FLAGX);
    make_flags_live_internal();
    /* setcc condition code 2 — presumably x86 "b" (carry set);
       TODO confirm against raw_setcc_m's encoding. */
    COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
    log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))

/* Reload the native carry flag from bit 0 of FLAGX. */
MIDFUNC(0,restore_carry,(void))
{
    if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
	bt_l_ri_noclobber(FLAGX,0);
    }
    else { /* Avoid the stall the above creates.
	      This is slow on non-P6, though.
	   */
	COMPCALL(rol_b_ri(FLAGX,8));
	isclean(FLAGX);
    }
}
MENDFUNC(0,restore_carry,(void))

/* From here on, generated code must compute/preserve the flags. */
MIDFUNC(0,start_needflags,(void))
{
    needflags=1;
}
MENDFUNC(0,start_needflags,(void))

MIDFUNC(0,end_needflags,(void))
{
    needflags=0;
}
MENDFUNC(0,end_needflags,(void))

MIDFUNC(0,make_flags_live,(void))
{
    make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))

/* Convert the FPU result flags into the native integer flags.  Note that
   fflags_into_flags_internal() itself calls clobber_flags() again, so the
   call here is redundant (but harmless). */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
    clobber_flags();
    fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
2336    
2337    
/* Bit-test operations, mapping onto the native bt/btc/btr/bts family.
   Only the carry flag is defined afterwards.  For immediate bit numbers
   below 16 a 2-byte operand is sufficient, so only the low word of the
   virtual register is locked/loaded. */
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=readreg(r,size);
    raw_bt_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */

MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
    CLOBBER_BT;
    r=readreg(r,4);
    b=readreg(b,4);
    raw_bt_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */

/* Bit-test-and-complement (read-modify-write on r). */
MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btc_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))

MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btc_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))


/* Bit-test-and-reset. */
MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btr_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))

MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btr_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))


/* Bit-test-and-set. */
MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_bts_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))

MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_bts_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2431    
/* Load a longword from absolute address s into virtual register d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,4);
    raw_mov_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))


/* Indirect call through the register holding r. */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
    r=readreg(r,4);
    raw_call_r(r);
    unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */

/* Memory-destination immediate ops: d is an absolute address, s a value. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
    CLOBBER_SUB;
    raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))

MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))

MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))

MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2477    
/* Rotate-left by an immediate count.  A zero count is skipped entirely
   unless the caller needs the flag results. */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,1,1);
    raw_rol_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))

MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,2,2);
    raw_rol_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))

MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,4,4);
    raw_rol_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2510    
2511     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2512     {
2513     if (isconst(r)) {
2514     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2515     return;
2516     }
2517     CLOBBER_ROL;
2518     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2519     d=rmw(d,4,4);
2520     Dif (r!=1) {
2521     write_log("Illegal register %d in raw_rol_b\n",r);
2522     abort();
2523     }
2524     raw_rol_l_rr(d,r) ;
2525     unlock2(r);
2526     unlock2(d);
2527     }
2528     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2529    
2530     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2531     { /* Can only do this with r==1, i.e. cl */
2532    
2533     if (isconst(r)) {
2534     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2535     return;
2536     }
2537     CLOBBER_ROL;
2538     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2539     d=rmw(d,2,2);
2540     Dif (r!=1) {
2541     write_log("Illegal register %d in raw_rol_b\n",r);
2542     abort();
2543     }
2544     raw_rol_w_rr(d,r) ;
2545     unlock2(r);
2546     unlock2(d);
2547     }
2548     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2549    
2550     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2551     { /* Can only do this with r==1, i.e. cl */
2552    
2553     if (isconst(r)) {
2554     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2555     return;
2556     }
2557    
2558     CLOBBER_ROL;
2559     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2560     d=rmw(d,1,1);
2561     Dif (r!=1) {
2562     write_log("Illegal register %d in raw_rol_b\n",r);
2563     abort();
2564     }
2565     raw_rol_b_rr(d,r) ;
2566     unlock2(r);
2567     unlock2(d);
2568     }
2569     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2570    
2571    
2572     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2573     {
2574     if (isconst(r)) {
2575     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2576     return;
2577     }
2578     CLOBBER_SHLL;
2579     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2580     d=rmw(d,4,4);
2581     Dif (r!=1) {
2582     write_log("Illegal register %d in raw_rol_b\n",r);
2583     abort();
2584     }
2585     raw_shll_l_rr(d,r) ;
2586     unlock2(r);
2587     unlock2(d);
2588     }
2589     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2590    
2591     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2592     { /* Can only do this with r==1, i.e. cl */
2593    
2594     if (isconst(r)) {
2595     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2596     return;
2597     }
2598     CLOBBER_SHLL;
2599     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2600     d=rmw(d,2,2);
2601     Dif (r!=1) {
2602     write_log("Illegal register %d in raw_shll_b\n",r);
2603     abort();
2604     }
2605     raw_shll_w_rr(d,r) ;
2606     unlock2(r);
2607     unlock2(d);
2608     }
2609     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2610    
2611     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2612     { /* Can only do this with r==1, i.e. cl */
2613    
2614     if (isconst(r)) {
2615     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2616     return;
2617     }
2618    
2619     CLOBBER_SHLL;
2620     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2621     d=rmw(d,1,1);
2622     Dif (r!=1) {
2623     write_log("Illegal register %d in raw_shll_b\n",r);
2624     abort();
2625     }
2626     raw_shll_b_rr(d,r) ;
2627     unlock2(r);
2628     unlock2(d);
2629     }
2630     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2631    
2632    
/* Rotate-right, immediate and variable-count forms.
   NOTE(review): these are declared with read-only annotations (R1/R2/R4)
   yet call rmw() on the destination — the rol_* family uses RW* for the
   same pattern; verify whether gencomp relies on these annotations.
   Also, unlike rol_*_rr, the variable-count forms have no Dif(r!=1)
   sanity check. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,1,1);
    raw_ror_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))

MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,2,2);
    raw_ror_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))

MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,4,4);
    raw_ror_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))

/* Variable-count forms: count is pinned to SHIFTCOUNT_NREG (x86 CL);
   constant counts are folded into the immediate form. */
MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,4,4);
    raw_ror_l_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))

MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    raw_ror_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))

MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    raw_ror_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2711    
2712     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2713     {
2714     if (isconst(r)) {
2715     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2716     return;
2717     }
2718     CLOBBER_SHRL;
2719     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2720     d=rmw(d,4,4);
2721     Dif (r!=1) {
2722     write_log("Illegal register %d in raw_rol_b\n",r);
2723     abort();
2724     }
2725     raw_shrl_l_rr(d,r) ;
2726     unlock2(r);
2727     unlock2(d);
2728     }
2729     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2730    
2731     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2732     { /* Can only do this with r==1, i.e. cl */
2733    
2734     if (isconst(r)) {
2735     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2736     return;
2737     }
2738     CLOBBER_SHRL;
2739     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2740     d=rmw(d,2,2);
2741     Dif (r!=1) {
2742     write_log("Illegal register %d in raw_shrl_b\n",r);
2743     abort();
2744     }
2745     raw_shrl_w_rr(d,r) ;
2746     unlock2(r);
2747     unlock2(d);
2748     }
2749     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2750    
2751     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2752     { /* Can only do this with r==1, i.e. cl */
2753    
2754     if (isconst(r)) {
2755     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2756     return;
2757     }
2758    
2759     CLOBBER_SHRL;
2760     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2761     d=rmw(d,1,1);
2762     Dif (r!=1) {
2763     write_log("Illegal register %d in raw_shrl_b\n",r);
2764     abort();
2765     }
2766     raw_shrl_b_rr(d,r) ;
2767     unlock2(r);
2768     unlock2(d);
2769     }
2770     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2771    
2772    
2773    
/* Immediate-count shifts.  A zero count is skipped unless flags are
   needed; the longword logical shifts additionally constant-fold when the
   operand itself is a known constant (val is presumably an unsigned
   uae_u32, so >>= is a logical shift — declared elsewhere; TODO confirm). */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val<<=i;
	return;
    }
    CLOBBER_SHLL;
    r=rmw(r,4,4);
    raw_shll_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHLL;
    r=rmw(r,2,2);
    raw_shll_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHLL;
    r=rmw(r,1,1);
    raw_shll_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))

MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val>>=i;
	return;
    }
    CLOBBER_SHRL;
    r=rmw(r,4,4);
    raw_shrl_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,2,2);
    raw_shrl_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,1,1);
    raw_shrl_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))

/* Arithmetic (sign-propagating) right shifts — no const-folding path. */
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,4,4);
    raw_shra_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,2,2);
    raw_shra_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,1,1);
    raw_shra_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2880    
2881     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2882     {
2883     if (isconst(r)) {
2884     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2885     return;
2886     }
2887     CLOBBER_SHRA;
2888     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2889     d=rmw(d,4,4);
2890     Dif (r!=1) {
2891     write_log("Illegal register %d in raw_rol_b\n",r);
2892     abort();
2893     }
2894     raw_shra_l_rr(d,r) ;
2895     unlock2(r);
2896     unlock2(d);
2897     }
2898     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2899    
2900     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2901     { /* Can only do this with r==1, i.e. cl */
2902    
2903     if (isconst(r)) {
2904     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2905     return;
2906     }
2907     CLOBBER_SHRA;
2908     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2909     d=rmw(d,2,2);
2910     Dif (r!=1) {
2911     write_log("Illegal register %d in raw_shra_b\n",r);
2912     abort();
2913     }
2914     raw_shra_w_rr(d,r) ;
2915     unlock2(r);
2916     unlock2(d);
2917     }
2918     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2919    
2920     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2921     { /* Can only do this with r==1, i.e. cl */
2922    
2923     if (isconst(r)) {
2924     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2925     return;
2926     }
2927    
2928     CLOBBER_SHRA;
2929     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2930     d=rmw(d,1,1);
2931     Dif (r!=1) {
2932     write_log("Illegal register %d in raw_shra_b\n",r);
2933     abort();
2934     }
2935     raw_shra_b_rr(d,r) ;
2936     unlock2(r);
2937     unlock2(d);
2938     }
2939     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2940    
2941    
/* Set byte register/memory to 0 or 1 depending on condition code cc. */
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
    CLOBBER_SETCC;
    d=writereg(d,1);
    raw_setcc(d,cc);
    unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))

MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
    CLOBBER_SETCC;
    raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))

/* Conditional move: d = s if cc holds.  d==s is a no-op and skipped. */
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
    if (d==s)
	return;
    CLOBBER_CMOV;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_cmov_l_rr(d,s,cc);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))

/* Conditional load from absolute address s. */
MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
    CLOBBER_CMOV;
    d=rmw(d,4,4);
    raw_cmov_l_rm(d,s,cc);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2979    
/* Bit-scan-forward: d = index of lowest set bit in s.
   NOTE(review): s is annotated W4 but only read (readreg) — R4 looks
   intended; verify against gencomp before changing the signature. */
MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
{
    CLOBBER_BSF;
    s = readreg(s, 4);
    d = writereg(d, 4);
    raw_bsf_l_rr(d, s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))

/* Set the Z flag depending on the value in s. Note that the
   value has to be 0 or -1 (or, more precisely, for non-zero
   values, bit 14 must be set)! */
MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
{
    CLOBBER_BSF;
    /* s must land in FLAG_NREG3 for the flag-setting sequence. */
    s=rmw_specific(s,4,4,FLAG_NREG3);
    tmp=writereg(tmp,4);
    raw_flags_set_zero(s, tmp);
    unlock2(tmp);
    unlock2(s);
}
MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3004 gbeauche 1.1
/* 32x32 -> 32 signed multiply, result in d. */
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_imul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))

/* 32x32 -> 64 signed multiply; the operands are pinned to the fixed
   register pair MUL_NREG1/MUL_NREG2 required by the native instruction. */
MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_imul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))

/* 32x32 -> 64 unsigned multiply, same register pinning as above. */
MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_mul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))

/* 32x32 -> 32 unsigned multiply, result in d. */
MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_mul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3048    
#if SIZEOF_VOID_P == 8
/* Sign-extend a 32-bit value to the full 64-bit host register (x86-64
   builds only).  Constant operands are folded at translation time. */
MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)live.state[s].val);
	return;
    }

    CLOBBER_SE32;
    isrmw=(s==d);
    if (!isrmw) {
	s=readreg(s,4);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,4);
    }
    raw_sign_extend_32_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
#endif
3080    
/* Sign-extend the low word of s into the full longword d.  When s and d
   are the same virtual register, a single rmw lock is used to avoid
   double-locking with conflicting sizes; constants fold at translation
   time. */
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s16)live.state[s].val);
	return;
    }

    CLOBBER_SE16;
    isrmw=(s==d);
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_sign_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))

/* Sign-extend the low byte of s into the full longword d. */
MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_SE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,1);
    }

    raw_sign_extend_8_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3142    
3143    
/* Zero-extend the low word of s into the full longword d.  Same
   same-register (rmw) handling and constant folding as the sign-extend
   variants above. */
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u16)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE16;
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_zero_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))

/* Zero-extend the low byte of s into the full longword d. */
MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;
    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,1);
    }

    raw_zero_extend_8_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3204    
/* Byte register-to-register move; same-register moves are dropped and
   constant sources degrade to an immediate move. */
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
    if (d==s)
	return;
    if (isconst(s)) {
	COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=writereg(d,1);
    raw_mov_b_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))

/* Word register-to-register move, same shortcuts as above. */
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
    if (d==s)
	return;
    if (isconst(s)) {
	COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=writereg(d,2);
    raw_mov_w_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3240    
3241    
/* Indexed loads: d = *(baser + index*factor), in long/word/byte widths. */
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_mov_l_rrm_indexed(d,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,2);

    raw_mov_w_rrm_indexed(d,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,1);

    raw_mov_b_rrm_indexed(d,baser,index,factor);

    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3284    
3285    
/* Indexed stores: *(baser + index*factor) = s, in long/word/byte widths.
   The long variant asserts that s was not allocated into the same native
   register as the address operands (the narrower variants lack this
   check). */
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    s=readreg(s,4);

    Dif (baser==s || index==s)
	abort();


    raw_mov_l_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    s=readreg(s,2);

    raw_mov_w_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
    CLOBBER_MOV;
    s=readreg(s,1);
    baser=readreg(baser,4);
    index=readreg(index,4);

    raw_mov_b_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3331    
3332    
/* Store the long in s to memory at base+baser+factor*index.
   Pending constant offsets attached to baser/index (see USE_OFFSET) are
   folded into the immediate displacement instead of being flushed. */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
    int basereg=baser;    /* remember mid-layer ids; baser/index are */
    int indexreg=index;   /* overwritten with native register numbers below */

    CLOBBER_MOV;
    s=readreg(s,4);
    baser=readreg_offset(baser,4);   /* read without flushing pending offset */
    index=readreg_offset(index,4);

    base+=get_offset(basereg);          /* fold offsets into displacement */
    base+=factor*get_offset(indexreg);

    raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

/* Store the word in s to memory at base+baser+factor*index (see long
   variant above for the offset-folding scheme). */
MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    s=readreg(s,2);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

/* Store the byte in s to memory at base+baser+factor*index. */
MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    s=readreg(s,1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3392    
3393    
3394    
/* Read a long from base+baser+factor*index */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;    /* keep mid-layer ids for get_offset() lookups */
    int indexreg=index;

    CLOBBER_MOV;
    baser=readreg_offset(baser,4);   /* don't flush pending offsets... */
    index=readreg_offset(index,4);
    base+=get_offset(basereg);       /* ...fold them into the displacement */
    base+=factor*get_offset(indexreg);
    d=writereg(d,4);                 /* full-long write; no remove_offset needed */
    raw_mov_l_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a word from base+baser+factor*index into d. */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    remove_offset(d,-1);   /* partial write: any pending offset on d must go first */
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,2);
    raw_mov_w_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a byte from base+baser+factor*index into d. */
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    remove_offset(d,-1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,1);
    raw_mov_b_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3453    
/* Read a long from base+factor*index */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
    int indexreg=index;

    /* Constant index: the whole address is known, use a plain memory load. */
    if (isconst(index)) {
      COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
      return;
    }

    CLOBBER_MOV;
    index=readreg_offset(index,4);
    base+=get_offset(indexreg)*factor;   /* fold pending offset into base */
    d=writereg(d,4);

    raw_mov_l_rm_indexed(d,base,index,factor);
    unlock2(index);
    unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3474    
3475    
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
    /* Constant address: turn the register-indirect load into an absolute one. */
    if (isconst(s)) {
      COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
      return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,4);

    raw_mov_l_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
    if (isconst(s)) {
      COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
      return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,2);

    raw_mov_w_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
    if (isconst(s)) {
      COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
      return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,1);

    raw_mov_b_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3526    
/* read the long at the address contained in s+offset and store in d
   (like mov_l_rR, but folds any pending constant offset on s into the
   displacement instead of flushing it) */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
    int sreg=s;   /* mid-layer id, needed for get_offset() after remap */
    if (isconst(s)) {
      COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
      return;
    }
    CLOBBER_MOV;
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);   /* fold pending offset into displacement */
    d=writereg(d,4);

    raw_mov_l_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
    int sreg=s;
    if (isconst(s)) {
      COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
      return;
    }
    CLOBBER_MOV;
    remove_offset(d,-1);   /* partial write of d: flush its pending offset */
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,2);

    raw_mov_w_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
    int sreg=s;
    if (isconst(s)) {
      COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
      return;
    }
    CLOBBER_MOV;
    remove_offset(d,-1);
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,1);

    raw_mov_b_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3585    
/* Store the long immediate i to memory at d+offset. */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;   /* mid-layer id for get_offset() below */
    if (isconst(d)) {
      /* Address is known: emit an absolute memory store instead. */
      COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
      return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);   /* fold pending offset into displacement */
    raw_mov_l_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))

/* Store the word immediate i to memory at d+offset. */
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
      COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
      return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))

/* Store the byte immediate i to memory at d+offset. */
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
      COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
      return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3633    
/* Warning! OFFSET is byte sized only! */
/* Store the long in s to memory at d+offset. */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
    if (isconst(d)) {   /* known address: absolute store */
      COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
      return;
    }
    if (isconst(s)) {   /* known value: store immediate */
      COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
      return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg(d,4);   /* d holds the address — read, not written */

    raw_mov_l_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))

/* Store the word in s to memory at d+offset. */
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
    if (isconst(d)) {
      COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
      return;
    }
    if (isconst(s)) {
      COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
      return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg(d,4);
    raw_mov_w_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))

/* Store the byte in s to memory at d+offset. */
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
    if (isconst(d)) {
      COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
      return;
    }
    if (isconst(s)) {
      COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
      return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg(d,4);
    raw_mov_b_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3695    
/* d = s + offset (address computation, no memory access, no flags). */
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
    if (isconst(s)) {   /* fold at compile time */
      COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
      return;
    }
#if USE_OFFSET
    if (d==s) {
      /* In-place add: just record a pending offset, emit nothing. */
      add_offset(d,offset);
      return;
    }
#endif
    CLOBBER_LEA;
    s=readreg(s,4);
    d=writereg(d,4);
    raw_lea_l_brr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))

/* d = s + factor*index + offset. */
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
    if (!offset) {   /* no displacement: cheaper two-operand form */
      COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
      return;
    }
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_brr_indexed(d,s,index,factor,offset);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))

/* d = s + factor*index. */
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_rr_indexed(d,s,index,factor);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3748    
/* write s to the long at the address contained in d+offset
   (like mov_l_Rr, but folds d's pending constant offset into the
   displacement instead of flushing it) */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
    int dreg=d;   /* mid-layer id for get_offset() after remap */
    if (isconst(d)) {
      COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
      return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);   /* fold pending offset into displacement */

    raw_mov_l_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))

/* write s to the word at the address contained in d+offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
    int dreg=d;

    if (isconst(d)) {
      COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
      return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))

/* write s to the byte at the address contained in d+offset */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
      COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
      return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3806    
/* Byte-swap all four bytes of r (e.g. 0x12345678 -> 0x78563412). */
MIDFUNC(1,bswap_32,(RW4 r))
{
    int reg=r;

    if (isconst(r)) {   /* fold at compile time */
      uae_u32 oldv=live.state[r].val;
      live.state[r].val=reverse32(oldv);
      return;
    }

    CLOBBER_SW32;
    r=rmw(r,4,4);
    raw_bswap_32(r);
    unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))

/* Swap the two low bytes of r; the upper 16 bits are preserved. */
MIDFUNC(1,bswap_16,(RW2 r))
{
    if (isconst(r)) {   /* fold at compile time, keeping the high word */
      uae_u32 oldv=live.state[r].val;
      live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
        (oldv&0xffff0000);
      return;
    }

    CLOBBER_SW16;
    r=rmw(r,2,2);

    raw_bswap_16(r);
    unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3840    
3841    
3842    
/* d = s. No native move is emitted: d is simply made to share s's native
   register, updating the live-register bookkeeping. The real copy (if any)
   happens lazily when one of them is evicted/spilled. */
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
    int olds;

    if (d==s) { /* How pointless! */
      return;
    }
    if (isconst(s)) {   /* constant propagates without touching a register */
      COMPCALL(mov_l_ri)(d,live.state[s].val);
      return;
    }
    olds=s;              /* keep mid-layer id; s becomes a native reg below */
    disassociate(d);     /* detach d from whatever it held before */
    s=readreg_offset(s,4);
    /* Alias d onto s's native register and mark it dirty so it gets
       written back if the pair is ever split. */
    live.state[d].realreg=s;
    live.state[d].realind=live.nat[s].nholds;
    live.state[d].val=live.state[olds].val;   /* inherit pending offset */
    live.state[d].validsize=4;
    live.state[d].dirtysize=4;
    set_status(d,DIRTY);

    live.nat[s].holds[live.nat[s].nholds]=d;  /* register now holds d too */
    live.nat[s].nholds++;
    log_clobberreg(d);
    /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
       d,s,live.state[d].realind,live.nat[s].nholds); */
    unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3872    
/* Store the long in s to absolute memory address d. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
    if (isconst(s)) {   /* known value: store immediate instead */
      COMPCALL(mov_l_mi)(d,live.state[s].val);
      return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);

    raw_mov_l_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))


/* Store the word in s to absolute memory address d. */
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
    if (isconst(s)) {
      COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
      return;
    }
    CLOBBER_MOV;
    s=readreg(s,2);

    raw_mov_w_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))

/* Load the word at absolute memory address s into d. */
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))

/* Store the byte in s to absolute memory address d. */
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
    if (isconst(s)) {
      COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
      return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);

    raw_mov_b_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))

/* Load the byte at absolute memory address s into d. */
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3936    
/* d = immediate s. No code is generated here — the constant is recorded in
   the register state and only materialized when actually needed. */
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
    set_const(d,s);
    return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))

/* Load the word immediate s into d (upper bits of d untouched: W2). */
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))

/* Load the byte immediate s into d. */
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3963    
3964    
/* Add immediate s to the long at absolute memory address d. */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_l_mi(d,s) ;
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))

/* Add immediate s to the word at absolute memory address d. */
MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_w_mi(d,s) ;
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))

/* Add immediate s to the byte at absolute memory address d. */
MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_b_mi(d,s) ;
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3985    
3986    
/* Set flags according to d AND i (TEST); d is not modified. */
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
    CLOBBER_TEST;
    d=readreg(d,4);

    raw_test_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3996    
3997     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3998     {
3999     CLOBBER_TEST;
4000     d=readreg(d,4);
4001     s=readreg(s,4);
4002    
4003     raw_test_l_rr(d,s);;
4004     unlock2(d);
4005     unlock2(s);
4006     }
4007     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4008    
/* Set flags according to d AND s, word-sized; no register is modified. */
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
    CLOBBER_TEST;
    d=readreg(d,2);
    s=readreg(s,2);

    raw_test_w_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))

/* Set flags according to d AND s, byte-sized; no register is modified. */
MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
    CLOBBER_TEST;
    d=readreg(d,1);
    s=readreg(s,1);

    raw_test_b_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4032    
4033    
/* d &= i. Folded at compile time when d is constant and the emulated
   flags are not needed. */
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
      live.state[d].val &= i;
      return;
    }

    CLOBBER_AND;
    d=rmw(d,4,4);   /* read-modify-write lock */

    raw_and_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

/* d &= s (long). */
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
    CLOBBER_AND;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_and_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))

/* d &= s (word). */
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
    CLOBBER_AND;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_and_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))

/* d &= s (byte). */
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
    CLOBBER_AND;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_and_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
4084    
// gb-- used for making an fpcr value in compemu_fpp.cpp
/* d |= long at absolute memory address s. */
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))

/* d |= i. Folded at compile time when d is constant and flags unneeded. */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
      live.state[d].val|=i;
      return;
    }
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))

/* d |= s (long). Folded when both operands are compile-time constants. */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
    if (isconst(d) && isconst(s) && !needflags) {
      live.state[d].val|=live.state[s].val;
      return;
    }
    CLOBBER_OR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_or_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))

/* d |= s (word). */
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
    CLOBBER_OR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_or_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))

/* d |= s (byte). */
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
    CLOBBER_OR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_or_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
4149    
/* d += s + carry (long). Depends on the incoming carry flag, so no
   constant folding is possible here. */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
    CLOBBER_ADC;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_adc_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))

/* d += s + carry (word). */
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
    CLOBBER_ADC;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_adc_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))

/* d += s + carry (byte). */
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
    CLOBBER_ADC;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_adc_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
4186    
/* d += s (long). Delegates to the immediate form when s is constant. */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
      COMPCALL(add_l_ri)(d,live.state[s].val);
      return;
    }

    CLOBBER_ADD;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_add_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))

/* d += s (word). */
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
      COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
      return;
    }

    CLOBBER_ADD;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_add_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))

/* d += s (byte). */
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
      COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
      return;
    }

    CLOBBER_ADD;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_add_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
4238    
/* d -= i (long). When flags are not needed, this is folded into the
   constant state or deferred as a pending offset on d (USE_OFFSET). */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags)   /* subtracting 0 with no flag effect: no-op */
      return;
    if (isconst(d) && !needflags) {
      live.state[d].val-=i;
      return;
    }
#if USE_OFFSET
    if (!needflags) {
      add_offset(d,-i);   /* defer: record as pending offset, emit nothing */
      return;
    }
#endif

    CLOBBER_SUB;
    d=rmw(d,4,4);

    raw_sub_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))

/* d -= i (word). */
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags)
      return;

    CLOBBER_SUB;
    d=rmw(d,2,2);

    raw_sub_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))

/* d -= i (byte). */
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags)
      return;

    CLOBBER_SUB;
    d=rmw(d,1,1);

    raw_sub_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4288    
/* d += i (long). Mirror image of sub_l_ri: folded into the constant state
   or deferred as a pending offset when flags are not needed. */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags)
      return;
    if (isconst(d) && !needflags) {
      live.state[d].val+=i;
      return;
    }
#if USE_OFFSET
    if (!needflags) {
      add_offset(d,i);
      return;
    }
#endif
    CLOBBER_ADD;
    d=rmw(d,4,4);
    raw_add_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))

/* d += i (word). */
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags)
      return;

    CLOBBER_ADD;
    d=rmw(d,2,2);

    raw_add_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))

/* d += i (byte). */
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags)
      return;

    CLOBBER_ADD;
    d=rmw(d,1,1);

    raw_add_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4336    
/* d -= s + borrow (long). Uses the incoming carry/borrow flag. */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
    CLOBBER_SBB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sbb_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))

/* d -= s + borrow (word). */
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
    CLOBBER_SBB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sbb_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))

/* d -= s + borrow (byte). */
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
    CLOBBER_SBB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sbb_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4372    
/* d -= s (long). Delegates to the immediate form when s is constant. */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
      COMPCALL(sub_l_ri)(d,live.state[s].val);
      return;
    }

    CLOBBER_SUB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sub_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))

/* d -= s (word). */
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
      COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
      return;
    }

    CLOBBER_SUB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sub_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))

/* d -= s (byte). */
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
      COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
      return;
    }

    CLOBBER_SUB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sub_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
4423    
/* Compare d with s (long): sets flags as d-s would, modifies neither. */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
    CLOBBER_CMP;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_cmp_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))

/* Compare r with immediate i (long). */
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
    CLOBBER_CMP;
    r=readreg(r,4);

    raw_cmp_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))

/* Compare d with s (word). */
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
    CLOBBER_CMP;
    s=readreg(s,2);
    d=readreg(d,2);

    raw_cmp_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))

/* Compare d with s (byte). */
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
    CLOBBER_CMP;
    s=readreg(s,1);
    d=readreg(d,1);

    raw_cmp_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4469    
4470    
/* d ^= s (long). */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
    CLOBBER_XOR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_xor_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))

/* d ^= s (word). */
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
    CLOBBER_XOR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_xor_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))

/* d ^= s (byte). */
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
    CLOBBER_XOR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_xor_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4506    
/* Indirect call through register r with one input (in1, isize bytes) and
   one output (out1, osize bytes). The result is taken from REG_RESULT and
   associated with out1 in the register map. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
    clobber_flags();        /* the callee may trash the emulated flags */
    remove_all_offsets();   /* pending offsets can't survive a call */
    if (osize==4) {
      /* Full-long result: the old value of out1 is dead, unless out1
         aliases an input. */
      if (out1!=in1 && out1!=r) {
        COMPCALL(forget_about)(out1);
      }
    }
    else {
      tomem_c(out1);   /* partial write: preserve the rest of out1 in memory */
    }

    in1=readreg_specific(in1,isize,REG_PAR1);   /* pin arg to its ABI register */
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
                             any need for swapping nregs in prepare_for_call_2
                          */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in1);   /* stack-based ABI: pass the argument on the stack */
#endif
    unlock2(in1);
    unlock2(r);

    prepare_for_call_2();
    raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(4);   /* caller cleans up the pushed argument */
#endif


    /* Record that REG_RESULT now holds out1, dirty so it gets written back. */
    live.nat[REG_RESULT].holds[0]=out1;
    live.nat[REG_RESULT].nholds=1;
    live.nat[REG_RESULT].touched=touchcnt++;

    live.state[out1].realreg=REG_RESULT;
    live.state[out1].realind=0;
    live.state[out1].val=0;
    live.state[out1].validsize=osize;
    live.state[out1].dirtysize=osize;
    set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4551    
/* Indirect call through register r with two inputs and no (tracked) result. */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
    clobber_flags();
    remove_all_offsets();
    in1=readreg_specific(in1,isize1,REG_PAR1);   /* pin args to ABI registers */
    in2=readreg_specific(in2,isize2,REG_PAR2);
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
                             any need for swapping nregs in prepare_for_call_2
                          */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in2);   /* stack ABI: push right-to-left */
    raw_push_l_r(in1);
#endif
    unlock2(r);
    unlock2(in1);
    unlock2(in2);
    prepare_for_call_2();
    raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(8);   /* pop both arguments */
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4576    
4577     /* forget_about() takes a mid-layer register */
4578     MIDFUNC(1,forget_about,(W4 r))
4579     {
4580     if (isinreg(r))
4581     disassociate(r);
4582     live.state[r].val=0;
4583     set_status(r,UNDEF);
4584     }
4585     MENDFUNC(1,forget_about,(W4 r))
4586    
/* Emit a single host NOP instruction. */
MIDFUNC(0,nop,(void))
{
	raw_nop();
}
MENDFUNC(0,nop,(void))
4592    
4593    
/* FP analogue of forget_about(): detach virtual FP register r from any
 * native FP register and mark its contents undefined. */
MIDFUNC(1,f_forget_about,(FW r))
{
	if (f_isinreg(r))
		f_disassociate(r);
	live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
4601    
/* ----------------------------------------------------------------------
 * FP constant loaders: each allocates virtual FP register r for writing,
 * emits the raw instruction that loads a well-known x87 constant into it
 * (pi, log10(2), log2(e), ln(2), +1.0, +0.0), then releases the register.
 * ---------------------------------------------------------------------- */

/* r = pi */
MIDFUNC(1,fmov_pi,(FW r))
{
	r=f_writereg(r);
	raw_fmov_pi(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))

/* r = log10(2) */
MIDFUNC(1,fmov_log10_2,(FW r))
{
	r=f_writereg(r);
	raw_fmov_log10_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))

/* r = log2(e) */
MIDFUNC(1,fmov_log2_e,(FW r))
{
	r=f_writereg(r);
	raw_fmov_log2_e(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))

/* r = ln(2) */
MIDFUNC(1,fmov_loge_2,(FW r))
{
	r=f_writereg(r);
	raw_fmov_loge_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))

/* r = 1.0 */
MIDFUNC(1,fmov_1,(FW r))
{
	r=f_writereg(r);
	raw_fmov_1(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))

/* r = 0.0 */
MIDFUNC(1,fmov_0,(FW r))
{
	r=f_writereg(r);
	raw_fmov_0(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))
4649    
/* ----------------------------------------------------------------------
 * FP loads/stores between memory (MEMR/MEMW addresses) and virtual FP
 * registers.  Naming: _rm = load reg from mem, _mr = store mem from reg;
 * fmovi = 32-bit integer, fmovs = single precision, fmov = double
 * precision, fmov_ext = 80-bit extended precision.
 * ---------------------------------------------------------------------- */

/* Load double from m into r. */
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmov_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))

/* Load 32-bit integer from m, converted to FP, into r. */
MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmovi_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))

/* Store r to m as a 32-bit integer. */
MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmovi_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))

/* Load single-precision float from m into r. */
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmovs_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))

/* Store r to m as a single-precision float. */
MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmovs_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))

/* Store r to m as an 80-bit extended-precision float. */
MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmov_ext_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))

/* Store r to m as a double. */
MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmov_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))

/* Load 80-bit extended-precision float from m into r. */
MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmov_ext_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4713    
/* FP register-to-register move: d = s.  With USE_F_ALIAS the move is
 * virtual only: d is recorded as another name (alias) for the native
 * register holding s, and marked DIRTY so it gets written back later.
 * Without aliasing, an actual host move instruction is emitted. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
	if (d==s) { /* How pointless! */
		return;
	}
#if USE_F_ALIAS
	f_disassociate(d);	/* drop d's old binding before re-aliasing */
	s=f_readreg(s);
	/* Append d to the holder list of s's native register. */
	live.fate[d].realreg=s;
	live.fate[d].realind=live.fat[s].nholds;
	live.fate[d].status=DIRTY;
	live.fat[s].holds[live.fat[s].nholds]=d;
	live.fat[s].nholds++;
	f_unlock(s);
#else
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fmov_rr(d,s);
	f_unlock(s);
	f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4737    
/* Load the x87 FPU control word from memory at base+index; used to switch
 * rounding/precision modes.  index is a virtual integer register. */
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
	index=readreg(index,4);

	raw_fldcw_m_indexed(index,base);
	unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4746    
/* Compare FP register r against 0.0, setting the emulated FPU condition
 * codes (via the raw layer). */
MIDFUNC(1,ftst_r,(FR r))
{
	r=f_readreg(r);
	raw_ftst_r(r);
	f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))
4754    
/* Declare the cached FP result (FP_RESULT) dead, so it is not written
 * back when its native register is reused. */
MIDFUNC(0,dont_care_fflags,(void))
{
	f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4760    
/* ----------------------------------------------------------------------
 * Unary FP operations, all of the shape d = op(s): read s, allocate d
 * for writing, emit the raw instruction, release both registers.
 * ---------------------------------------------------------------------- */

/* d = sqrt(s) */
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsqrt_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))

/* d = |s| */
MIDFUNC(2,fabs_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fabs_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))

/* d = sin(s) */
MIDFUNC(2,fsin_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsin_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))

/* d = cos(s) */
MIDFUNC(2,fcos_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fcos_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))

/* d = 2^s */
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_ftwotox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))

/* d = e^s */
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fetox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))

/* d = round-to-integer(s), per current FPU rounding mode */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_frndint_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))

/* d = log2(s) */
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_flog2_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))

/* d = -s */
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fneg_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
4850    
/* ----------------------------------------------------------------------
 * Binary FP operations of the shape d = d op s: d is a read-modify-write
 * operand (f_rmw), s is read-only.  fcmp_rr only reads both operands and
 * sets condition codes.
 * ---------------------------------------------------------------------- */

/* d += s */
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fadd_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))

/* d -= s */
MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fsub_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))

/* Compare d with s (no register is modified; sets FPU condition codes). */
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
	d=f_readreg(d);
	s=f_readreg(s);
	raw_fcmp_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))

/* d /= s */
MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fdiv_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))

/* d = fmod(d,s) (truncated remainder, FPREM-style) */
MIDFUNC(2,frem_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))

/* d = remainder(d,s) (IEEE round-to-nearest remainder, FPREM1-style) */
MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem1_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))

/* d *= s */
MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fmul_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4920    
4921     /********************************************************************
4922     * Support functions exposed to gencomp. CREATE time *
4923     ********************************************************************/
4924    
4925 gbeauche 1.26 void set_zero(int r, int tmp)
4926     {
4927     if (setzflg_uses_bsf)
4928     bsf_l_rr(r,r);
4929     else
4930     simulate_bsf(tmp,r);
4931     }
4932    
4933 gbeauche 1.1 int kill_rodent(int r)
4934     {
4935     return KILLTHERAT &&
4936     have_rat_stall &&
4937     (live.state[r].status==INMEM ||
4938     live.state[r].status==CLEAN ||
4939     live.state[r].status==ISCONST ||
4940     live.state[r].dirtysize==4);
4941     }
4942    
/* Return the constant value cached for virtual register r.  Aborts (in
 * debug builds, via Dif) if r is not actually in ISCONST state. */
uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}
	return live.state[r].val;
}
4951    
4952     void sync_m68k_pc(void)
4953     {
4954     if (m68k_pc_offset) {
4955     add_l_ri(PC_P,m68k_pc_offset);
4956     comp_pc_p+=m68k_pc_offset;
4957     m68k_pc_offset=0;
4958     }
4959     }
4960    
4961     /********************************************************************
4962     * Scratch registers management *
4963     ********************************************************************/
4964    
/* Backing memory for scratch (non-68k) virtual registers: integer regs
 * beyond the first 16 and FP regs beyond the first 8 spill here. */
struct scratch_t {
	uae_u32 regs[VREGS];
	fpu_register fregs[VFREGS];
};

static scratch_t scratch;	/* single static instance used by init_comp() */
4971    
4972     /********************************************************************
4973     * Support functions exposed to newcpu *
4974     ********************************************************************/
4975    
/* Render a boolean as "on"/"off" for log output. */
static inline const char *str_on_off(bool b)
{
	if (b)
		return "on";
	return "off";
}
4980    
/* One-time JIT initialization: read JIT-related prefs, probe the target
 * CPU's features, pick the cache-flush strategy and build the compiler
 * dispatch tables.  Safe to call multiple times; only the first call does
 * anything. */
void compiler_init(void)
{
	static bool initialized = false;
	if (initialized)
		return;

#if JIT_DEBUG
	// JIT debug mode ?
	JITDebug = PrefsFindBool("jitdebug");
#endif
	write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
	// Use JIT compiler for FPU instructions ?
	avoid_fpu = !PrefsFindBool("jitfpu");
#else
	// JIT FPU is always disabled
	avoid_fpu = true;
#endif
	write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

	// Get size of the translation cache (in KB)
	cache_size = PrefsFindInt32("jitcachesize");
	write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

	// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
	raw_init_cpu();
	setzflg_uses_bsf = target_check_bsf();
	write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
	write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
	write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);

	// Translation cache flush mechanism
	lazy_flush = PrefsFindBool("jitlazyflush");
	write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
	flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

	// Compiler features
	write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
	write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
	write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
#if USE_INLINING
	follow_const_jumps = PrefsFindBool("jitinline");
#endif
	write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
	write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

	// Build compiler tables
	build_comp();

	initialized = true;

#if PROFILE_UNTRANSLATED_INSNS
	write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif

#if PROFILE_COMPILE_TIME
	write_log("<JIT compiler> : gather statistics on translation time\n");
	emul_start_time = clock();
#endif
}
5042    
/* JIT shutdown: release the translation cache and popallspace mappings,
 * then dump whichever profiling statistics were compiled in
 * (compile-time ratio, most frequent untranslated opcodes, 68k register
 * usage histogram). */
void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
	emul_end_time = clock();
#endif

	// Deallocate translation cache
	if (compiled_code) {
		vm_release(compiled_code, cache_size * 1024);
		compiled_code = 0;
	}

	// Deallocate popallspace
	if (popallspace) {
		vm_release(popallspace, POPALLSPACE_SIZE);
		popallspace = 0;
	}

#if PROFILE_COMPILE_TIME
	write_log("### Compile Block statistics\n");
	write_log("Number of calls to compile_block : %d\n", compile_count);
	uae_u32 emul_time = emul_end_time - emul_start_time;
	write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
	write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
		  100.0*double(compile_time)/double(emul_time));
	write_log("\n");
#endif

#if PROFILE_UNTRANSLATED_INSNS
	/* Sort opcodes by untranslated execution count and print the top ten. */
	uae_u64 untranslated_count = 0;
	for (int i = 0; i < 65536; i++) {
		opcode_nums[i] = i;
		untranslated_count += raw_cputbl_count[i];
	}
	write_log("Sorting out untranslated instructions count...\n");
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
	write_log("\nRank  Opc      Count Name\n");
	for (int i = 0; i < untranslated_top_ten; i++) {
		uae_u32 count = raw_cputbl_count[opcode_nums[i]];
		struct instr *dp;
		struct mnemolookup *lookup;
		if (!count)
			break;
		dp = table68k + opcode_nums[i];
		for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
			;
		write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
	}
#endif

#if RECORD_REGISTER_USAGE
	/* Print 68k register usage sorted by frequency, with cumulative %. */
	int reg_count_ids[16];
	uint64 tot_reg_count = 0;
	for (int i = 0; i < 16; i++) {
		reg_count_ids[i] = i;
		tot_reg_count += reg_count[i];
	}
	qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
	uint64 cum_reg_count = 0;
	for (int i = 0; i < 16; i++) {
		int r = reg_count_ids[i];
		cum_reg_count += reg_count[r];
		printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
		       reg_count[r],
		       100.0*double(reg_count[r])/double(tot_reg_count),
		       100.0*double(cum_reg_count)/double(tot_reg_count));
	}
#endif
}
5112    
/* Decide whether the JIT should be used for this session, based on prefs
 * and the configured CPU model.  Returns false (with an explanatory log
 * line) when JIT is disabled, the cache is too small, or the emulated CPU
 * is below a 68040. */
bool compiler_use_jit(void)
{
	// Check for the "jit" prefs item
	if (!PrefsFindBool("jit"))
		return false;

	// Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
	if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
		write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
		return false;
	}

	// FIXME: there are currently problems with JIT compilation and anything below a 68040
	if (CPUType < 4) {
		write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
		return false;
	}

	return true;
}
5133    
/* Reset the whole register-allocation state before compiling a block:
 * every virtual register starts unbound (no native register, UNDEF), the
 * first 16 integer regs are mapped onto the 68k register file in `regs`,
 * the first 8 FP regs onto the 68k FPU registers, PC_P is seeded with the
 * current compile-time PC as a constant, and the native register
 * capability tables (can_byte/can_word/always_used) are decoded. */
void init_comp(void)
{
	int i;
	uae_s8* cb=can_byte;
	uae_s8* cw=can_word;
	uae_s8* au=always_used;

#if RECORD_REGISTER_USAGE
	for (i=0;i<16;i++)
		reg_count_local[i] = 0;
#endif

	/* Start with every integer virtual register unbound and undefined. */
	for (i=0;i<VREGS;i++) {
		live.state[i].realreg=-1;
		live.state[i].needflush=NF_SCRATCH;
		live.state[i].val=0;
		set_status(i,UNDEF);
	}

	/* Same for the FP virtual registers. */
	for (i=0;i<VFREGS;i++) {
		live.fate[i].status=UNDEF;
		live.fate[i].realreg=-1;
		live.fate[i].needflush=NF_SCRATCH;
	}

	for (i=0;i<VREGS;i++) {
		if (i<16) { /* First 16 registers map to 68k registers */
			live.state[i].mem=((uae_u32*)&regs)+i;
			live.state[i].needflush=NF_TOMEM;
			set_status(i,INMEM);
		}
		else
			live.state[i].mem=scratch.regs+i;
	}
	/* PC_P tracks regs.pc_p and starts out as a known constant. */
	live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
	live.state[PC_P].needflush=NF_TOMEM;
	set_const(PC_P,(uintptr)comp_pc_p);

	/* Emulated X flag and lazy flag temp live in regflags. */
	live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
	live.state[FLAGX].needflush=NF_TOMEM;
	set_status(FLAGX,INMEM);

	live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
	live.state[FLAGTMP].needflush=NF_TOMEM;
	set_status(FLAGTMP,INMEM);

	live.state[NEXT_HANDLER].needflush=NF_HANDLER;
	set_status(NEXT_HANDLER,UNDEF);

	for (i=0;i<VFREGS;i++) {
		if (i<8) { /* First 8 registers map to 68k FPU registers */
			live.fate[i].mem=(uae_u32*)fpu_register_address(i);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else if (i==FP_RESULT) {
			live.fate[i].mem=(uae_u32*)(&fpu.result);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else
			live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
	}


	/* Decode per-native-register capabilities.  can_byte/can_word/
	 * always_used are sorted arrays of register numbers; walk each
	 * pointer forward as matches are consumed. */
	for (i=0;i<N_REGS;i++) {
		live.nat[i].touched=0;
		live.nat[i].nholds=0;
		live.nat[i].locked=0;
		if (*cb==i) {
			live.nat[i].canbyte=1; cb++;
		} else live.nat[i].canbyte=0;
		if (*cw==i) {
			live.nat[i].canword=1; cw++;
		} else live.nat[i].canword=0;
		if (*au==i) {
			live.nat[i].locked=1; au++;
		}
	}

	for (i=0;i<N_FREGS;i++) {
		live.fat[i].touched=0;
		live.fat[i].nholds=0;
		live.fat[i].locked=0;
	}

	touchcnt=1;
	m68k_pc_offset=0;
	live.flags_in_flags=TRASH;
	live.flags_on_stack=VALID;
	live.flags_are_important=1;

	raw_fp_init();
}
5228    
5229     /* Only do this if you really mean it! The next call should be to init!*/
5230     void flush(int save_regs)
5231     {
5232     int fi,i;
5233    
5234     log_flush();
5235     flush_flags(); /* low level */
5236     sync_m68k_pc(); /* mid level */
5237    
5238     if (save_regs) {
5239     for (i=0;i<VFREGS;i++) {
5240     if (live.fate[i].needflush==NF_SCRATCH ||
5241     live.fate[i].status==CLEAN) {
5242     f_disassociate(i);
5243     }
5244     }
5245     for (i=0;i<VREGS;i++) {
5246     if (live.state[i].needflush==NF_TOMEM) {
5247     switch(live.state[i].status) {
5248     case INMEM:
5249     if (live.state[i].val) {
5250 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5251 gbeauche 1.1 log_vwrite(i);
5252     live.state[i].val=0;
5253     }
5254     break;
5255     case CLEAN:
5256     case DIRTY:
5257     remove_offset(i,-1); tomem(i); break;
5258     case ISCONST:
5259     if (i!=PC_P)
5260     writeback_const(i);
5261     break;
5262     default: break;
5263     }
5264     Dif (live.state[i].val && i!=PC_P) {
5265     write_log("Register %d still has val %x\n",
5266     i,live.state[i].val);
5267     }
5268     }
5269     }
5270     for (i=0;i<VFREGS;i++) {
5271     if (live.fate[i].needflush==NF_TOMEM &&
5272     live.fate[i].status==DIRTY) {
5273     f_evict(i);
5274     }
5275     }
5276     raw_fp_cleanup_drop();
5277     }
5278     if (needflags) {
5279     write_log("Warning! flush with needflags=1!\n");
5280     }
5281     }
5282    
5283     static void flush_keepflags(void)
5284     {
5285     int fi,i;
5286    
5287     for (i=0;i<VFREGS;i++) {
5288     if (live.fate[i].needflush==NF_SCRATCH ||
5289     live.fate[i].status==CLEAN) {
5290     f_disassociate(i);
5291     }
5292     }
5293     for (i=0;i<VREGS;i++) {
5294     if (live.state[i].needflush==NF_TOMEM) {
5295     switch(live.state[i].status) {
5296     case INMEM:
5297     /* Can't adjust the offset here --- that needs "add" */
5298     break;
5299     case CLEAN:
5300     case DIRTY:
5301     remove_offset(i,-1); tomem(i); break;
5302     case ISCONST:
5303     if (i!=PC_P)
5304     writeback_const(i);
5305     break;
5306     default: break;
5307     }
5308     }
5309     }
5310     for (i=0;i<VFREGS;i++) {
5311     if (live.fate[i].needflush==NF_TOMEM &&
5312     live.fate[i].status==DIRTY) {
5313     f_evict(i);
5314     }
5315     }
5316     raw_fp_cleanup_drop();
5317     }
5318    
/* Release all scratch virtual registers (integer and FP) via
 * forget_about()/f_forget_about(), and warn about any native register
 * that is still locked.  NOTE(review): the `i!=4` exemption is a
 * hard-coded native register number (presumably the permanently locked
 * register, e.g. ESP on x86) — confirm against the backend's
 * always_used table. */
void freescratch(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		if (live.nat[i].locked && i!=4)
			write_log("Warning! %d is locked\n",i);

	for (i=0;i<VREGS;i++)
		if (live.state[i].needflush==NF_SCRATCH) {
			forget_about(i);
		}

	for (i=0;i<VFREGS;i++)
		if (live.fate[i].needflush==NF_SCRATCH) {
			f_forget_about(i);
		}
}
5336    
5337     /********************************************************************
5338     * Support functions, internal *
5339     ********************************************************************/
5340    
5341    
/* Pad the code output pointer `target` up to an a-byte boundary (a must
 * be a power of two; a==0 means no alignment).  Uses multi-byte NOP
 * fillers when the backend supports them, otherwise plain 0x90 NOPs,
 * which are easier to step over in gdb. */
static void align_target(uae_u32 a)
{
	if (!a)
		return;

	if (tune_nop_fillers)
		raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
	else {
		/* Fill with NOPs --- makes debugging with gdb easier */
		while ((uintptr)target&(a-1))
			*target++=0x90;
	}
}
5355    
5356     static __inline__ int isinrom(uintptr addr)
5357     {
5358     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5359     }
5360    
/* Write back every dirty integer register held in a caller-clobbered
 * native register, and evict every FP register.  Used before calling out
 * to C code; registers in call-saved natives keep their cached copies. */
static void flush_all(void)
{
	int i;

	log_flush();
	for (i=0;i<VREGS;i++)
		if (live.state[i].status==DIRTY) {
			if (!call_saved[live.state[i].realreg]) {
				tomem(i);
			}
		}
	/* The FPU stack does not survive a call: evict everything. */
	for (i=0;i<VFREGS;i++)
		if (f_isinreg(i))
			f_evict(i);
	raw_fp_cleanup_drop();
}
5377    
/* Make sure all registers that will get clobbered by a call are
   save and sound in memory */
static void prepare_for_call_1(void)
{
	flush_all(); /* If there are registers that don't get clobbered,
		      * we should be a bit more selective here */
}
5385    
/* We will call a C routine in a moment. That will clobber all registers,
   so we need to disassociate everything */
/* Second phase of call setup: free every caller-clobbered native integer
 * register and all native FP registers, and mark the host flags trashed. */
static void prepare_for_call_2(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		if (!call_saved[i] && live.nat[i].nholds>0)
			free_nreg(i);

	for (i=0;i<N_FREGS;i++)
		if (live.fat[i].nholds>0)
			f_free_nreg(i);

	live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
				      flags at the very start of the call_r
				      functions! */
}
5403    
5404     /********************************************************************
5405     * Memory access and related functions, CREATE time *
5406     ********************************************************************/
5407    
/* Record a conditional branch for end-of-block handling: the fall-through
 * 68k address, the branch-taken address, and the condition code that
 * selects between them. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
	next_pc_p=not_taken;
	taken_pc_p=taken;
	branch_cc=cond;
}
5414    
5415    
5416     static uae_u32 get_handler_address(uae_u32 addr)
5417     {
5418     uae_u32 cl=cacheline(addr);
5419 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5420     return (uintptr)&(bi->direct_handler_to_use);
5421 gbeauche 1.1 }
5422    
5423     static uae_u32 get_handler(uae_u32 addr)
5424     {
5425     uae_u32 cl=cacheline(addr);
5426 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5427     return (uintptr)bi->direct_handler_to_use;
5428 gbeauche 1.1 }
5429    
/* Emit code that loads the (current) direct handler for 68k address addr
 * into virtual register reg, via the handler slot's indirection. */
static void load_handler(int reg, uae_u32 addr)
{
	mov_l_rm(reg,get_handler_address(addr));
}
5434    
/* This version assumes that it is writing *real* memory, and *will* fail
 * if that assumption is wrong! No branches, no second chances, just
 * straight go-for-it attitude */

/* Store `size` bytes (1/2/4) from virtual register source to the guest
 * address in `address`, byte-swapping 16/32-bit values to big-endian and
 * addressing host memory at address+MEMBaseDiff.  tmp is a scratch
 * register for the swapped copy; with clobber set, source itself may be
 * trashed instead. */
static void writemem_real(int address, int source, int size, int tmp, int clobber)
{
	int f=tmp;

	if (clobber)
		f=source;

	switch(size) {
	case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
	case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
	case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
	}
	/* Both scratch registers now hold garbage. */
	forget_about(tmp);
	forget_about(f);
}
5454    
/* ----------------------------------------------------------------------
 * Public store helpers, all thin wrappers over writemem_real().  The
 * _clobber variants may destroy the source register (saves a move).
 * ---------------------------------------------------------------------- */

/* Store one byte from source to guest address. */
void writebyte(int address, int source, int tmp)
{
	writemem_real(address,source,1,tmp,0);
}

/* 16-bit store; clobber selects whether source may be trashed. */
static __inline__ void writeword_general(int address, int source, int tmp,
					 int clobber)
{
	writemem_real(address,source,2,tmp,clobber);
}

void writeword_clobber(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,1);
}

void writeword(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,0);
}

/* 32-bit store; clobber selects whether source may be trashed. */
static __inline__ void writelong_general(int address, int source, int tmp,
					 int clobber)
{
	writemem_real(address,source,4,tmp,clobber);
}

void writelong_clobber(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,1);
}

void writelong(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,0);
}
5491    
5492    
5493    
5494     /* This version assumes that it is reading *real* memory, and *will* fail
5495     * if that assumption is wrong! No branches, no second chances, just
5496     * straight go-for-it attitude */
5497    
5498 gbeauche 1.24 static void readmem_real(int address, int dest, int size, int tmp)
5499 gbeauche 1.1 {
5500     int f=tmp;
5501    
5502     if (size==4 && address!=dest)
5503     f=dest;
5504    
5505     switch(size) {
5506     case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5507     case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5508     case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5509     }
5510     forget_about(tmp);
5511     }
5512    
/* Public load helpers: byte/word/long wrappers over readmem_real(). */

void readbyte(int address, int dest, int tmp)
{
	readmem_real(address,dest,1,tmp);
}

void readword(int address, int dest, int tmp)
{
	readmem_real(address,dest,2,tmp);
}

void readlong(int address, int dest, int tmp)
{
	readmem_real(address,dest,4,tmp);
}
5527    
5528     void get_n_addr(int address, int dest, int tmp)
5529     {
5530     // a is the register containing the virtual address
5531     // after the offset had been fetched
5532     int a=tmp;
5533    
5534     // f is the register that will contain the offset
5535     int f=tmp;
5536    
5537     // a == f == tmp if (address == dest)
5538     if (address!=dest) {
5539     a=address;
5540     f=dest;
5541     }
5542    
5543     #if REAL_ADDRESSING
5544     mov_l_rr(dest, address);
5545     #elif DIRECT_ADDRESSING
5546     lea_l_brr(dest,address,MEMBaseDiff);
5547     #endif
5548     forget_about(tmp);
5549     }
5550    
/* Host-address translation for jump targets.  Must match what the
 * interpreter computes for the same address, or blocks get translated
 * twice; with real/direct addressing it is identical to get_n_addr(). */
void get_n_addr_jmp(int address, int dest, int tmp)
{
	/* For this, we need to get the same address as the rest of UAE
	   would --- otherwise we end up translating everything twice */
	get_n_addr(address,dest,tmp);
}
5557    
5558    
/* base is a register, but dp is an actual value.
   target is a register, as is tmp */
/* Decode a 68020 extended addressing-mode extension word `dp` and emit
 * code computing the effective address into `target`.  Handles both the
 * full format (bit 8 set: optional base/index suppression, base and outer
 * displacements, memory indirection pre- or post-index) and the brief
 * 68000-style format.  Consumes displacement words from the instruction
 * stream via comp_get_iword/comp_get_ilong (advancing m68k_pc_offset).
 * tmp is trashed. */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
	int reg = (dp >> 12) & 15;		/* index register number */
	int regd_shift=(dp >> 9) & 3;		/* index scale: 1<<shift */

	if (dp & 0x100) {
		/* Full extension word format. */
		int ignorebase=(dp&0x80);	/* BS: base suppress */
		int ignorereg=(dp&0x40);	/* IS: index suppress */
		int addbase=0;			/* base displacement */
		int outer=0;			/* outer displacement */

		/* Base displacement: null, word or long. */
		if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

		/* Outer displacement: null, word or long. */
		if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
			if (!ignorereg) {
				/* W/L index, scaled. */
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(target,reg);
				else
					mov_l_rr(target,reg);
				shll_l_ri(target,regd_shift);
			}
			else
				mov_l_ri(target,0);

			/* target is now regd */
			if (!ignorebase)
				add_l(target,base);
			add_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);
		} else { /* do the getlong first, then add regd */
			if (!ignorebase) {
				mov_l_rr(target,base);
				add_l_ri(target,addbase);
			}
			else
				mov_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);

			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(tmp,reg);
				else
					mov_l_rr(tmp,reg);
				shll_l_ri(tmp,regd_shift);
				/* tmp is now regd */
				add_l(target,tmp);
			}
		}
		add_l_ri(target,outer);
	}
	else { /* 68000 version */
		/* Brief format: base + scaled index + 8-bit displacement. */
		if ((dp & 0x800) == 0) { /* Sign extend */
			sign_extend_16_rr(target,reg);
			lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
		else {
			lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
	}
	forget_about(tmp);
}
5626    
5627    
5628    
5629    
5630    
5631     void set_cache_state(int enabled)
5632     {
5633     if (enabled!=letit)
5634     flush_icache_hard(77);
5635     letit=enabled;
5636     }
5637    
/* Report whether the translation cache is currently enabled. */
int get_cache_state(void)
{
	return letit;
}
5642    
5643     uae_u32 get_jitted_size(void)
5644     {
5645     if (compiled_code)
5646     return current_compile_p-compiled_code;
5647     return 0;
5648     }
5649    
/* Maximum number of retries when probing for a suitable code-cache mapping. */
const int CODE_ALLOC_MAX_ATTEMPTS = 10;
/* Step between candidate base addresses while probing (at least one page). */
const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5652    
/* Allocate SIZE bytes of memory for translated code.
   Returns the mapping address, or NULL on failure.
   NOTE(review): the Linux-specific probing branch below is compiled out
   (`&& 0`); in practice only the plain vm_acquire() path is built. */
static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
	/*
	  This is a really awful hack that is known to work on Linux at
	  least.

	  The trick here is to make sure the allocated cache is nearby
	  code segment, and more precisely in the positive half of a
	  32-bit address space. i.e. addr < 0x80000000. Actually, it
	  turned out that a 32-bit binary run on AMD64 yields a cache
	  allocated around 0xa0000000, thus causing some troubles when
	  translating addresses from m68k to x86.
	*/
	static uint8 * code_base = NULL;
	if (code_base == NULL) {
		uintptr page_size = getpagesize();
		uintptr boundaries = CODE_ALLOC_BOUNDARIES;
		if (boundaries < page_size)
			boundaries = page_size;
		/* Start probing just past the data segment. */
		code_base = (uint8 *)sbrk(0);
		for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
			if (vm_acquire_fixed(code_base, size) == 0) {
				uint8 *code = code_base;
				code_base += size;
				return code;
			}
			code_base += boundaries;
		}
		return NULL;
	}

	/* Subsequent allocations continue from the last successful base. */
	if (vm_acquire_fixed(code_base, size) == 0) {
		uint8 *code = code_base;
		code_base += size;
		return code;
	}

	if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
		return NULL;

	return do_alloc_code(size, depth + 1);
#else
	/* Generic path: let the VM layer pick the address. */
	uint8 *code = (uint8 *)vm_acquire(size);
	return code == VM_MAP_FAILED ? NULL : code;
#endif
}
5700    
5701     static inline uint8 *alloc_code(uint32 size)
5702     {
5703 gbeauche 1.31 uint8 *ptr = do_alloc_code(size, 0);
5704     /* allocated code must fit in 32-bit boundaries */
5705     assert((uintptr)ptr <= 0xffffffff);
5706     return ptr;
5707 gbeauche 1.20 }
5708    
5709 gbeauche 1.1 void alloc_cache(void)
5710     {
5711     if (compiled_code) {
5712     flush_icache_hard(6);
5713 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5714 gbeauche 1.1 compiled_code = 0;
5715     }
5716    
5717     if (cache_size == 0)
5718     return;
5719    
5720     while (!compiled_code && cache_size) {
5721 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5722 gbeauche 1.1 compiled_code = 0;
5723     cache_size /= 2;
5724     }
5725     }
5726 gbeauche 1.25 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5727 gbeauche 1.1
5728     if (compiled_code) {
5729     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5730     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5731     current_compile_p = compiled_code;
5732     current_cache_size = 0;
5733     }
5734     }
5735    
5736    
5737    
5738 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5739 gbeauche 1.1
/* Compute the two checksums (additive K1, xor K2) over the m68k source
   words a translated block was compiled from, storing them in *c1/*c2.
   With USE_CHECKSUM_INFO the block may cover several discontiguous
   ranges (one checksum_info per range); note the `while (csi)` loop body
   deliberately spans the #else/#endif below. */
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
	uae_u32 k1 = 0;
	uae_u32 k2 = 0;

#if USE_CHECKSUM_INFO
	checksum_info *csi = bi->csi;
	Dif(!csi) abort();
	while (csi) {
		uae_s32 len = csi->length;
		uintptr tmp = (uintptr)csi->start_p;
#else
	uae_s32 len = bi->len;
	uintptr tmp = (uintptr)bi->min_pcp;
#endif
	uae_u32*pos;

	/* Round the start down to a 4-byte boundary and widen the length
	   accordingly so we can sum whole 32-bit words. */
	len += (tmp & 3);
	tmp &= ~((uintptr)3);
	pos = (uae_u32 *)tmp;

	if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
		while (len > 0) {
			k1 += *pos;
			k2 ^= *pos;
			pos++;
			len -= 4;
		}
	}

#if USE_CHECKSUM_INFO
	csi = csi->next;
	}
#endif

	*c1 = k1;
	*c2 = k2;
}
5778    
#if 0
/* Debug helper (currently compiled out): dump the 32-bit words that a
   checksum range covers, using the same alignment rules as
   calc_checksum() above. */
static void show_checksum(CSI_TYPE* csi)
{
	uae_u32 k1=0;
	uae_u32 k2=0;
	uae_s32 len=CSI_LENGTH(csi);
	uae_u32 tmp=(uintptr)CSI_START_P(csi);
	uae_u32* pos;

	len+=(tmp&3);
	tmp&=(~3);
	pos=(uae_u32*)tmp;

	if (len<0 || len>MAX_CHECKSUM_LEN) {
		return;
	}
	else {
		while (len>0) {
			write_log("%08x ",*pos);
			pos++;
			len-=4;
		}
		write_log(" bla\n");
	}
}
#endif
5805 gbeauche 1.1
5806    
5807     int check_for_cache_miss(void)
5808     {
5809     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5810    
5811     if (bi) {
5812     int cl=cacheline(regs.pc_p);
5813     if (bi!=cache_tags[cl+1].bi) {
5814     raise_in_cl_list(bi);
5815     return 1;
5816     }
5817     }
5818     return 0;
5819     }
5820    
5821    
5822     static void recompile_block(void)
5823     {
5824     /* An existing block's countdown code has expired. We need to make
5825     sure that execute_normal doesn't refuse to recompile due to a
5826     perceived cache miss... */
5827     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5828    
5829     Dif (!bi)
5830     abort();
5831     raise_in_cl_list(bi);
5832     execute_normal();
5833     return;
5834     }
5835     static void cache_miss(void)
5836     {
5837     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5838     uae_u32 cl=cacheline(regs.pc_p);
5839     blockinfo* bi2=get_blockinfo(cl);
5840    
5841     if (!bi) {
5842     execute_normal(); /* Compile this block now */
5843     return;
5844     }
5845     Dif (!bi2 || bi==bi2) {
5846     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5847     abort();
5848     }
5849     raise_in_cl_list(bi);
5850     return;
5851     }
5852    
5853     static int called_check_checksum(blockinfo* bi);
5854    
/* Re-validate a block in BI_NEED_CHECK state by recomputing its source
   checksums.  Returns nonzero if the block is still valid (it is then
   reactivated), zero if its source changed (it is then invalidated and
   left to be recompiled). */
static inline int block_check_checksum(blockinfo* bi)
{
	uae_u32 c1,c2;
	bool isgood;

	if (bi->status!=BI_NEED_CHECK)
		return 1; /* This block is in a checked state */

	checksum_count++;

	/* A block whose stored checksums are both zero can never be
	   re-validated; force a mismatch. */
	if (bi->c1 || bi->c2)
		calc_checksum(bi,&c1,&c2);
	else {
		c1=c2=1; /* Make sure it doesn't match */
	}

	isgood=(c1==bi->c1 && c2==bi->c2);

	if (isgood) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use=bi->handler;
		set_dhtu(bi,bi->direct_handler);
		bi->status=BI_CHECKING;
		/* Also re-validate the blocks this one jumps straight into. */
		isgood=called_check_checksum(bi);
	}
	if (isgood) {
		/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2);*/
		remove_from_list(bi);
		add_to_active(bi);
		raise_in_cl_list(bi);
		bi->status=BI_ACTIVE;
	}
	else {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2); */
		invalidate_block(bi);
		raise_in_cl_list(bi);
	}
	return isgood;
}
5899    
5900     static int called_check_checksum(blockinfo* bi)
5901     {
5902     dependency* x=bi->deplist;
5903     int isgood=1;
5904     int i;
5905    
5906     for (i=0;i<2 && isgood;i++) {
5907     if (bi->dep[i].jmp_off) {
5908     isgood=block_check_checksum(bi->dep[i].target);
5909     }
5910     }
5911     return isgood;
5912     }
5913    
5914     static void check_checksum(void)
5915     {
5916     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5917     uae_u32 cl=cacheline(regs.pc_p);
5918     blockinfo* bi2=get_blockinfo(cl);
5919    
5920     /* These are not the droids you are looking for... */
5921     if (!bi) {
5922     /* Whoever is the primary target is in a dormant state, but
5923     calling it was accidental, and we should just compile this
5924     new block */
5925     execute_normal();
5926     return;
5927     }
5928     if (bi!=bi2) {
5929     /* The block was hit accidentally, but it does exist. Cache miss */
5930     cache_miss();
5931     return;
5932     }
5933    
5934     if (!block_check_checksum(bi))
5935     execute_normal();
5936     }
5937    
/* Make the current register-allocation state compatible with the entry
   state recorded in bi->env before jumping into the block: honor the
   block's promises about unneeded vregs, flush everything, then preload
   the native registers the block demands. */
static __inline__ void match_states(blockinfo* bi)
{
	int i;
	smallstate* s=&(bi->env);

	if (bi->status==BI_NEED_CHECK) {
		block_check_checksum(bi);
	}
	if (bi->status==BI_ACTIVE ||
	    bi->status==BI_FINALIZING) { /* Deal with the *promises* the
					    block makes (about not using
					    certain vregs) */
		for (i=0;i<16;i++) {
			if (s->virt[i]==L_UNNEEDED) {
				// write_log("unneeded reg %d at %p\n",i,target);
				COMPCALL(forget_about)(i); // FIXME
			}
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			/* Load vreg v into native register i (leaves i locked). */
			// printf("Loading reg %d into %d at %p\n",v,i,target);
			readreg_specific(v,4,i);
			// do_load_reg(i,v);
			// setlock(i);
		}
	}
	/* Second pass: release the locks taken by readreg_specific above. */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			unlock2(i);
		}
	}
}
5976    
/* Emit, once at startup, the shared prologue/epilogue stubs used by all
   translated code: the single entry stub (pushall_call_handler) and the
   exit stubs (popall_*) that restore callee-saved registers and tail-jump
   to the corresponding C handler.  The stubs live in popallspace, which
   is made read+execute afterwards. */
static __inline__ void create_popalls(void)
{
	int i,r;

	if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
		write_log("FATAL: Could not allocate popallspace!\n");
		abort();
	}
	vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);

	/* Compute the stack adjustment needed so that, together with the
	   pushed callee-saved registers, the stack stays STACK_ALIGN-aligned
	   inside generated code. */
	int stack_space = STACK_OFFSET;
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			stack_space += sizeof(void *);
	}
	stack_space %= STACK_ALIGN;
	if (stack_space)
		stack_space = STACK_ALIGN - stack_space;

	current_compile_p=popallspace;
	set_target(current_compile_p);

	/* We need to guarantee 16-byte stack alignment on x86 at any point
	   within the JIT generated code. We have multiple exit points
	   possible but a single entry. A "jmp" is used so that we don't
	   have to generate stack alignment in generated code that has to
	   call external functions (e.g. a generic instruction handler).

	   In summary, JIT generated code is not leaf so we have to deal
	   with it here to maintain correct stack alignment. */
	align_target(align_jumps);
	current_compile_p=get_target();
	pushall_call_handler=get_target();
	/* Entry stub: save callee-saved registers, align the stack, then
	   dispatch through cache_tags on the tag of regs.pc_p. */
	for (i=N_REGS;i--;) {
		if (need_to_preserve[i])
			raw_push_l_r(i);
	}
	raw_dec_sp(stack_space);
	r=REG_PC_TMP;
	raw_mov_l_rm(r,(uintptr)&regs.pc_p);
	raw_and_l_ri(r,TAGMASK);
	raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);

	/* now the exit points: each undoes the stack adjustment, restores
	   the callee-saved registers, and tail-jumps to its C handler. */
	align_target(align_jumps);
	popall_do_nothing=get_target();
	raw_inc_sp(stack_space);
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)do_nothing);

	align_target(align_jumps);
	popall_execute_normal=get_target();
	raw_inc_sp(stack_space);
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)execute_normal);

	align_target(align_jumps);
	popall_cache_miss=get_target();
	raw_inc_sp(stack_space);
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)cache_miss);

	align_target(align_jumps);
	popall_recompile_block=get_target();
	raw_inc_sp(stack_space);
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)recompile_block);

	align_target(align_jumps);
	popall_exec_nostats=get_target();
	raw_inc_sp(stack_space);
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)exec_nostats);

	align_target(align_jumps);
	popall_check_checksum=get_target();
	raw_inc_sp(stack_space);
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)check_checksum);

	// no need to further write into popallspace
	vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
}
6078    
6079     static __inline__ void reset_lists(void)
6080     {
6081     int i;
6082    
6083     for (i=0;i<MAX_HOLD_BI;i++)
6084     hold_bi[i]=NULL;
6085     active=NULL;
6086     dormant=NULL;
6087     }
6088    
/* Initialise a freshly allocated blockinfo: emit its two per-block stubs
   (direct_pen -> popall_execute_normal, direct_pcc -> popall_check_checksum,
   both of which first reset regs.pc_p to the block's start), then reset
   its dependency links and state. */
static void prepare_block(blockinfo* bi)
{
	int i;

	set_target(current_compile_p);
	align_target(align_jumps);
	bi->direct_pen=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_execute_normal);

	align_target(align_jumps);
	bi->direct_pcc=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_check_checksum);
	current_compile_p=get_target();

	bi->deplist=NULL;
	for (i=0;i<2;i++) {
		bi->dep[i].prev_p=NULL;
		bi->dep[i].next=NULL;
	}
	bi->env=default_ss;
	bi->status=BI_INVALID;
	bi->havestate=0;
	//bi->env=empty_ss;
}
6117    
6118 gbeauche 1.21 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6119     static inline void reset_compop(int opcode)
6120 gbeauche 1.17 {
6121 gbeauche 1.21 compfunctbl[opcode] = NULL;
6122     nfcompfunctbl[opcode] = NULL;
6123     }
6124    
/* Parse exactly four hexadecimal digits starting at P.
   Returns the resulting 16-bit opcode value, or -1 if any of the four
   characters is not a hex digit (including a premature NUL). */
static int read_opcode(const char *p)
{
    int value = 0;
    for (int i = 0; i < 4; i++) {
        const char c = p[i];
        int digit;
        if (c >= '0' && c <= '9')
            digit = c - '0';
        else if (c >= 'a' && c <= 'f')
            digit = (c - 'a') + 10;
        else if (c >= 'A' && c <= 'F')
            digit = (c - 'A') + 10;
        else
            return -1;
        value = (value << 4) | digit;
    }
    return value;
}
6147    
/* Parse the "jitblacklist" preference string and disable compilation of
   the listed opcodes.  Syntax: semicolon-separated entries, each either
   a single 4-hex-digit opcode ("NNNN") or an inclusive range
   ("NNNN-MMMM"); opcodes are given in big-endian form and mapped with
   cft_map() before being reset.  Returns false on any syntax error. */
static bool merge_blacklist()
{
	const char *blacklist = PrefsFindString("jitblacklist");
	if (blacklist) {
		const char *p = blacklist;
		for (;;) {
			/* End of string: everything parsed cleanly. */
			if (*p == 0)
				return true;

			int opcode1 = read_opcode(p);
			if (opcode1 < 0)
				return false;
			p += 4;

			/* Optional "-NNNN" extends the entry to a range. */
			int opcode2 = opcode1;
			if (*p == '-') {
				p++;
				opcode2 = read_opcode(p);
				if (opcode2 < 0)
					return false;
				p += 4;
			}

			if (*p == 0 || *p == ';') {
				write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
				for (int opcode = opcode1; opcode <= opcode2; opcode++)
					reset_compop(cft_map(opcode));

				/* Consume the separator and parse the next entry;
				   otherwise the terminating NUL ends the list. */
				if (*p++ == ';')
					continue;

				return true;
			}

			/* Trailing garbage after an entry. */
			return false;
		}
	}
	return true;
}
6187    
6188 gbeauche 1.1 void build_comp(void)
6189     {
6190     int i;
6191     int jumpcount=0;
6192     unsigned long opcode;
6193     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6194     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6195     int count;
6196     int cpu_level = 0; // 68000 (default)
6197     if (CPUType == 4)
6198     cpu_level = 4; // 68040 with FPU
6199     else {
6200     if (FPUType)
6201     cpu_level = 3; // 68020 with FPU
6202     else if (CPUType >= 2)
6203     cpu_level = 2; // 68020
6204     else if (CPUType == 1)
6205     cpu_level = 1;
6206     }
6207     struct cputbl *nfctbl = (
6208     cpu_level == 4 ? op_smalltbl_0_nf
6209     : cpu_level == 3 ? op_smalltbl_1_nf
6210     : cpu_level == 2 ? op_smalltbl_2_nf
6211     : cpu_level == 1 ? op_smalltbl_3_nf
6212     : op_smalltbl_4_nf);
6213    
6214     write_log ("<JIT compiler> : building compiler function tables\n");
6215    
6216     for (opcode = 0; opcode < 65536; opcode++) {
6217 gbeauche 1.21 reset_compop(opcode);
6218 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6219     prop[opcode].use_flags = 0x1f;
6220     prop[opcode].set_flags = 0x1f;
6221     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6222     }
6223    
6224     for (i = 0; tbl[i].opcode < 65536; i++) {
6225     int cflow = table68k[tbl[i].opcode].cflow;
6226 gbeauche 1.33 if (follow_const_jumps && (tbl[i].specific & 16))
6227 gbeauche 1.10 cflow = fl_const_jump;
6228 gbeauche 1.8 else
6229 gbeauche 1.10 cflow &= ~fl_const_jump;
6230     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6231 gbeauche 1.1
6232     int uses_fpu = tbl[i].specific & 32;
6233 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6234 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6235     else
6236     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6237     }
6238 gbeauche 1.8
6239 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6240     int uses_fpu = tbl[i].specific & 32;
6241 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6242 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6243     else
6244     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6245    
6246     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6247     }
6248    
6249     for (i = 0; nfctbl[i].handler; i++) {
6250     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6251     }
6252    
6253     for (opcode = 0; opcode < 65536; opcode++) {
6254     compop_func *f;
6255     compop_func *nff;
6256     cpuop_func *nfcf;
6257     int isaddx,cflow;
6258    
6259     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6260     continue;
6261    
6262     if (table68k[opcode].handler != -1) {
6263     f = compfunctbl[cft_map(table68k[opcode].handler)];
6264     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6265     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6266     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6267     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6268     prop[cft_map(opcode)].cflow = cflow;
6269     prop[cft_map(opcode)].is_addx = isaddx;
6270     compfunctbl[cft_map(opcode)] = f;
6271     nfcompfunctbl[cft_map(opcode)] = nff;
6272     Dif (nfcf == op_illg_1)
6273     abort();
6274     nfcpufunctbl[cft_map(opcode)] = nfcf;
6275     }
6276     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6277     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6278 gbeauche 1.33 /* Unconditional jumps don't evaluate condition codes, so they
6279     * don't actually use any flags themselves */
6280     if (prop[cft_map(opcode)].cflow & fl_const_jump)
6281     prop[cft_map(opcode)].use_flags = 0;
6282 gbeauche 1.1 }
6283     for (i = 0; nfctbl[i].handler != NULL; i++) {
6284     if (nfctbl[i].specific)
6285     nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
6286     }
6287 gbeauche 1.21
6288     /* Merge in blacklist */
6289     if (!merge_blacklist())
6290     write_log("<JIT compiler> : blacklist merge failure!\n");
6291 gbeauche 1.1
6292     count=0;
6293     for (opcode = 0; opcode < 65536; opcode++) {
6294     if (compfunctbl[cft_map(opcode)])
6295     count++;
6296     }
6297     write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6298    
6299     /* Initialise state */
6300     create_popalls();
6301     alloc_cache();
6302     reset_lists();
6303    
6304     for (i=0;i<TAGSIZE;i+=2) {
6305     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6306     cache_tags[i+1].bi=NULL;
6307     }
6308    
6309     #if 0
6310     for (i=0;i<N_REGS;i++) {
6311     empty_ss.nat[i].holds=-1;
6312     empty_ss.nat[i].validsize=0;
6313     empty_ss.nat[i].dirtysize=0;
6314     }
6315     #endif
6316     for (i=0;i<VREGS;i++) {
6317     empty_ss.virt[i]=L_NEEDED;
6318     }
6319     for (i=0;i<N_REGS;i++) {
6320     empty_ss.nat[i]=L_UNKNOWN;
6321     }
6322     default_ss=empty_ss;
6323     }
6324    
6325    
/* icache-flush stub installed when flushing is unnecessary: a no-op. */
static void flush_icache_none(int n)
{
	/* Nothing to do. */
}
6330    
6331     static void flush_icache_hard(int n)
6332     {
6333     uae_u32 i;
6334     blockinfo* bi, *dbi;
6335    
6336     hard_flush_count++;
6337     #if 0
6338     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6339     n,regs.pc,regs.pc_p,current_cache_size/1024);
6340     current_cache_size = 0;
6341     #endif
6342     bi=active;
6343     while(bi) {
6344     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6345     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6346     dbi=bi; bi=bi->next;
6347     free_blockinfo(dbi);
6348     }
6349     bi=dormant;
6350     while(bi) {
6351     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6352     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6353     dbi=bi; bi=bi->next;
6354     free_blockinfo(dbi);
6355     }
6356    
6357     reset_lists();
6358     if (!compiled_code)
6359     return;
6360     current_compile_p=compiled_code;
6361     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6362     }
6363    
6364    
6365     /* "Soft flushing" --- instead of actually throwing everything away,
6366     we simply mark everything as "needs to be checked".
6367     */
6368    
6369     static inline void flush_icache_lazy(int n)
6370     {
6371     uae_u32 i;
6372     blockinfo* bi;
6373     blockinfo* bi2;
6374    
6375     soft_flush_count++;
6376     if (!active)
6377     return;
6378    
6379     bi=active;
6380     while (bi) {
6381     uae_u32 cl=cacheline(bi->pc_p);
6382     if (bi->status==BI_INVALID ||
6383     bi->status==BI_NEED_RECOMP) {
6384     if (bi==cache_tags[cl+1].bi)
6385     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6386     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6387     set_dhtu(bi,bi->direct_pen);
6388     bi->status=BI_INVALID;
6389     }
6390     else {
6391     if (bi==cache_tags[cl+1].bi)
6392     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6393     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6394     set_dhtu(bi,bi->direct_pcc);
6395     bi->status=BI_NEED_CHECK;
6396     }
6397     bi2=bi;
6398     bi=bi->next;
6399     }
6400     /* bi2 is now the last entry in the active list */
6401     bi2->next=dormant;
6402     if (dormant)
6403     dormant->prev_p=&(bi2->next);
6404    
6405     dormant=active;
6406     active->prev_p=&dormant;
6407     active=NULL;
6408 gbeauche 1.22 }
6409    
/* Invalidate translated blocks whose m68k source overlaps the byte range
   [start_p, start_p + length).  With LAZY_FLUSH_ICACHE_RANGE, candidate
   blocks are individually demoted to the dormant list (marked for
   checksum re-validation, or for recompilation if already invalid);
   otherwise we fall back to a full flush. */
void flush_icache_range(uae_u8 *start_p, uae_u32 length)
{
	if (!active)
		return;

#if LAZY_FLUSH_ICACHE_RANGE
	blockinfo *bi = active;
	while (bi) {
#if USE_CHECKSUM_INFO
		/* Overlap test against each source range of the block.  Note
		   the pointer differences rely on unsigned wrap-around when
		   start_p precedes/follows the range. */
		bool candidate = false;
		for (checksum_info *csi = bi->csi; csi; csi = csi->next) {
			if (((start_p - csi->start_p) < csi->length) ||
			    ((csi->start_p - start_p) < length)) {
				candidate = true;
				break;
			}
		}
#else
		// Assume system is consistent and would invalidate the right range
		const bool candidate = (bi->pc_p - start_p) < length;
#endif
		/* Advance before possibly unlinking the current block. */
		blockinfo *dbi = bi;
		bi = bi->next;
		if (candidate) {
			uae_u32 cl = cacheline(dbi->pc_p);
			if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) {
				if (dbi == cache_tags[cl+1].bi)
					cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
				dbi->handler_to_use = (cpuop_func *)popall_execute_normal;
				set_dhtu(dbi, dbi->direct_pen);
				dbi->status = BI_INVALID;
			}
			else {
				if (dbi == cache_tags[cl+1].bi)
					cache_tags[cl].handler = (cpuop_func *)popall_check_checksum;
				dbi->handler_to_use = (cpuop_func *)popall_check_checksum;
				set_dhtu(dbi, dbi->direct_pcc);
				dbi->status = BI_NEED_CHECK;
			}
			remove_from_list(dbi);
			add_to_dormant(dbi);
		}
	}
	return;
#endif
	flush_icache(-1);
}
6457    
/* Last-resort handler for unrecoverable JIT state: abort the emulator. */
static void catastrophe(void)
{
	abort();
}
6462    
6463     int failure;
6464    
6465     #define TARGET_M68K 0
6466     #define TARGET_POWERPC 1
6467     #define TARGET_X86 2
6468 gbeauche 1.24 #define TARGET_X86_64 3
6469 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6470     #define TARGET_NATIVE TARGET_X86
6471     #endif
6472     #if defined(powerpc) || defined(__powerpc__)
6473     #define TARGET_NATIVE TARGET_POWERPC
6474     #endif
6475 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6476     #define TARGET_NATIVE TARGET_X86_64
6477     #endif
6478 gbeauche 1.1
6479     #ifdef ENABLE_MON
6480 gbeauche 1.24 static uae_u32 mon_read_byte_jit(uintptr addr)
6481 gbeauche 1.1 {
6482     uae_u8 *m = (uae_u8 *)addr;
6483 gbeauche 1.24 return (uintptr)(*m);
6484 gbeauche 1.1 }
6485    
6486 gbeauche 1.24 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6487 gbeauche 1.1 {
6488     uae_u8 *m = (uae_u8 *)addr;
6489     *m = b;
6490     }
6491     #endif
6492    
/* Disassemble LENGTH bytes at START for the given TARGET architecture by
   invoking the "mon" monitor with its memory hooks redirected to host
   memory.  Only active when JITDebug is set and mon support is built in. */
void disasm_block(int target, uint8 * start, size_t length)
{
	if (!JITDebug)
		return;

#if defined(JIT_DEBUG) && defined(ENABLE_MON)
	/* Build a mon disassemble command for the selected architecture. */
	char disasm_str[200];
	sprintf(disasm_str, "%s $%x $%x",
			target == TARGET_M68K ? "d68" :
			target == TARGET_X86 ? "d86" :
			target == TARGET_X86_64 ? "d8664" :
			target == TARGET_POWERPC ? "d" : "x",
			start, start + length - 1);

	/* Temporarily point mon's memory accessors at host memory. */
	uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
	void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;

	mon_read_byte = mon_read_byte_jit;
	mon_write_byte = mon_write_byte_jit;

	char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
	mon(4, arg);

	mon_read_byte = old_mon_read_byte;
	mon_write_byte = old_mon_write_byte;
#endif
}
6520    
6521 gbeauche 1.24 static void disasm_native_block(uint8 *start, size_t length)
6522 gbeauche 1.1 {
6523     disasm_block(TARGET_NATIVE, start, length);
6524     }
6525    
6526 gbeauche 1.24 static void disasm_m68k_block(uint8 *start, size_t length)
6527 gbeauche 1.1 {
6528     disasm_block(TARGET_M68K, start, length);
6529     }
6530    
6531     #ifdef HAVE_GET_WORD_UNSWAPPED
6532     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6533     #else
6534     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6535     #endif
6536    
6537     #if JIT_DEBUG
6538     static uae_u8 *last_regs_pc_p = 0;
6539     static uae_u8 *last_compiled_block_addr = 0;
6540    
/* JIT debug helper: dump the key host addresses, the current m68k
   processor state, and the Mac-address-space location/size of the most
   recently executed compiled block. */
void compiler_dumpstate(void)
{
	if (!JITDebug)
		return;

	write_log("### Host addresses\n");
	write_log("MEM_BASE : %x\n", MEMBaseDiff);
	write_log("PC_P : %p\n", &regs.pc_p);
	write_log("SPCFLAGS : %p\n", &regs.spcflags);
	write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
	write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
	write_log("\n");

	write_log("### M68k processor state\n");
	m68k_dumpstate(0);
	write_log("\n");

	write_log("### Block in Mac address space\n");
	write_log("M68K block : %p\n",
			  (void *)(uintptr)get_virtual_address(last_regs_pc_p));
	write_log("Native block : %p (%d bytes)\n",
			  (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
			  get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
	write_log("\n");
}
6566     #endif
6567    
6568     static void compile_block(cpu_history* pc_hist, int blocklen)
6569     {
6570     if (letit && compiled_code) {
6571     #if PROFILE_COMPILE_TIME
6572     compile_count++;
6573     clock_t start_time = clock();
6574     #endif
6575     #if JIT_DEBUG
6576     bool disasm_block = false;
6577     #endif
6578    
6579     /* OK, here we need to 'compile' a block */
6580     int i;
6581     int r;
6582     int was_comp=0;
6583     uae_u8 liveflags[MAXRUN+1];
6584 gbeauche 1.8 #if USE_CHECKSUM_INFO
6585     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6586 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6587     uintptr min_pcp=max_pcp;
6588 gbeauche 1.8 #else
6589 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[0].location;
6590     uintptr min_pcp=max_pcp;
6591 gbeauche 1.8 #endif
6592 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6593     void* specflags=(void*)&regs.spcflags;
6594     blockinfo* bi=NULL;
6595     blockinfo* bi2;
6596     int extra_len=0;
6597    
6598     redo_current_block=0;
6599     if (current_compile_p>=max_compile_start)
6600     flush_icache_hard(7);
6601    
6602     alloc_blockinfos();
6603    
6604     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6605     bi2=get_blockinfo(cl);
6606    
6607     optlev=bi->optlevel;
6608     if (bi->status!=BI_INVALID) {
6609     Dif (bi!=bi2) {
6610     /* I don't think it can happen anymore. Shouldn't, in
6611     any case. So let's make sure... */
6612     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6613     bi->count,bi->optlevel,bi->handler_to_use,
6614     cache_tags[cl].handler);
6615     abort();
6616     }
6617    
6618     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6619     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6620     /* What the heck? We are not supposed to be here! */
6621     abort();
6622     }
6623     }
6624     if (bi->count==-1) {
6625     optlev++;
6626     while (!optcount[optlev])
6627     optlev++;
6628     bi->count=optcount[optlev]-1;
6629     }
6630 gbeauche 1.24 current_block_pc_p=(uintptr)pc_hist[0].location;
6631 gbeauche 1.1
6632     remove_deps(bi); /* We are about to create new code */
6633     bi->optlevel=optlev;
6634     bi->pc_p=(uae_u8*)pc_hist[0].location;
6635 gbeauche 1.8 #if USE_CHECKSUM_INFO
6636     free_checksum_info_chain(bi->csi);
6637     bi->csi = NULL;
6638     #endif
6639 gbeauche 1.1
6640     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6641     i=blocklen;
6642     while (i--) {
6643     uae_u16* currpcp=pc_hist[i].location;
6644     uae_u32 op=DO_GET_OPCODE(currpcp);
6645    
6646 gbeauche 1.8 #if USE_CHECKSUM_INFO
6647     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6648 gbeauche 1.33 if (follow_const_jumps && is_const_jump(op)) {
6649 gbeauche 1.8 checksum_info *csi = alloc_checksum_info();
6650     csi->start_p = (uae_u8 *)min_pcp;
6651     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6652     csi->next = bi->csi;
6653     bi->csi = csi;
6654 gbeauche 1.24 max_pcp = (uintptr)currpcp;
6655 gbeauche 1.8 }
6656 gbeauche 1.24 min_pcp = (uintptr)currpcp;
6657 gbeauche 1.8 #else
6658 gbeauche 1.24 if ((uintptr)currpcp<min_pcp)
6659     min_pcp=(uintptr)currpcp;
6660     if ((uintptr)currpcp>max_pcp)
6661     max_pcp=(uintptr)currpcp;
6662 gbeauche 1.8 #endif
6663 gbeauche 1.1
6664     liveflags[i]=((liveflags[i+1]&
6665     (~prop[op].set_flags))|
6666     prop[op].use_flags);
6667     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6668     liveflags[i]&= ~FLAG_Z;
6669     }
6670    
6671 gbeauche 1.8 #if USE_CHECKSUM_INFO
6672     checksum_info *csi = alloc_checksum_info();
6673     csi->start_p = (uae_u8 *)min_pcp;
6674     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6675     csi->next = bi->csi;
6676     bi->csi = csi;
6677     #endif
6678    
6679 gbeauche 1.1 bi->needed_flags=liveflags[0];
6680    
6681 gbeauche 1.5 align_target(align_loops);
6682 gbeauche 1.1 was_comp=0;
6683    
6684     bi->direct_handler=(cpuop_func *)get_target();
6685     set_dhtu(bi,bi->direct_handler);
6686     bi->status=BI_COMPILING;
6687 gbeauche 1.24 current_block_start_target=(uintptr)get_target();
6688 gbeauche 1.1
6689     log_startblock();
6690    
6691     if (bi->count>=0) { /* Need to generate countdown code */
6692 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6693     raw_sub_l_mi((uintptr)&(bi->count),1);
6694     raw_jl((uintptr)popall_recompile_block);
6695 gbeauche 1.1 }
6696     if (optlev==0) { /* No need to actually translate */
6697     /* Execute normally without keeping stats */
6698 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6699     raw_jmp((uintptr)popall_exec_nostats);
6700 gbeauche 1.1 }
6701     else {
6702     reg_alloc_run=0;
6703     next_pc_p=0;
6704     taken_pc_p=0;
6705     branch_cc=0;
6706    
6707     comp_pc_p=(uae_u8*)pc_hist[0].location;
6708     init_comp();
6709     was_comp=1;
6710    
6711 gbeauche 1.34 #ifdef USE_CPU_EMUL_SERVICES
6712     raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
6713     raw_jcc_b_oponly(NATIVE_CC_GT);
6714     uae_s8 *branchadd=(uae_s8*)get_target();
6715     emit_byte(0);
6716     raw_call((uintptr)cpu_do_check_ticks);
6717     *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
6718     #endif
6719    
6720 gbeauche 1.1 #if JIT_DEBUG
6721     if (JITDebug) {
6722 gbeauche 1.24 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6723     raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6724 gbeauche 1.1 }
6725     #endif
6726    
6727     for (i=0;i<blocklen &&
6728     get_target_noopt()<max_compile_start;i++) {
6729     cpuop_func **cputbl;
6730     compop_func **comptbl;
6731     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6732     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6733     if (!needed_flags) {
6734     cputbl=nfcpufunctbl;
6735     comptbl=nfcompfunctbl;
6736     }
6737     else {
6738     cputbl=cpufunctbl;
6739     comptbl=compfunctbl;
6740     }
6741 gbeauche 1.32
6742     #if FLIGHT_RECORDER
6743     {
6744     mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
6745     clobber_flags();
6746     remove_all_offsets();
6747     int arg = readreg_specific(S1,4,REG_PAR1);
6748     prepare_for_call_1();
6749     unlock2(arg);
6750     prepare_for_call_2();
6751     raw_call((uintptr)m68k_record_step);
6752     }
6753     #endif
6754 gbeauche 1.1
6755     failure = 1; // gb-- defaults to failure state
6756     if (comptbl[opcode] && optlev>1) {
6757     failure=0;
6758     if (!was_comp) {
6759     comp_pc_p=(uae_u8*)pc_hist[i].location;
6760     init_comp();
6761     }
6762 gbeauche 1.18 was_comp=1;
6763 gbeauche 1.1
6764     comptbl[opcode](opcode);
6765     freescratch();
6766     if (!(liveflags[i+1] & FLAG_CZNV)) {
6767     /* We can forget about flags */
6768     dont_care_flags();
6769     }
6770     #if INDIVIDUAL_INST
6771     flush(1);
6772     nop();
6773     flush(1);
6774     was_comp=0;
6775     #endif
6776     }
6777    
6778     if (failure) {
6779     if (was_comp) {
6780     flush(1);
6781     was_comp=0;
6782     }
6783     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6784     #if USE_NORMAL_CALLING_CONVENTION
6785     raw_push_l_r(REG_PAR1);
6786     #endif
6787 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,
6788     (uintptr)pc_hist[i].location);
6789     raw_call((uintptr)cputbl[opcode]);
6790 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6791     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6792 gbeauche 1.24 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6793 gbeauche 1.9 #endif
6794 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6795     raw_inc_sp(4);
6796     #endif
6797    
6798     if (i < blocklen - 1) {
6799     uae_s8* branchadd;
6800    
6801 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)specflags);
6802 gbeauche 1.1 raw_test_l_rr(0,0);
6803     raw_jz_b_oponly();
6804     branchadd=(uae_s8 *)get_target();
6805     emit_byte(0);
6806 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6807     *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6808 gbeauche 1.1 }
6809     }
6810     }
6811     #if 1 /* This isn't completely kosher yet; It really needs to be
6812     be integrated into a general inter-block-dependency scheme */
6813     if (next_pc_p && taken_pc_p &&
6814     was_comp && taken_pc_p==current_block_pc_p) {
6815     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6816     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6817     uae_u8 x=bi1->needed_flags;
6818    
6819     if (x==0xff || 1) { /* To be on the safe side */
6820     uae_u16* next=(uae_u16*)next_pc_p;
6821     uae_u32 op=DO_GET_OPCODE(next);
6822    
6823     x=0x1f;
6824     x&=(~prop[op].set_flags);
6825     x|=prop[op].use_flags;
6826     }
6827    
6828     x|=bi2->needed_flags;
6829     if (!(x & FLAG_CZNV)) {
6830     /* We can forget about flags */
6831     dont_care_flags();
6832     extra_len+=2; /* The next instruction now is part of this
6833     block */
6834     }
6835    
6836     }
6837     #endif
6838     log_flush();
6839    
6840     if (next_pc_p) { /* A branch was registered */
6841 gbeauche 1.24 uintptr t1=next_pc_p;
6842     uintptr t2=taken_pc_p;
6843 gbeauche 1.1 int cc=branch_cc;
6844    
6845     uae_u32* branchadd;
6846     uae_u32* tba;
6847     bigstate tmp;
6848     blockinfo* tbi;
6849    
6850     if (taken_pc_p<next_pc_p) {
6851     /* backward branch. Optimize for the "taken" case ---
6852     which means the raw_jcc should fall through when
6853     the 68k branch is taken. */
6854     t1=taken_pc_p;
6855     t2=next_pc_p;
6856     cc=branch_cc^1;
6857     }
6858    
6859     tmp=live; /* ouch! This is big... */
6860     raw_jcc_l_oponly(cc);
6861     branchadd=(uae_u32*)get_target();
6862     emit_long(0);
6863    
6864     /* predicted outcome */
6865     tbi=get_blockinfo_addr_new((void*)t1,1);
6866     match_states(tbi);
6867 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6868 gbeauche 1.1 raw_jcc_l_oponly(4);
6869     tba=(uae_u32*)get_target();
6870 gbeauche 1.24 emit_long(get_handler(t1)-((uintptr)tba+4));
6871     raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6872 gbeauche 1.28 flush_reg_count();
6873 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6874 gbeauche 1.1 create_jmpdep(bi,0,tba,t1);
6875    
6876 gbeauche 1.5 align_target(align_jumps);
6877 gbeauche 1.1 /* not-predicted outcome */
6878 gbeauche 1.24 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6879 gbeauche 1.1 live=tmp; /* Ouch again */
6880     tbi=get_blockinfo_addr_new((void*)t2,1);
6881     match_states(tbi);
6882    
6883     //flush(1); /* Can only get here if was_comp==1 */
6884 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6885 gbeauche 1.1 raw_jcc_l_oponly(4);
6886     tba=(uae_u32*)get_target();
6887 gbeauche 1.24 emit_long(get_handler(t2)-((uintptr)tba+4));
6888     raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6889 gbeauche 1.28 flush_reg_count();
6890 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6891 gbeauche 1.1 create_jmpdep(bi,1,tba,t2);
6892     }
6893     else
6894     {
6895     if (was_comp) {
6896     flush(1);
6897     }
6898 gbeauche 1.28 flush_reg_count();
6899 gbeauche 1.1
6900     /* Let's find out where next_handler is... */
6901     if (was_comp && isinreg(PC_P)) {
6902     r=live.state[PC_P].realreg;
6903     raw_and_l_ri(r,TAGMASK);
6904     int r2 = (r==0) ? 1 : 0;
6905 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6906     raw_cmp_l_mi((uintptr)specflags,0);
6907 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6908 gbeauche 1.1 raw_jmp_r(r2);
6909     }
6910     else if (was_comp && isconst(PC_P)) {
6911     uae_u32 v=live.state[PC_P].val;
6912     uae_u32* tba;
6913     blockinfo* tbi;
6914    
6915 gbeauche 1.24 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6916 gbeauche 1.1 match_states(tbi);
6917    
6918 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6919 gbeauche 1.1 raw_jcc_l_oponly(4);
6920     tba=(uae_u32*)get_target();
6921 gbeauche 1.24 emit_long(get_handler(v)-((uintptr)tba+4));
6922     raw_mov_l_mi((uintptr)&regs.pc_p,v);
6923     raw_jmp((uintptr)popall_do_nothing);
6924 gbeauche 1.1 create_jmpdep(bi,0,tba,v);
6925     }
6926     else {
6927     r=REG_PC_TMP;
6928 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6929 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6930     int r2 = (r==0) ? 1 : 0;
6931 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6932     raw_cmp_l_mi((uintptr)specflags,0);
6933 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6934 gbeauche 1.1 raw_jmp_r(r2);
6935     }
6936     }
6937     }
6938    
6939     #if USE_MATCH
6940     if (callers_need_recompile(&live,&(bi->env))) {
6941     mark_callers_recompile(bi);
6942     }
6943    
6944     big_to_small_state(&live,&(bi->env));
6945     #endif
6946    
6947 gbeauche 1.8 #if USE_CHECKSUM_INFO
6948     remove_from_list(bi);
6949     if (trace_in_rom) {
6950     // No need to checksum that block trace on cache invalidation
6951     free_checksum_info_chain(bi->csi);
6952     bi->csi = NULL;
6953     add_to_dormant(bi);
6954     }
6955     else {
6956     calc_checksum(bi,&(bi->c1),&(bi->c2));
6957     add_to_active(bi);
6958     }
6959     #else
6960 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
6961     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6962     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6963     else
6964     max_pcp+=LONGEST_68K_INST;
6965 gbeauche 1.7
6966 gbeauche 1.1 bi->len=max_pcp-min_pcp;
6967     bi->min_pcp=min_pcp;
6968 gbeauche 1.7
6969 gbeauche 1.1 remove_from_list(bi);
6970     if (isinrom(min_pcp) && isinrom(max_pcp)) {
6971     add_to_dormant(bi); /* No need to checksum it on cache flush.
6972     Please don't start changing ROMs in
6973     flight! */
6974     }
6975     else {
6976     calc_checksum(bi,&(bi->c1),&(bi->c2));
6977     add_to_active(bi);
6978     }
6979 gbeauche 1.8 #endif
6980 gbeauche 1.1
6981     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6982    
6983     #if JIT_DEBUG
6984     if (JITDebug)
6985     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6986    
6987     if (JITDebug && disasm_block) {
6988     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6989     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6990     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6991     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6992     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6993     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6994     getchar();
6995     }
6996     #endif
6997    
6998     log_dump();
6999 gbeauche 1.5 align_target(align_jumps);
7000 gbeauche 1.1
7001     /* This is the non-direct handler */
7002     bi->handler=
7003     bi->handler_to_use=(cpuop_func *)get_target();
7004 gbeauche 1.24 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
7005     raw_jnz((uintptr)popall_cache_miss);
7006 gbeauche 1.1 comp_pc_p=(uae_u8*)pc_hist[0].location;
7007    
7008     bi->status=BI_FINALIZING;
7009     init_comp();
7010     match_states(bi);
7011     flush(1);
7012    
7013 gbeauche 1.24 raw_jmp((uintptr)bi->direct_handler);
7014 gbeauche 1.1
7015     current_compile_p=get_target();
7016     raise_in_cl_list(bi);
7017    
7018     /* We will flush soon, anyway, so let's do it now */
7019     if (current_compile_p>=max_compile_start)
7020     flush_icache_hard(7);
7021    
7022     bi->status=BI_ACTIVE;
7023     if (redo_current_block)
7024     block_need_recompile(bi);
7025    
7026     #if PROFILE_COMPILE_TIME
7027     compile_time += (clock() - start_time);
7028     #endif
7029     }
7030 gbeauche 1.34
7031     /* Account for compilation time */
7032     cpu_do_check_ticks();
7033 gbeauche 1.1 }
7034    
void do_nothing(void)
{
	/* Intentionally empty: used as a harmless jump target for the
	   translated-code dispatch tables. */
}
7039    
7040     void exec_nostats(void)
7041     {
7042     for (;;) {
7043     uae_u32 opcode = GET_OPCODE;
7044 gbeauche 1.32 #if FLIGHT_RECORDER
7045     m68k_record_step(m68k_getpc());
7046     #endif
7047 gbeauche 1.1 (*cpufunctbl[opcode])(opcode);
7048 gbeauche 1.34 cpu_check_ticks();
7049 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7050     return; /* We will deal with the spcflags in the caller */
7051     }
7052     }
7053     }
7054    
7055     void execute_normal(void)
7056     {
7057     if (!check_for_cache_miss()) {
7058     cpu_history pc_hist[MAXRUN];
7059     int blocklen = 0;
7060     #if REAL_ADDRESSING || DIRECT_ADDRESSING
7061     start_pc_p = regs.pc_p;
7062     start_pc = get_virtual_address(regs.pc_p);
7063     #else
7064     start_pc_p = regs.pc_oldp;
7065     start_pc = regs.pc;
7066     #endif
7067     for (;;) { /* Take note: This is the do-it-normal loop */
7068     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7069     uae_u32 opcode = GET_OPCODE;
7070     #if FLIGHT_RECORDER
7071     m68k_record_step(m68k_getpc());
7072     #endif
7073     (*cpufunctbl[opcode])(opcode);
7074 gbeauche 1.34 cpu_check_ticks();
7075 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7076     compile_block(pc_hist, blocklen);
7077     return; /* We will deal with the spcflags in the caller */
7078     }
7079     /* No need to check regs.spcflags, because if they were set,
7080     we'd have ended up inside that "if" */
7081     }
7082     }
7083     }
7084    
7085     typedef void (*compiled_handler)(void);
7086    
7087 gbeauche 1.36 static void m68k_do_compile_execute(void)
7088 gbeauche 1.1 {
7089     for (;;) {
7090     ((compiled_handler)(pushall_call_handler))();
7091     /* Whenever we return from that, we should check spcflags */
7092     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7093     if (m68k_do_specialties ())
7094     return;
7095     }
7096     }
7097     }
7098 gbeauche 1.35
7099     void m68k_compile_execute (void)
7100     {
7101     for (;;) {
7102     if (quit_program)
7103     break;
7104     m68k_do_compile_execute();
7105     }
7106     }