root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision 1.43, committed 2008-01-01T09:40:35Z by gbeauche (branch MAIN, tag HEAD)
Changes since 1.42: +1 -1 lines
Log message: Happy New Year!

/*
 *  compiler/compemu_support.cpp - Core dynamic translation engine
 *
 *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
 *
 *  Adaptation for Basilisk II and improvements, copyright 2000-2005
 *    Gwenole Beauchesne
 *
 *  Basilisk II (C) 1997-2008 Christian Bauer
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#if !REAL_ADDRESSING && !DIRECT_ADDRESSING
#error "Only Real or Direct Addressing is supported with the JIT Compiler"
#endif

#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
#endif

/* NOTE: support for AMD64 assumes translation cache and other code
 * buffers are allocated into a 32-bit address space because (i) B2/JIT
 * code is not 64-bit clean and (ii) it's faster to resolve branches
 * that way.
 */
#if !defined(__i386__) && !defined(__x86_64__)
#error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
#endif

#define USE_MATCH 0

/* kludge for Brian, so he can compile under MSVC++ */
#define USE_NORMAL_CALLING_CONVENTION 0

#ifndef WIN32
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include <stdlib.h>
#include <fcntl.h>
#include <errno.h>

#include "sysdeps.h"
#include "cpu_emulation.h"
#include "main.h"
#include "prefs.h"
#include "user_strings.h"
#include "vm_alloc.h"

#include "m68k.h"
#include "memory.h"
#include "readcpu.h"
#include "newcpu.h"
#include "comptbl.h"
#include "compiler/compemu.h"
#include "fpu/fpu.h"
#include "fpu/flags.h"

#define DEBUG 1
#include "debug.h"

#ifdef ENABLE_MON
#include "mon.h"
#endif

#ifndef WIN32
#define PROFILE_COMPILE_TIME 1
#define PROFILE_UNTRANSLATED_INSNS 1
#endif

#if defined(__x86_64__) && 0
#define RECORD_REGISTER_USAGE 1
#endif

#ifdef WIN32
#undef write_log
#define write_log dummy_write_log
static void dummy_write_log(const char *, ...) { }
#endif

#if JIT_DEBUG
#undef abort
#define abort() do { \
    fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
    exit(EXIT_FAILURE); \
} while (0)
#endif

#if RECORD_REGISTER_USAGE
static uint64 reg_count[16];
static int reg_count_local[16];

static int reg_count_compare(const void *ap, const void *bp)
{
    const int a = *((int *)ap);
    const int b = *((int *)bp);
    /* sort descending by use count; avoid truncating the uint64
       difference to int, which could wrap around */
    return (reg_count[b] > reg_count[a]) - (reg_count[b] < reg_count[a]);
}
#endif

#if PROFILE_COMPILE_TIME
#include <time.h>
static uae_u32 compile_count = 0;
static clock_t compile_time = 0;
static clock_t emul_start_time = 0;
static clock_t emul_end_time = 0;
#endif

#if PROFILE_UNTRANSLATED_INSNS
const int untranslated_top_ten = 20;
static uae_u32 raw_cputbl_count[65536] = { 0, };
static uae_u16 opcode_nums[65536];

static int untranslated_compfn(const void *e1, const void *e2)
{
    uae_u32 n1 = raw_cputbl_count[*(const uae_u16 *)e1];
    uae_u32 n2 = raw_cputbl_count[*(const uae_u16 *)e2];
    /* sort descending by count; qsort needs a <0/0/>0 result, which the
       old boolean "<" comparison did not provide */
    return (n1 < n2) - (n1 > n2);
}
#endif

static compop_func *compfunctbl[65536];
static compop_func *nfcompfunctbl[65536];
static cpuop_func *nfcpufunctbl[65536];
uae_u8* comp_pc_p;

// From newcpu.cpp
extern bool quit_program;

// gb-- Extra data for Basilisk II/JIT
#if JIT_DEBUG
static bool JITDebug = false;           // Enable runtime disassemblers through mon?
#else
const bool JITDebug = false;            // Don't use JIT debug mode at all
#endif
#if USE_INLINING
static bool follow_const_jumps = true;  // Flag: translation through constant jumps
#else
const bool follow_const_jumps = false;
#endif

const uae_u32 MIN_CACHE_SIZE = 1024;    // Minimal translation cache size (1 MB)
static uae_u32 cache_size = 0;          // Size of total cache allocated for compiled blocks
static uae_u32 current_cache_size = 0;  // Cache grows upwards: how much has been consumed already
static bool lazy_flush = true;          // Flag: lazy translation cache invalidation
static bool avoid_fpu = true;           // Flag: compile FPU instructions?
static bool have_cmov = false;          // target has CMOV instructions?
static bool have_lahf_lm = true;        // target has LAHF supported in long mode?
static bool have_rat_stall = true;      // target has partial register stalls?
const bool tune_alignment = true;       // Tune code alignments for running CPU?
const bool tune_nop_fillers = true;     // Tune no-op fillers for architecture
static bool setzflg_uses_bsf = false;   // setzflg virtual instruction can use native BSF instruction correctly?
static int align_loops = 32;            // Align the start of loops
static int align_jumps = 32;            // Align the start of jumps
static int optcount[10] = {
    10,  // How often a block has to be executed before it is translated
    0,   // How often to use naive translation
    0, 0, 0, 0,
    -1, -1, -1, -1
};

struct op_properties {
    uae_u8 use_flags;
    uae_u8 set_flags;
    uae_u8 is_addx;
    uae_u8 cflow;
};
static op_properties prop[65536];

static inline int end_block(uae_u32 opcode)
{
    return (prop[opcode].cflow & fl_end_block);
}

static inline bool is_const_jump(uae_u32 opcode)
{
    return (prop[opcode].cflow == fl_const_jump);
}

static inline bool may_trap(uae_u32 opcode)
{
    return (prop[opcode].cflow & fl_trap);
}

static inline unsigned int cft_map (unsigned int f)
{
#ifndef HAVE_GET_WORD_UNSWAPPED
    return f;
#else
    return ((f >> 8) & 255) | ((f & 255) << 8);
#endif
}
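/* cft_map() yields the index into the compiler function tables for an
 * opcode word as it sits in memory: with HAVE_GET_WORD_UNSWAPPED, opcode
 * words are fetched without byte-swapping on little-endian hosts, so the
 * tables are indexed by the byte-swapped opcode and cft_map() performs
 * that swap here. */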

uae_u8* start_pc_p;
uae_u32 start_pc;
uae_u32 current_block_pc_p;
static uintptr current_block_start_target;
uae_u32 needed_flags;
static uintptr next_pc_p;
static uintptr taken_pc_p;
static int branch_cc;
static int redo_current_block;

int segvcount=0;
int soft_flush_count=0;
int hard_flush_count=0;
int checksum_count=0;
static uae_u8* current_compile_p=NULL;
static uae_u8* max_compile_start;
static uae_u8* compiled_code=NULL;
static uae_s32 reg_alloc_run;
const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
static uae_u8* popallspace=NULL;

void* pushall_call_handler=NULL;
static void* popall_do_nothing=NULL;
static void* popall_exec_nostats=NULL;
static void* popall_execute_normal=NULL;
static void* popall_cache_miss=NULL;
static void* popall_recompile_block=NULL;
static void* popall_check_checksum=NULL;

/* The 68k only ever executes from even addresses, so right now we
 * waste half the entries in this array.
 * UPDATE: We now use those entries to store the start of the linked
 * lists that we maintain for each hash result.
 */
cacheline cache_tags[TAGSIZE];
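/* Illustrative sketch (not part of the original source): how the tag
 * array is typically indexed.  The real cacheline() helper lives in
 * compemu.h; the mask used here is hypothetical. */
#if 0
static inline uae_u32 example_cacheline(void *addr)
{
    /* Because 68k code starts on even addresses, bit 0 never selects a
       distinct block, so odd entries are free.  That is why the code
       below keeps the handler in cache_tags[cl] and the head of the
       blockinfo list in cache_tags[cl+1]. */
    return ((uintptr)addr) & (TAGSIZE - 2); /* hypothetical even-index mask */
}
#endif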
int letit=0;
blockinfo* hold_bi[MAX_HOLD_BI];
blockinfo* active;
blockinfo* dormant;

/* 68040 */
extern struct cputbl op_smalltbl_0_nf[];
extern struct comptbl op_smalltbl_0_comp_nf[];
extern struct comptbl op_smalltbl_0_comp_ff[];

/* 68020 + 68881 */
extern struct cputbl op_smalltbl_1_nf[];

/* 68020 */
extern struct cputbl op_smalltbl_2_nf[];

/* 68010 */
extern struct cputbl op_smalltbl_3_nf[];

/* 68000 */
extern struct cputbl op_smalltbl_4_nf[];

/* 68000 slow but compatible. */
extern struct cputbl op_smalltbl_5_nf[];

static void flush_icache_hard(int n);
static void flush_icache_lazy(int n);
static void flush_icache_none(int n);
void (*flush_icache)(int n) = flush_icache_none;



bigstate live;
smallstate empty_ss;
smallstate default_ss;
static int optlev;

static int writereg(int r, int size);
static void unlock2(int r);
static void setlock(int r);
static int readreg_specific(int r, int size, int spec);
static int writereg_specific(int r, int size, int spec);
static void prepare_for_call_1(void);
static void prepare_for_call_2(void);
static void align_target(uae_u32 a);

static uae_s32 nextused[VREGS];

uae_u32 m68k_pc_offset;

/* Some arithmetic operations can be optimized away if the operands
 * are known to be constant. But that's only a good idea when the
 * side effects they would have on the flags are not important. This
 * variable indicates whether we need the side effects or not.
 */
uae_u32 needflags=0;
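/* Illustrative sketch (not part of the original source): how a compiled
 * arithmetic operation can be folded at compile time when both operands
 * are constant, provided the flag side effects are not needed.
 * comp_add_example() is a hypothetical name; isconst(), set_const() and
 * live.state[].val are the real helpers defined below. */
#if 0
static void comp_add_example(int d, int s)
{
    if (!needflags && isconst(d) && isconst(s)) {
        /* fold: no code is emitted at all */
        set_const(d, live.state[d].val + live.state[s].val);
        return;
    }
    /* otherwise emit a real ADD, which also produces the native flags */
}
#endif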

/* Flag handling is complicated.
 *
 * x86 instructions create flags, which quite often are exactly what we
 * want. So at times, the "68k" flags are actually in the x86 flags.
 *
 * Then again, sometimes we do x86 instructions that clobber the x86
 * flags, but don't represent a corresponding m68k instruction. In that
 * case, we have to save them.
 *
 * We used to save them to the stack, but now store them back directly
 * into the regflags.cznv of the traditional emulation. Thus some odd
 * names.
 *
 * So flags can be in either of two places (used to be three; boy were
 * things complicated back then!), and either place can contain either
 * valid flags or invalid trash (and on the stack, there was also the
 * option of "nothing at all", now gone). A couple of variables keep
 * track of the respective states.
 *
 * To make things worse, we might or might not be interested in the flags.
 * By default, we are, but a call to dont_care_flags can change that
 * until the next call to live_flags. If we are not, pretty much whatever
 * is in the register and/or the native flags is seen as valid.
 */
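/* Illustrative summary (not part of the original source) of the flag
 * state tracking described above:
 *
 *   live.flags_in_flags -- VALID if the native x86 flags currently hold
 *                          the emulated 68k flags, TRASH otherwise.
 *   live.flags_on_stack -- VALID if regflags.cznv holds them ("stack"
 *                          is the historical name; see above).
 *
 * make_flags_live_internal() below moves them back into the x86 flags,
 * flags_to_stack() spills them to regflags.cznv, and clobber_flags()
 * must be called before emitting any x86 instruction that overwrites
 * flags the 68k side may still need. */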

static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
    return cache_tags[cl+1].bi;
}

static __inline__ blockinfo* get_blockinfo_addr(void* addr)
{
    blockinfo* bi=get_blockinfo(cacheline(addr));

    while (bi) {
        if (bi->pc_p==addr)
            return bi;
        bi=bi->next_same_cl;
    }
    return NULL;
}


/*******************************************************************
 * All sorts of list related functions for all of the lists        *
 *******************************************************************/

static __inline__ void remove_from_cl_list(blockinfo* bi)
{
    uae_u32 cl=cacheline(bi->pc_p);

    if (bi->prev_same_cl_p)
        *(bi->prev_same_cl_p)=bi->next_same_cl;
    if (bi->next_same_cl)
        bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
    if (cache_tags[cl+1].bi)
        cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
    else
        cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}

static __inline__ void remove_from_list(blockinfo* bi)
{
    if (bi->prev_p)
        *(bi->prev_p)=bi->next;
    if (bi->next)
        bi->next->prev_p=bi->prev_p;
}

static __inline__ void remove_from_lists(blockinfo* bi)
{
    remove_from_list(bi);
    remove_from_cl_list(bi);
}

static __inline__ void add_to_cl_list(blockinfo* bi)
{
    uae_u32 cl=cacheline(bi->pc_p);

    if (cache_tags[cl+1].bi)
        cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
    bi->next_same_cl=cache_tags[cl+1].bi;

    cache_tags[cl+1].bi=bi;
    bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

    cache_tags[cl].handler=bi->handler_to_use;
}

static __inline__ void raise_in_cl_list(blockinfo* bi)
{
    remove_from_cl_list(bi);
    add_to_cl_list(bi);
}

static __inline__ void add_to_active(blockinfo* bi)
{
    if (active)
        active->prev_p=&(bi->next);
    bi->next=active;

    active=bi;
    bi->prev_p=&active;
}

static __inline__ void add_to_dormant(blockinfo* bi)
{
    if (dormant)
        dormant->prev_p=&(bi->next);
    bi->next=dormant;

    dormant=bi;
    bi->prev_p=&dormant;
}

static __inline__ void remove_dep(dependency* d)
{
    if (d->prev_p)
        *(d->prev_p)=d->next;
    if (d->next)
        d->next->prev_p=d->prev_p;
    d->prev_p=NULL;
    d->next=NULL;
}

/* This block's code is about to be thrown away, so it no longer
   depends on anything else */
static __inline__ void remove_deps(blockinfo* bi)
{
    remove_dep(&(bi->dep[0]));
    remove_dep(&(bi->dep[1]));
}

static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
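    /* Patch an x86 rel32 branch displacement: the operand is encoded
       relative to the end of the 4-byte displacement field, hence the
       "+4" below. */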
    *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}

/********************************************************************
 * Soft flush handling support functions                            *
 ********************************************************************/

static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
    //write_log("bi is %p\n",bi);
    if (dh!=bi->direct_handler_to_use) {
        dependency* x=bi->deplist;
        //write_log("bi->deplist=%p\n",bi->deplist);
        while (x) {
            //write_log("x is %p\n",x);
            //write_log("x->next is %p\n",x->next);
            //write_log("x->prev_p is %p\n",x->prev_p);

            if (x->jmp_off) {
                adjust_jmpdep(x,dh);
            }
            x=x->next;
        }
        bi->direct_handler_to_use=dh;
    }
}

static __inline__ void invalidate_block(blockinfo* bi)
{
    int i;

    bi->optlevel=0;
    bi->count=optcount[0]-1;
    bi->handler=NULL;
    bi->handler_to_use=(cpuop_func *)popall_execute_normal;
    bi->direct_handler=NULL;
    set_dhtu(bi,bi->direct_pen);
    bi->needed_flags=0xff;
    bi->status=BI_INVALID;
    for (i=0;i<2;i++) {
        bi->dep[i].jmp_off=NULL;
        bi->dep[i].target=NULL;
    }
    remove_deps(bi);
}

static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
    blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);

    Dif(!tbi) {
        write_log("Could not create jmpdep!\n");
        abort();
    }
    bi->dep[i].jmp_off=jmpaddr;
    bi->dep[i].source=bi;
    bi->dep[i].target=tbi;
    bi->dep[i].next=tbi->deplist;
    if (bi->dep[i].next)
        bi->dep[i].next->prev_p=&(bi->dep[i].next);
    bi->dep[i].prev_p=&(tbi->deplist);
    tbi->deplist=&(bi->dep[i]);
}

static __inline__ void block_need_recompile(blockinfo * bi)
{
    uae_u32 cl = cacheline(bi->pc_p);

    set_dhtu(bi, bi->direct_pen);
    bi->direct_handler = bi->direct_pen;

    bi->handler_to_use = (cpuop_func *)popall_execute_normal;
    bi->handler = (cpuop_func *)popall_execute_normal;
    if (bi == cache_tags[cl + 1].bi)
        cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
    bi->status = BI_NEED_RECOMP;
}

static __inline__ void mark_callers_recompile(blockinfo * bi)
{
    dependency *x = bi->deplist;

    while (x) {
        dependency *next = x->next; /* This disappears when we mark for
                                     * recompilation and thus remove the
                                     * blocks from the lists */
        if (x->jmp_off) {
            blockinfo *cbi = x->source;

            Dif(cbi->status == BI_INVALID) {
                // write_log("invalid block in dependency list\n"); // FIXME?
                // abort();
            }
            if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
                block_need_recompile(cbi);
                mark_callers_recompile(cbi);
            }
            else if (cbi->status == BI_COMPILING) {
                redo_current_block = 1;
            }
            else if (cbi->status == BI_NEED_RECOMP) {
                /* nothing */
            }
            else {
                //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
            }
        }
        x = next;
    }
}

static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
    blockinfo* bi=get_blockinfo_addr(addr);
    int i;

    if (!bi) {
        for (i=0;i<MAX_HOLD_BI && !bi;i++) {
            if (hold_bi[i]) {
                uae_u32 cl=cacheline(addr);

                bi=hold_bi[i];
                hold_bi[i]=NULL;
                bi->pc_p=(uae_u8 *)addr;
                invalidate_block(bi);
                add_to_active(bi);
                add_to_cl_list(bi);

            }
        }
    }
    if (!bi) {
        write_log("Looking for blockinfo, can't find free one\n");
        abort();
    }
    return bi;
}

static void prepare_block(blockinfo* bi);

/* Management of blockinfos.

   A blockinfo struct is allocated whenever a new block has to be
   compiled. If the list of free blockinfos is empty, we allocate a new
   pool of blockinfos and link the newly created blockinfos together
   into the list of free blockinfos. Otherwise, we simply pop a structure
   off the free list.

   Blockinfos are lazily deallocated, i.e. chained together in the
   list of free blockinfos whenever a translation cache flush (hard or
   soft) request occurs.
*/

template< class T >
class LazyBlockAllocator
{
    enum {
        kPoolSize = 1 + 4096 / sizeof(T)
    };
    struct Pool {
        T chunk[kPoolSize];
        Pool * next;
    };
    Pool * mPools;
    T * mChunks;
public:
    LazyBlockAllocator() : mPools(0), mChunks(0) { }
    ~LazyBlockAllocator();
    T * acquire();
    void release(T * const);
};

template< class T >
LazyBlockAllocator<T>::~LazyBlockAllocator()
{
    Pool * currentPool = mPools;
    while (currentPool) {
        Pool * deadPool = currentPool;
        currentPool = currentPool->next;
        free(deadPool);
    }
}

template< class T >
T * LazyBlockAllocator<T>::acquire()
{
    if (!mChunks) {
        // There is no chunk left, allocate a new pool and link the
        // chunks into the free list
        Pool * newPool = (Pool *)malloc(sizeof(Pool));
        for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
            chunk->next = mChunks;
            mChunks = chunk;
        }
        newPool->next = mPools;
        mPools = newPool;
    }
    T * chunk = mChunks;
    mChunks = chunk->next;
    return chunk;
}

template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
    chunk->next = mChunks;
    mChunks = chunk;
}

template< class T >
class HardBlockAllocator
{
public:
    T * acquire() {
        T * data = (T *)current_compile_p;
        current_compile_p += sizeof(T);
        return data;
    }

    void release(T * const chunk) {
        // Deallocated on invalidation
    }
};

#if USE_SEPARATE_BIA
static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
#else
static HardBlockAllocator<blockinfo> BlockInfoAllocator;
static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
#endif

static __inline__ checksum_info *alloc_checksum_info(void)
{
    checksum_info *csi = ChecksumInfoAllocator.acquire();
    csi->next = NULL;
    return csi;
}

static __inline__ void free_checksum_info(checksum_info *csi)
{
    csi->next = NULL;
    ChecksumInfoAllocator.release(csi);
}

static __inline__ void free_checksum_info_chain(checksum_info *csi)
{
    while (csi != NULL) {
        checksum_info *csi2 = csi->next;
        free_checksum_info(csi);
        csi = csi2;
    }
}

static __inline__ blockinfo *alloc_blockinfo(void)
{
    blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
    bi->csi = NULL;
#endif
    return bi;
}

static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
    free_checksum_info_chain(bi->csi);
    bi->csi = NULL;
#endif
    BlockInfoAllocator.release(bi);
}

static __inline__ void alloc_blockinfos(void)
{
    int i;
    blockinfo* bi;

    for (i=0;i<MAX_HOLD_BI;i++) {
        if (hold_bi[i])
            return;
        bi=hold_bi[i]=alloc_blockinfo();
        prepare_block(bi);
    }
}

/********************************************************************
 * Functions to emit data into memory, and other general support    *
 ********************************************************************/
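/* Note (not part of the original source): the emit_* helpers below
 * store little-endian values at arbitrary alignment, which is safe on
 * the only supported targets -- IA-32 and x86-64, as enforced by the
 * #error check at the top of this file. */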

static uae_u8* target;

static void emit_init(void)
{
}

static __inline__ void emit_byte(uae_u8 x)
{
    *target++=x;
}

static __inline__ void emit_word(uae_u16 x)
{
    *((uae_u16*)target)=x;
    target+=2;
}

static __inline__ void emit_long(uae_u32 x)
{
    *((uae_u32*)target)=x;
    target+=4;
}

static __inline__ void emit_quad(uae_u64 x)
{
    *((uae_u64*)target)=x;
    target+=8;
}

static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
{
    memcpy((uae_u8 *)target,block,blocklen);
    target+=blocklen;
}

static __inline__ uae_u32 reverse32(uae_u32 v)
{
#if 1
    // gb-- We have specialized byteswapping functions, just use them
    return do_byteswap_32(v);
#else
    return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
#endif
}

/********************************************************************
 * Getting the information about the target CPU                     *
 ********************************************************************/

#include "codegen_x86.cpp"

void set_target(uae_u8* t)
{
    target=t;
}

static __inline__ uae_u8* get_target_noopt(void)
{
    return target;
}

__inline__ uae_u8* get_target(void)
{
    return get_target_noopt();
}


/********************************************************************
 * Flags status handling. EMIT TIME!                                *
 ********************************************************************/

static void bt_l_ri_noclobber(R4 r, IMM i);

static void make_flags_live_internal(void)
{
    if (live.flags_in_flags==VALID)
        return;
    Dif (live.flags_on_stack==TRASH) {
        write_log("Want flags, got something on stack, but it is TRASH\n");
        abort();
    }
    if (live.flags_on_stack==VALID) {
        int tmp;
        tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
        raw_reg_to_flags(tmp);
        unlock2(tmp);

        live.flags_in_flags=VALID;
        return;
    }
    write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
              live.flags_in_flags,live.flags_on_stack);
    abort();
}

static void flags_to_stack(void)
{
    if (live.flags_on_stack==VALID)
        return;
    if (!live.flags_are_important) {
        live.flags_on_stack=VALID;
        return;
    }
    Dif (live.flags_in_flags!=VALID)
        abort();
    else {
        int tmp;
        tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
        raw_flags_to_reg(tmp);
        unlock2(tmp);
    }
    live.flags_on_stack=VALID;
}

static __inline__ void clobber_flags(void)
{
    if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
        flags_to_stack();
    live.flags_in_flags=TRASH;
}

/* Prepare for leaving the compiled stuff */
static __inline__ void flush_flags(void)
{
    flags_to_stack();
    return;
}

int touchcnt;

/********************************************************************
 * Partial register flushing for optimized calls                    *
 ********************************************************************/

struct regusage {
    uae_u16 rmask;
    uae_u16 wmask;
};

static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
    *mask |= 1 << reg;
#endif
}

static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
    return (*mask & (1 << reg));
#else
    /* Default: instruction reads from & writes to the register */
    return true;
#endif
}

static inline void ru_set_read(regusage *ru, int reg)
{
    ru_set(&ru->rmask, reg);
}

static inline void ru_set_write(regusage *ru, int reg)
{
    ru_set(&ru->wmask, reg);
}

static inline bool ru_read_p(const regusage *ru, int reg)
{
    return ru_get(&ru->rmask, reg);
}

static inline bool ru_write_p(const regusage *ru, int reg)
{
    return ru_get(&ru->wmask, reg);
}
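/* Illustrative sketch (not part of the original source): how a caller
 * could consult a regusage record around an optimized call, spilling
 * only the 68k registers the called code actually touches.
 * example_partial_flush() is a hypothetical name; tomem_c() and
 * disassociate() are the real helpers defined later in this file. */
#if 0
static void example_partial_flush(const regusage *ru)
{
    for (int reg = 0; reg < 16; reg++) {
        if (ru_read_p(ru, reg))
            tomem_c(reg);      /* callee reads it: memory must be current */
        if (ru_write_p(ru, reg))
            disassociate(reg); /* callee writes it: cached copy goes stale */
    }
}
#endif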

static void ru_fill_ea(regusage *ru, int reg, amodes mode,
                       wordsizes size, int write_mode)
{
    switch (mode) {
    case Areg:
        reg += 8;
        /* fall through */
    case Dreg:
        ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
        break;
    case Ad16:
        /* skip displacement */
        m68k_pc_offset += 2;
    case Aind:
    case Aipi:
    case Apdi:
        ru_set_read(ru, reg+8);
        break;
    case Ad8r:
        ru_set_read(ru, reg+8);
        /* fall through */
    case PC8r: {
        uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
        reg = (dp >> 12) & 15;
        ru_set_read(ru, reg);
        if (dp & 0x100)
            m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
        break;
    }
    case PC16:
    case absw:
    case imm0:
    case imm1:
        m68k_pc_offset += 2;
        break;
    case absl:
    case imm2:
        m68k_pc_offset += 4;
        break;
    case immi:
        m68k_pc_offset += (size == sz_long) ? 4 : 2;
        break;
    }
}

/* TODO: split into a static initialization part and a dynamic one
   (instructions depending on extension words) */
static void ru_fill(regusage *ru, uae_u32 opcode)
{
    m68k_pc_offset += 2;

    /* Default: no register is used or written to */
    ru->rmask = 0;
    ru->wmask = 0;

    uae_u32 real_opcode = cft_map(opcode);
    struct instr *dp = &table68k[real_opcode];

    bool rw_dest = true;
    bool handled = false;

    /* Handle some instructions specifically */
    uae_u16 reg, ext;
    switch (dp->mnemo) {
    case i_BFCHG:
    case i_BFCLR:
    case i_BFEXTS:
    case i_BFEXTU:
    case i_BFFFO:
    case i_BFINS:
    case i_BFSET:
    case i_BFTST:
        ext = comp_get_iword((m68k_pc_offset+=2)-2);
        if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
        if (ext & 0x020) ru_set_read(ru, ext & 7);
        ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
        if (dp->dmode == Dreg)
            ru_set_read(ru, dp->dreg);
        switch (dp->mnemo) {
        case i_BFEXTS:
        case i_BFEXTU:
        case i_BFFFO:
            ru_set_write(ru, (ext >> 12) & 7);
            break;
        case i_BFINS:
            ru_set_read(ru, (ext >> 12) & 7);
            /* fall through */
        case i_BFCHG:
        case i_BFCLR:
        case i_BFSET:
            if (dp->dmode == Dreg)
                ru_set_write(ru, dp->dreg);
            break;
        }
        handled = true;
        rw_dest = false;
        break;

    case i_BTST:
        rw_dest = false;
        break;

    case i_CAS:
    {
        ext = comp_get_iword((m68k_pc_offset+=2)-2);
        int Du = ext & 7;
        ru_set_read(ru, Du);
        int Dc = (ext >> 6) & 7;
        ru_set_read(ru, Dc);
        ru_set_write(ru, Dc);
        break;
    }
    case i_CAS2:
    {
        int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
        ext = comp_get_iword((m68k_pc_offset+=2)-2);
        Rn1 = (ext >> 12) & 15;
        Du1 = (ext >> 6) & 7;
        Dc1 = ext & 7;
        ru_set_read(ru, Rn1);
        ru_set_read(ru, Du1);
        ru_set_read(ru, Dc1);
        ru_set_write(ru, Dc1);
        ext = comp_get_iword((m68k_pc_offset+=2)-2);
        Rn2 = (ext >> 12) & 15;
        Du2 = (ext >> 6) & 7;
        Dc2 = ext & 7;
        ru_set_read(ru, Rn2);
        ru_set_read(ru, Du2);
        ru_set_write(ru, Dc2);
        break;
    }
    case i_DIVL: case i_MULL:
        m68k_pc_offset += 2;
        break;
    case i_LEA:
    case i_MOVE: case i_MOVEA: case i_MOVE16:
        rw_dest = false;
        break;
    case i_PACK: case i_UNPK:
        rw_dest = false;
        m68k_pc_offset += 2;
        break;
    case i_TRAPcc:
        m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
        break;
    case i_RTR:
        /* do nothing, just for coverage debugging */
        break;
    /* TODO: handle EXG instruction */
    }

    /* Handle A-Traps better */
    if ((real_opcode & 0xf000) == 0xa000) {
        handled = true;
    }

    /* Handle EmulOps better */
    if ((real_opcode & 0xff00) == 0x7100) {
        handled = true;
        ru->rmask = 0xffff;
        ru->wmask = 0;
    }

    if (dp->suse && !handled)
        ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);

    if (dp->duse && !handled)
        ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);

    if (rw_dest)
        ru->rmask |= ru->wmask;

    handled = handled || dp->suse || dp->duse;

    /* Mark all registers as used/written if the instruction may trap */
    if (may_trap(opcode)) {
        handled = true;
        ru->rmask = 0xffff;
        ru->wmask = 0xffff;
    }

    if (!handled) {
        write_log("ru_fill: %04x = { %04x, %04x }\n",
                  real_opcode, ru->rmask, ru->wmask);
        abort();
    }
}

/********************************************************************
 * register allocation per block logging                            *
 ********************************************************************/

static uae_s8 vstate[VREGS];
static uae_s8 vwritten[VREGS];
static uae_s8 nstate[N_REGS];

#define L_UNKNOWN -127
#define L_UNAVAIL -1
#define L_NEEDED -2
#define L_UNNEEDED -3
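/* Rough meaning of these logging states (inferred from the code below):
 * L_UNKNOWN  -- no access observed yet
 * L_UNAVAIL  -- the native register was used as a temporary
 * L_NEEDED   -- the virtual register was read before being written
 * L_UNNEEDED -- the virtual register was overwritten before any read
 * A non-negative nstate[n] records which virtual register n held. */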

static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
{
    int i;

    for (i = 0; i < VREGS; i++)
        s->virt[i] = vstate[i];
    for (i = 0; i < N_REGS; i++)
        s->nat[i] = nstate[i];
}

static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
{
    int i;
    int reverse = 0;

    for (i = 0; i < VREGS; i++) {
        if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
            return 1;
        if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
            reverse++;
    }
    for (i = 0; i < N_REGS; i++) {
        if (nstate[i] >= 0 && nstate[i] != s->nat[i])
            return 1;
        if (nstate[i] < 0 && s->nat[i] >= 0)
            reverse++;
    }
    if (reverse >= 2 && USE_MATCH)
        return 1; /* In this case, it might be worth recompiling the
                   * callers */
    return 0;
}

static __inline__ void log_startblock(void)
{
    int i;

    for (i = 0; i < VREGS; i++) {
        vstate[i] = L_UNKNOWN;
        vwritten[i] = 0;
    }
    for (i = 0; i < N_REGS; i++)
        nstate[i] = L_UNKNOWN;
}

/* Using an n-reg for a temp variable */
static __inline__ void log_isused(int n)
{
    if (nstate[n] == L_UNKNOWN)
        nstate[n] = L_UNAVAIL;
}

static __inline__ void log_visused(int r)
{
    if (vstate[r] == L_UNKNOWN)
        vstate[r] = L_NEEDED;
}

static __inline__ void do_load_reg(int n, int r)
{
    if (r == FLAGTMP)
        raw_load_flagreg(n, r);
    else if (r == FLAGX)
        raw_load_flagx(n, r);
    else
        raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}

static __inline__ void check_load_reg(int n, int r)
{
    raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}

static __inline__ void log_vwrite(int r)
{
    vwritten[r] = 1;
}

/* Using an n-reg to hold a v-reg */
static __inline__ void log_isreg(int n, int r)
{
    static int count = 0;

    if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
        nstate[n] = r;
    else {
        do_load_reg(n, r);
        if (nstate[n] == L_UNKNOWN)
            nstate[n] = L_UNAVAIL;
    }
    if (vstate[r] == L_UNKNOWN)
        vstate[r] = L_NEEDED;
}

static __inline__ void log_clobberreg(int r)
{
    if (vstate[r] == L_UNKNOWN)
        vstate[r] = L_UNNEEDED;
}

/* This ends all possibility of clever register allocation */

static __inline__ void log_flush(void)
{
    int i;

    for (i = 0; i < VREGS; i++)
        if (vstate[i] == L_UNKNOWN)
            vstate[i] = L_NEEDED;
    for (i = 0; i < N_REGS; i++)
        if (nstate[i] == L_UNKNOWN)
            nstate[i] = L_UNAVAIL;
}

static __inline__ void log_dump(void)
{
    int i;

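    /* Dumping is disabled; the early return below skips the log output. */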
    return;

    write_log("----------------------\n");
    for (i = 0; i < N_REGS; i++) {
        switch (nstate[i]) {
        case L_UNKNOWN:
            write_log("Nat %d : UNKNOWN\n", i);
            break;
        case L_UNAVAIL:
            write_log("Nat %d : UNAVAIL\n", i);
            break;
        default:
            write_log("Nat %d : %d\n", i, nstate[i]);
            break;
        }
    }
    for (i = 0; i < VREGS; i++) {
        if (vstate[i] == L_UNNEEDED)
            write_log("Virt %d: UNNEEDED\n", i);
    }
}

/********************************************************************
 * register status handling. EMIT TIME!                             *
 ********************************************************************/

static __inline__ void set_status(int r, int status)
{
    if (status == ISCONST)
        log_clobberreg(r);
    live.state[r].status=status;
}

static __inline__ int isinreg(int r)
{
    return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
}

static __inline__ void adjust_nreg(int r, uae_u32 val)
{
    if (!val)
        return;
    raw_lea_l_brr(r,r,val);
}

static void tomem(int r)
{
    int rr=live.state[r].realreg;

    if (isinreg(r)) {
        if (live.state[r].val && live.nat[rr].nholds==1
            && !live.nat[rr].locked) {
            // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
            //           live.state[r].val,r,rr,target);
            adjust_nreg(rr,live.state[r].val);
            live.state[r].val=0;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
        }
    }

    if (live.state[r].status==DIRTY) {
        switch (live.state[r].dirtysize) {
        case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
        case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
        case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
        default: abort();
        }
        log_vwrite(r);
        set_status(r,CLEAN);
        live.state[r].dirtysize=0;
    }
}

static __inline__ int isconst(int r)
{
    return live.state[r].status==ISCONST;
}

int is_const(int r)
{
    return isconst(r);
}

static __inline__ void writeback_const(int r)
{
    if (!isconst(r))
        return;
    Dif (live.state[r].needflush==NF_HANDLER) {
        write_log("Trying to write back constant NF_HANDLER!\n");
        abort();
    }

    raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
    log_vwrite(r);
    live.state[r].val=0;
    set_status(r,INMEM);
}

static __inline__ void tomem_c(int r)
{
    if (isconst(r)) {
        writeback_const(r);
    }
    else
        tomem(r);
}

static void evict(int r)
{
    int rr;

    if (!isinreg(r))
        return;
    tomem(r);
    rr=live.state[r].realreg;

    Dif (live.nat[rr].locked &&
         live.nat[rr].nholds==1) {
        write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
        abort();
    }

    live.nat[rr].nholds--;
    if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
        int topreg=live.nat[rr].holds[live.nat[rr].nholds];
        int thisind=live.state[r].realind;

        live.nat[rr].holds[thisind]=topreg;
        live.state[topreg].realind=thisind;
    }
    live.state[r].realreg=-1;
    set_status(r,INMEM);
}

static __inline__ void free_nreg(int r)
{
    int i=live.nat[r].nholds;

    while (i) {
        int vr;

        --i;
        vr=live.nat[r].holds[i];
        evict(vr);
    }
    Dif (live.nat[r].nholds!=0) {
        write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
        abort();
    }
}

/* Use with care! */
static __inline__ void isclean(int r)
{
    if (!isinreg(r))
        return;
    live.state[r].validsize=4;
    live.state[r].dirtysize=0;
    live.state[r].val=0;
    set_status(r,CLEAN);
}

static __inline__ void disassociate(int r)
{
    isclean(r);
    evict(r);
}

static __inline__ void set_const(int r, uae_u32 val)
{
    disassociate(r);
    live.state[r].val=val;
    set_status(r,ISCONST);
}

static __inline__ uae_u32 get_offset(int r)
{
    return live.state[r].val;
}

static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness=0; /* to shut up gcc */
    bestreg=-1;
    when=2000000000;

    /* XXX use a regalloc_order table? */
    for (i=0;i<N_REGS;i++) {
        badness=live.nat[i].touched;
        if (live.nat[i].nholds==0)
            badness=0;
        if (i==hint)
            badness-=200000000;
        if (!live.nat[i].locked && badness<when) {
            if ((size==1 && live.nat[i].canbyte) ||
                (size==2 && live.nat[i].canword) ||
                (size==4)) {
                bestreg=i;
                when=badness;
                if (live.nat[i].nholds==0 && hint<0)
                    break;
                if (i==hint)
                    break;
            }
        }
    }
    Dif (bestreg==-1)
        abort();

    if (live.nat[bestreg].nholds>0) {
        free_nreg(bestreg);
    }
    if (isinreg(r)) {
        int rr=live.state[r].realreg;
        /* This will happen if we read a partially dirty register at a
           bigger size */
        Dif (willclobber || live.state[r].validsize>=size)
            abort();
        Dif (live.nat[rr].nholds!=1)
            abort();
        if (size==4 && live.state[r].validsize==2) {
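            /* Merge the valid low word already in rr with the upper half
               from memory: load the full value, use bswap + zero-extend
               to keep only its upper 16 bits, then add the two halves
               together with lea. */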
            log_isused(bestreg);
            log_visused(r);
            raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
            raw_bswap_32(bestreg);
            raw_zero_extend_16_rr(rr,rr);
            raw_zero_extend_16_rr(bestreg,bestreg);
            raw_bswap_32(bestreg);
            raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
            live.state[r].validsize=4;
            live.nat[rr].touched=touchcnt++;
            return rr;
        }
        if (live.state[r].validsize==1) {
            /* Nothing yet */
        }
        evict(r);
    }

    if (!willclobber) {
        if (live.state[r].status!=UNDEF) {
            if (isconst(r)) {
                raw_mov_l_ri(bestreg,live.state[r].val);
                live.state[r].val=0;
                live.state[r].dirtysize=4;
                set_status(r,DIRTY);
                log_isused(bestreg);
            }
            else {
                log_isreg(bestreg, r); /* This will also load it! */
                live.state[r].dirtysize=0;
                set_status(r,CLEAN);
            }
        }
        else {
            live.state[r].val=0;
            live.state[r].dirtysize=0;
            set_status(r,CLEAN);
            log_isused(bestreg);
        }
        live.state[r].validsize=4;
    }
    else { /* this is the easiest way, but not optimal. FIXME! */
        /* Now it's trickier, but hopefully still OK */
        if (!isconst(r) || size==4) {
            live.state[r].validsize=size;
            live.state[r].dirtysize=size;
            live.state[r].val=0;
            set_status(r,DIRTY);
            if (size == 4) {
                log_clobberreg(r);
                log_isused(bestreg);
            }
            else {
                log_visused(r);
                log_isused(bestreg);
            }
        }
        else {
            if (live.state[r].status!=UNDEF)
                raw_mov_l_ri(bestreg,live.state[r].val);
            live.state[r].val=0;
            live.state[r].validsize=4;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
            log_isused(bestreg);
        }
    }
    live.state[r].realreg=bestreg;
    live.state[r].realind=live.nat[bestreg].nholds;
    live.nat[bestreg].touched=touchcnt++;
    live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
    live.nat[bestreg].nholds++;

    return bestreg;
}

static int alloc_reg(int r, int size, int willclobber)
{
    return alloc_reg_hinted(r,size,willclobber,-1);
}

static void unlock2(int r)
{
    Dif (!live.nat[r].locked)
        abort();
    live.nat[r].locked--;
}

static void setlock(int r)
{
    live.nat[r].locked++;
}


static void mov_nregs(int d, int s)
{
    int ns=live.nat[s].nholds;
    int nd=live.nat[d].nholds;
    int i;

    if (s==d)
        return;

    if (nd>0)
        free_nreg(d);

    log_isused(d);
    raw_mov_l_rr(d,s);

    for (i=0;i<live.nat[s].nholds;i++) {
        int vs=live.nat[s].holds[i];

        live.state[vs].realreg=d;
        live.state[vs].realind=i;
        live.nat[d].holds[i]=vs;
    }
    live.nat[d].nholds=live.nat[s].nholds;

    live.nat[s].nholds=0;
}


static __inline__ void make_exclusive(int r, int size, int spec)
{
    int clobber;
    reg_status oldstate;
    int rr=live.state[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!isinreg(r))
        return;
    if (live.nat[rr].nholds==1)
        return;
    for (i=0;i<live.nat[rr].nholds;i++) {
        int vr=live.nat[rr].holds[i];
        if (vr!=r &&
            (live.state[vr].status==DIRTY || live.state[vr].val))
            ndirt++;
    }
    if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
        /* Everything else is clean, so let's keep this register */
        for (i=0;i<live.nat[rr].nholds;i++) {
            int vr=live.nat[rr].holds[i];
            if (vr!=r) {
                evict(vr);
                i--; /* Try that index again! */
            }
        }
        Dif (live.nat[rr].nholds!=1) {
            write_log("natreg %d holds %d vregs, %d not exclusive\n",
                      rr,live.nat[rr].nholds,r);
            abort();
        }
        return;
    }

    /* We have to split the register */
    oldstate=live.state[r];

    setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    disassociate(r);
    /* Get a new register, that we will clobber completely */
    if (oldstate.status==DIRTY) {
        /* If dirtysize is <4, we need a register that can handle the
           eventual smaller memory store! Thanks to Quake68k for exposing
           this detail ;-) */
        nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
    }
    else {
        nr=alloc_reg_hinted(r,4,1,spec);
    }
    nind=live.state[r].realind;
    live.state[r]=oldstate; /* Keep all the old state info */
    live.state[r].realreg=nr;
    live.state[r].realind=nind;

    if (size<live.state[r].validsize) {
        if (live.state[r].val) {
            /* Might as well compensate for the offset now */
            raw_lea_l_brr(nr,rr,oldstate.val);
            live.state[r].val=0;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
        }
        else
            raw_mov_l_rr(nr,rr); /* Make another copy */
    }
    unlock2(rr);
}

static __inline__ void add_offset(int r, uae_u32 off)
{
    live.state[r].val+=off;
}

static __inline__ void remove_offset(int r, int spec)
{
    reg_status oldstate;
    int rr;

    if (isconst(r))
        return;
    if (live.state[r].val==0)
        return;
    if (isinreg(r) && live.state[r].validsize<4)
        evict(r);

    if (!isinreg(r))
        alloc_reg_hinted(r,4,0,spec);

    Dif (live.state[r].validsize!=4) {
        write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
        abort();
    }
    make_exclusive(r,0,-1);
    /* make_exclusive might have done the job already */
    if (live.state[r].val==0)
        return;

    rr=live.state[r].realreg;

    if (live.nat[rr].nholds==1) {
        //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
        //          live.state[r].val,r,rr,target);
        adjust_nreg(rr,live.state[r].val);
        live.state[r].dirtysize=4;
        live.state[r].val=0;
        set_status(r,DIRTY);
        return;
    }
    write_log("Failed in remove_offset\n");
    abort();
}

static __inline__ void remove_all_offsets(void)
{
    int i;

    for (i=0;i<VREGS;i++)
        remove_offset(i,-1);
}

static inline void flush_reg_count(void)
{
#if RECORD_REGISTER_USAGE
    for (int r = 0; r < 16; r++)
        if (reg_count_local[r])
            ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
#endif
}

static inline void record_register(int r)
{
#if RECORD_REGISTER_USAGE
    if (r < 16)
        reg_count_local[r]++;
#endif
}

static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
    int n;
    int answer=-1;

    record_register(r);
    if (live.state[r].status==UNDEF) {
        write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    if (!can_offset)
        remove_offset(r,spec);

    if (isinreg(r) && live.state[r].validsize>=size) {
        n=live.state[r].realreg;
        switch(size) {
        case 1:
            if (live.nat[n].canbyte || spec>=0) {
                answer=n;
            }
            break;
        case 2:
            if (live.nat[n].canword || spec>=0) {
                answer=n;
            }
            break;
        case 4:
            answer=n;
            break;
        default: abort();
        }
        if (answer<0)
            evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
        answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
    }

    if (spec>=0 && spec!=answer) {
        /* Too bad */
        mov_nregs(spec,answer);
        answer=spec;
    }
    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    return answer;
}



static int readreg(int r, int size)
{
    return readreg_general(r,size,-1,0);
}

static int readreg_specific(int r, int size, int spec)
{
    return readreg_general(r,size,spec,0);
}

static int readreg_offset(int r, int size)
{
    return readreg_general(r,size,-1,1);
}

/* writereg_general(r, size, spec)
 *
 * INPUT
 * - r    : mid-layer register
 * - size : requested size (1/2/4)
 * - spec : -1 if find or make a register free, otherwise specifies
 *          the physical register to use in any case
 *
 * OUTPUT
 * - hard (physical, x86 here) register allocated to virtual register r
 */
static __inline__ int writereg_general(int r, int size, int spec)
{
    int n;
    int answer=-1;

    record_register(r);
    if (size<4) {
        remove_offset(r,spec);
    }

    make_exclusive(r,size,spec);
    if (isinreg(r)) {
        int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
        int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
        n=live.state[r].realreg;

        Dif (live.nat[n].nholds!=1)
            abort();
        switch(size) {
        case 1:
            if (live.nat[n].canbyte || spec>=0) {
                live.state[r].dirtysize=ndsize;
                live.state[r].validsize=nvsize;
                answer=n;
            }
            break;
        case 2:
            if (live.nat[n].canword || spec>=0) {
                live.state[r].dirtysize=ndsize;
                live.state[r].validsize=nvsize;
                answer=n;
            }
            break;
        case 4:
            live.state[r].dirtysize=ndsize;
            live.state[r].validsize=nvsize;
            answer=n;
            break;
        default: abort();
        }
        if (answer<0)
            evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
        answer=alloc_reg_hinted(r,size,1,spec);
    }
    if (spec>=0 && spec!=answer) {
        mov_nregs(spec,answer);
        answer=spec;
    }
    if (live.state[r].status==UNDEF)
        live.state[r].validsize=4;
    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    if (size==4) {
        live.state[r].val=0;
    }
    else {
        Dif (live.state[r].val) {
            write_log("Problem with val\n");
            abort();
        }
    }
    set_status(r,DIRTY);
    return answer;
}

static int writereg(int r, int size)
{
    return writereg_general(r,size,-1);
}

static int writereg_specific(int r, int size, int spec)
{
    return writereg_general(r,size,spec);
}
1865    
1866     static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1867     {
1868     int n;
1869     int answer=-1;
1870    
1871 gbeauche 1.28 record_register(r);
1872 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1873     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1874     }
1875     remove_offset(r,spec);
1876     make_exclusive(r,0,spec);
1877    
1878     Dif (wsize<rsize) {
1879     write_log("Cannot handle wsize<rsize in rmw_general()\n");
1880     abort();
1881     }
1882     if (isinreg(r) && live.state[r].validsize>=rsize) {
1883     n=live.state[r].realreg;
1884     Dif (live.nat[n].nholds!=1)
1885     abort();
1886    
1887     switch(rsize) {
1888     case 1:
1889     if (live.nat[n].canbyte || spec>=0) {
1890     answer=n;
1891     }
1892     break;
1893     case 2:
1894     if (live.nat[n].canword || spec>=0) {
1895     answer=n;
1896     }
1897     break;
1898     case 4:
1899     answer=n;
1900     break;
1901     default: abort();
1902     }
1903     if (answer<0)
1904     evict(r);
1905     }
1906     /* either the value was in memory to start with, or it was evicted and
1907     is in memory now */
1908     if (answer<0) {
1909     answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1910     }
1911    
1912     if (spec>=0 && spec!=answer) {
1913     /* Too bad: the value landed in the wrong register; copy it into the requested one */
1914     mov_nregs(spec,answer);
1915     answer=spec;
1916     }
1917     if (wsize>live.state[r].dirtysize)
1918     live.state[r].dirtysize=wsize;
1919     if (wsize>live.state[r].validsize)
1920     live.state[r].validsize=wsize;
1921     set_status(r,DIRTY);
1922    
1923     live.nat[answer].locked++;
1924     live.nat[answer].touched=touchcnt++;
1925    
1926     Dif (live.state[r].val) {
1927     write_log("Problem with val(rmw)\n");
1928     abort();
1929     }
1930     return answer;
1931     }
1932    
1933     static int rmw(int r, int wsize, int rsize)
1934     {
1935     return rmw_general(r,wsize,rsize,-1);
1936     }
1937    
1938     static int rmw_specific(int r, int wsize, int rsize, int spec)
1939     {
1940     return rmw_general(r,wsize,rsize,spec);
1941     }
1942    
1943    
1944     /* needed for restoring the carry flag on non-P6 cores */
1945     static void bt_l_ri_noclobber(R4 r, IMM i)
1946     {
1947     int size=4;
1948     if (i<16)
1949     size=2;
1950     r=readreg(r,size);
1951     raw_bt_l_ri(r,i);
1952     unlock2(r);
1953     }
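/* Hedged note on the mechanism (from the x86 BT semantics): bt copies the
   selected bit of its operand into the carry flag without writing the
   operand back, hence "noclobber". Since the X/C copy lives in bit 0 of
   FLAGX, bt_l_ri_noclobber(FLAGX,0) restores C while leaving FLAGX itself
   untouched. */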
1954    
1955     /********************************************************************
1956     * FPU register status handling. EMIT TIME! *
1957     ********************************************************************/
1958    
1959     static void f_tomem(int r)
1960     {
1961     if (live.fate[r].status==DIRTY) {
1962     #if USE_LONG_DOUBLE
1963 gbeauche 1.24 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1964 gbeauche 1.1 #else
1965 gbeauche 1.24 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1966 gbeauche 1.1 #endif
1967     live.fate[r].status=CLEAN;
1968     }
1969     }
1970    
1971     static void f_tomem_drop(int r)
1972     {
1973     if (live.fate[r].status==DIRTY) {
1974     #if USE_LONG_DOUBLE
1975 gbeauche 1.24 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1976 gbeauche 1.1 #else
1977 gbeauche 1.24 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1978 gbeauche 1.1 #endif
1979     live.fate[r].status=INMEM;
1980     }
1981     }
1982    
1983    
1984     static __inline__ int f_isinreg(int r)
1985     {
1986     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1987     }
1988    
1989     static void f_evict(int r)
1990     {
1991     int rr;
1992    
1993     if (!f_isinreg(r))
1994     return;
1995     rr=live.fate[r].realreg;
1996     if (live.fat[rr].nholds==1)
1997     f_tomem_drop(r);
1998     else
1999     f_tomem(r);
2000    
2001     Dif (live.fat[rr].locked &&
2002     live.fat[rr].nholds==1) {
2003     write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
2004     abort();
2005     }
2006    
2007     live.fat[rr].nholds--;
2008     if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
2009     int topreg=live.fat[rr].holds[live.fat[rr].nholds];
2010     int thisind=live.fate[r].realind;
2011     live.fat[rr].holds[thisind]=topreg;
2012     live.fate[topreg].realind=thisind;
2013     }
2014     live.fate[r].status=INMEM;
2015     live.fate[r].realreg=-1;
2016     }
2017    
2018     static __inline__ void f_free_nreg(int r)
2019     {
2020     int i=live.fat[r].nholds;
2021    
2022     while (i) {
2023     int vr;
2024    
2025     --i;
2026     vr=live.fat[r].holds[i];
2027     f_evict(vr);
2028     }
2029     Dif (live.fat[r].nholds!=0) {
2030     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2031     abort();
2032     }
2033     }
2034    
2035    
2036     /* Use with care! */
2037     static __inline__ void f_isclean(int r)
2038     {
2039     if (!f_isinreg(r))
2040     return;
2041     live.fate[r].status=CLEAN;
2042     }
2043    
2044     static __inline__ void f_disassociate(int r)
2045     {
2046     f_isclean(r);
2047     f_evict(r);
2048     }
2049    
2050    
2051    
2052     static int f_alloc_reg(int r, int willclobber)
2053     {
2054     int bestreg;
2055     uae_s32 when;
2056     int i;
2057     uae_s32 badness;
2058     bestreg=-1;
2059     when=2000000000;
2060     for (i=N_FREGS;i--;) {
2061     badness=live.fat[i].touched;
2062     if (live.fat[i].nholds==0)
2063     badness=0;
2064    
2065     if (!live.fat[i].locked && badness<when) {
2066     bestreg=i;
2067     when=badness;
2068     if (live.fat[i].nholds==0)
2069     break;
2070     }
2071     }
2072     Dif (bestreg==-1)
2073     abort();
2074    
2075     if (live.fat[bestreg].nholds>0) {
2076     f_free_nreg(bestreg);
2077     }
2078     if (f_isinreg(r)) {
2079     f_evict(r);
2080     }
2081    
2082     if (!willclobber) {
2083     if (live.fate[r].status!=UNDEF) {
2084     #if USE_LONG_DOUBLE
2085 gbeauche 1.24 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2086 gbeauche 1.1 #else
2087 gbeauche 1.24 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2088 gbeauche 1.1 #endif
2089     }
2090     live.fate[r].status=CLEAN;
2091     }
2092     else {
2093     live.fate[r].status=DIRTY;
2094     }
2095     live.fate[r].realreg=bestreg;
2096     live.fate[r].realind=live.fat[bestreg].nholds;
2097     live.fat[bestreg].touched=touchcnt++;
2098     live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2099     live.fat[bestreg].nholds++;
2100    
2101     return bestreg;
2102     }
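/* Allocation policy note: the search above is a least-recently-used scan.
   `touched` is a monotonically increasing timestamp, so among unlocked
   registers the one with the smallest timestamp wins; a register that
   holds nothing gets badness 0 and short-circuits the scan immediately. */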
2103    
2104     static void f_unlock(int r)
2105     {
2106     Dif (!live.fat[r].locked)
2107     abort();
2108     live.fat[r].locked--;
2109     }
2110    
2111     static void f_setlock(int r)
2112     {
2113     live.fat[r].locked++;
2114     }
2115    
2116     static __inline__ int f_readreg(int r)
2117     {
2118     int n;
2119     int answer=-1;
2120    
2121     if (f_isinreg(r)) {
2122     n=live.fate[r].realreg;
2123     answer=n;
2124     }
2125     /* either the value was in memory to start with, or it was evicted and
2126     is in memory now */
2127     if (answer<0)
2128     answer=f_alloc_reg(r,0);
2129    
2130     live.fat[answer].locked++;
2131     live.fat[answer].touched=touchcnt++;
2132     return answer;
2133     }
2134    
2135     static __inline__ void f_make_exclusive(int r, int clobber)
2136     {
2137     freg_status oldstate;
2138     int rr=live.fate[r].realreg;
2139     int nr;
2140     int nind;
2141     int ndirt=0;
2142     int i;
2143    
2144     if (!f_isinreg(r))
2145     return;
2146     if (live.fat[rr].nholds==1)
2147     return;
2148     for (i=0;i<live.fat[rr].nholds;i++) {
2149     int vr=live.fat[rr].holds[i];
2150     if (vr!=r && live.fate[vr].status==DIRTY)
2151     ndirt++;
2152     }
2153     if (!ndirt && !live.fat[rr].locked) {
2154     /* Everything else is clean, so let's keep this register */
2155     for (i=0;i<live.fat[rr].nholds;i++) {
2156     int vr=live.fat[rr].holds[i];
2157     if (vr!=r) {
2158     f_evict(vr);
2159     i--; /* Try that index again! */
2160     }
2161     }
2162     Dif (live.fat[rr].nholds!=1) {
2163     write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2164     for (i=0;i<live.fat[rr].nholds;i++) {
2165     write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2166     live.fate[live.fat[rr].holds[i]].realreg,
2167     live.fate[live.fat[rr].holds[i]].realind);
2168     }
2169     write_log("\n");
2170     abort();
2171     }
2172     return;
2173     }
2174    
2175     /* We have to split the register */
2176     oldstate=live.fate[r];
2177    
2178     f_setlock(rr); /* Make sure this doesn't go away */
2179     /* Forget about r being in the register rr */
2180     f_disassociate(r);
2181     /* Get a new register, that we will clobber completely */
2182     nr=f_alloc_reg(r,1);
2183     nind=live.fate[r].realind;
2184     if (!clobber)
2185     raw_fmov_rr(nr,rr); /* Make another copy */
2186     live.fate[r]=oldstate; /* Keep all the old state info */
2187     live.fate[r].realreg=nr;
2188     live.fate[r].realind=nind;
2189     f_unlock(rr);
2190     }
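/* Summary of the split performed above: if every sibling sharing rr is
   clean, the siblings are simply evicted and r keeps the physical register.
   Otherwise r is detached and moved into a freshly allocated register,
   copying the old contents over (raw_fmov_rr) unless the caller is about
   to clobber them anyway. */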
2191    
2192    
2193     static __inline__ int f_writereg(int r)
2194     {
2195     int n;
2196     int answer=-1;
2197    
2198     f_make_exclusive(r,1);
2199     if (f_isinreg(r)) {
2200     n=live.fate[r].realreg;
2201     answer=n;
2202     }
2203     if (answer<0) {
2204     answer=f_alloc_reg(r,1);
2205     }
2206     live.fate[r].status=DIRTY;
2207     live.fat[answer].locked++;
2208     live.fat[answer].touched=touchcnt++;
2209     return answer;
2210     }
2211    
2212     static int f_rmw(int r)
2213     {
2214     int n;
2215    
2216     f_make_exclusive(r,0);
2217     if (f_isinreg(r)) {
2218     n=live.fate[r].realreg;
2219     }
2220     else
2221     n=f_alloc_reg(r,0);
2222     live.fate[r].status=DIRTY;
2223     live.fat[n].locked++;
2224     live.fat[n].touched=touchcnt++;
2225     return n;
2226     }
2227    
2228     static void fflags_into_flags_internal(uae_u32 tmp)
2229     {
2230     int r;
2231    
2232     clobber_flags();
2233     r=f_readreg(FP_RESULT);
2234     if (FFLAG_NREG_CLOBBER_CONDITION) {
2235     int tmp2=tmp;
2236     tmp=writereg_specific(tmp,4,FFLAG_NREG);
2237     raw_fflags_into_flags(r);
2238     unlock2(tmp);
2239     forget_about(tmp2);
2240     }
2241     else
2242     raw_fflags_into_flags(r);
2243     f_unlock(r);
2244 gbeauche 1.19 live_flags();
2245 gbeauche 1.1 }
2246    
2247    
2248    
2249    
2250     /********************************************************************
2251     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2252     ********************************************************************/
2253    
2254     /*
2255     * RULES FOR HANDLING REGISTERS:
2256     *
2257     * * In the function headers, order the parameters
2258     * - 1st registers written to
2259     * - 2nd read/modify/write registers
2260     * - 3rd registers read from
2261     * * Before calling raw_*, you must call readreg, writereg or rmw for
2262     * each register
2263     * * The order for this is
2264     * - 1st call remove_offset for all registers written to with size<4
2265     * - 2nd call readreg for all registers read without offset
2266     * - 3rd call rmw for all rmw registers
2267     * - 4th call readreg_offset for all registers that can handle offsets
2268     * - 5th call get_offset for all the registers from the previous step
2269     * - 6th call writereg for all written-to registers
2270     * - 7th call raw_*
2271     * - 8th unlock2 all registers that were locked
2272     */
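/* As a hedged illustration (a sketch mirroring the and_b() mid-layer
 * function further down in this file, not a generated function), steps
 * 2, 3, 7 and 8 of the order above look like:
 *
 *   s=readreg(s,1);    // 2nd: lock the read-only source
 *   d=rmw(d,1,1);      // 3rd: lock the read/modify/write destination
 *   raw_and_b(d,s);    // 7th: emit the actual opcode
 *   unlock2(d);        // 8th: unlock everything locked above
 *   unlock2(s);
 */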
2273    
2274     MIDFUNC(0,live_flags,(void))
2275     {
2276     live.flags_on_stack=TRASH;
2277     live.flags_in_flags=VALID;
2278     live.flags_are_important=1;
2279     }
2280     MENDFUNC(0,live_flags,(void))
2281    
2282     MIDFUNC(0,dont_care_flags,(void))
2283     {
2284     live.flags_are_important=0;
2285     }
2286     MENDFUNC(0,dont_care_flags,(void))
2287    
2288    
2289     MIDFUNC(0,duplicate_carry,(void))
2290     {
2291     evict(FLAGX);
2292     make_flags_live_internal();
2293 gbeauche 1.24 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2294 gbeauche 1.1 log_vwrite(FLAGX);
2295     }
2296     MENDFUNC(0,duplicate_carry,(void))
2297    
2298     MIDFUNC(0,restore_carry,(void))
2299     {
2300     if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2301     bt_l_ri_noclobber(FLAGX,0);
2302     }
2303     else { /* Avoid the stall the above creates.
2304     This is slow on non-P6, though.
2305     */
2306     COMPCALL(rol_b_ri(FLAGX,8));
2307     isclean(FLAGX);
2308     }
2309     }
2310     MENDFUNC(0,restore_carry,(void))
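/* Why the rol trick above works (hedged note, from x86 ROL semantics):
   rotating an 8-bit register left by 8 leaves its value unchanged, but the
   last bit rotated out - bit 0 of the original value, where the X/C copy is
   kept - lands in the carry flag. The price is a byte-sized write to FLAGX,
   which presumably is what sidesteps the P6 partial-register stall that the
   wider read in bt_l_ri_noclobber would otherwise trigger. */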
2311    
2312     MIDFUNC(0,start_needflags,(void))
2313     {
2314     needflags=1;
2315     }
2316     MENDFUNC(0,start_needflags,(void))
2317    
2318     MIDFUNC(0,end_needflags,(void))
2319     {
2320     needflags=0;
2321     }
2322     MENDFUNC(0,end_needflags,(void))
2323    
2324     MIDFUNC(0,make_flags_live,(void))
2325     {
2326     make_flags_live_internal();
2327     }
2328     MENDFUNC(0,make_flags_live,(void))
2329    
2330     MIDFUNC(1,fflags_into_flags,(W2 tmp))
2331     {
2332     clobber_flags();
2333     fflags_into_flags_internal(tmp);
2334     }
2335     MENDFUNC(1,fflags_into_flags,(W2 tmp))
2336    
2337    
2338     MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2339     {
2340     int size=4;
2341     if (i<16)
2342     size=2;
2343     CLOBBER_BT;
2344     r=readreg(r,size);
2345     raw_bt_l_ri(r,i);
2346     unlock2(r);
2347     }
2348     MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2349    
2350     MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2351     {
2352     CLOBBER_BT;
2353     r=readreg(r,4);
2354     b=readreg(b,4);
2355     raw_bt_l_rr(r,b);
2356     unlock2(r);
2357     unlock2(b);
2358     }
2359     MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2360    
2361     MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2362     {
2363     int size=4;
2364     if (i<16)
2365     size=2;
2366     CLOBBER_BT;
2367     r=rmw(r,size,size);
2368     raw_btc_l_ri(r,i);
2369     unlock2(r);
2370     }
2371     MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2372    
2373     MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2374     {
2375     CLOBBER_BT;
2376     b=readreg(b,4);
2377     r=rmw(r,4,4);
2378     raw_btc_l_rr(r,b);
2379     unlock2(r);
2380     unlock2(b);
2381     }
2382     MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2383    
2384    
2385     MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2386     {
2387     int size=4;
2388     if (i<16)
2389     size=2;
2390     CLOBBER_BT;
2391     r=rmw(r,size,size);
2392     raw_btr_l_ri(r,i);
2393     unlock2(r);
2394     }
2395     MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2396    
2397     MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2398     {
2399     CLOBBER_BT;
2400     b=readreg(b,4);
2401     r=rmw(r,4,4);
2402     raw_btr_l_rr(r,b);
2403     unlock2(r);
2404     unlock2(b);
2405     }
2406     MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2407    
2408    
2409     MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2410     {
2411     int size=4;
2412     if (i<16)
2413     size=2;
2414     CLOBBER_BT;
2415     r=rmw(r,size,size);
2416     raw_bts_l_ri(r,i);
2417     unlock2(r);
2418     }
2419     MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2420    
2421     MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2422     {
2423     CLOBBER_BT;
2424     b=readreg(b,4);
2425     r=rmw(r,4,4);
2426     raw_bts_l_rr(r,b);
2427     unlock2(r);
2428     unlock2(b);
2429     }
2430     MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2431    
2432     MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2433     {
2434     CLOBBER_MOV;
2435     d=writereg(d,4);
2436     raw_mov_l_rm(d,s);
2437     unlock2(d);
2438     }
2439     MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2440    
2441    
2442     MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2443     {
2444     r=readreg(r,4);
2445     raw_call_r(r);
2446     unlock2(r);
2447     }
2448     MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2449    
2450     MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2451     {
2452     CLOBBER_SUB;
2453     raw_sub_l_mi(d,s) ;
2454     }
2455     MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2456    
2457     MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2458     {
2459     CLOBBER_MOV;
2460     raw_mov_l_mi(d,s) ;
2461     }
2462     MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2463    
2464     MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2465     {
2466     CLOBBER_MOV;
2467     raw_mov_w_mi(d,s) ;
2468     }
2469     MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2470    
2471     MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2472     {
2473     CLOBBER_MOV;
2474     raw_mov_b_mi(d,s) ;
2475     }
2476     MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2477    
2478     MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2479     {
2480     if (!i && !needflags)
2481     return;
2482     CLOBBER_ROL;
2483     r=rmw(r,1,1);
2484     raw_rol_b_ri(r,i);
2485     unlock2(r);
2486     }
2487     MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2488    
2489     MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2490     {
2491     if (!i && !needflags)
2492     return;
2493     CLOBBER_ROL;
2494     r=rmw(r,2,2);
2495     raw_rol_w_ri(r,i);
2496     unlock2(r);
2497     }
2498     MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2499    
2500     MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2501     {
2502     if (!i && !needflags)
2503     return;
2504     CLOBBER_ROL;
2505     r=rmw(r,4,4);
2506     raw_rol_l_ri(r,i);
2507     unlock2(r);
2508     }
2509     MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2510    
2511     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2512     {
2513     if (isconst(r)) {
2514     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2515     return;
2516     }
2517     CLOBBER_ROL;
2518     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2519     d=rmw(d,4,4);
2520     Dif (r!=1) {
2521     write_log("Illegal register %d in raw_rol_l\n",r);
2522     abort();
2523     }
2524     raw_rol_l_rr(d,r) ;
2525     unlock2(r);
2526     unlock2(d);
2527     }
2528     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2529    
2530     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2531     { /* Can only do this with r==1, i.e. cl */
2532    
2533     if (isconst(r)) {
2534     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2535     return;
2536     }
2537     CLOBBER_ROL;
2538     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2539     d=rmw(d,2,2);
2540     Dif (r!=1) {
2541     write_log("Illegal register %d in raw_rol_w\n",r);
2542     abort();
2543     }
2544     raw_rol_w_rr(d,r) ;
2545     unlock2(r);
2546     unlock2(d);
2547     }
2548     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2549    
2550     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2551     { /* Can only do this with r==1, i.e. cl */
2552    
2553     if (isconst(r)) {
2554     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2555     return;
2556     }
2557    
2558     CLOBBER_ROL;
2559     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2560     d=rmw(d,1,1);
2561     Dif (r!=1) {
2562     write_log("Illegal register %d in raw_rol_b\n",r);
2563     abort();
2564     }
2565     raw_rol_b_rr(d,r) ;
2566     unlock2(r);
2567     unlock2(d);
2568     }
2569     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2570    
2571    
2572     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2573     {
2574     if (isconst(r)) {
2575     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2576     return;
2577     }
2578     CLOBBER_SHLL;
2579     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2580     d=rmw(d,4,4);
2581     Dif (r!=1) {
2582     write_log("Illegal register %d in raw_shll_l\n",r);
2583     abort();
2584     }
2585     raw_shll_l_rr(d,r) ;
2586     unlock2(r);
2587     unlock2(d);
2588     }
2589     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2590    
2591     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2592     { /* Can only do this with r==1, i.e. cl */
2593    
2594     if (isconst(r)) {
2595     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2596     return;
2597     }
2598     CLOBBER_SHLL;
2599     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2600     d=rmw(d,2,2);
2601     Dif (r!=1) {
2602     write_log("Illegal register %d in raw_shll_w\n",r);
2603     abort();
2604     }
2605     raw_shll_w_rr(d,r) ;
2606     unlock2(r);
2607     unlock2(d);
2608     }
2609     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2610    
2611     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2612     { /* Can only do this with r==1, i.e. cl */
2613    
2614     if (isconst(r)) {
2615     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2616     return;
2617     }
2618    
2619     CLOBBER_SHLL;
2620     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2621     d=rmw(d,1,1);
2622     Dif (r!=1) {
2623     write_log("Illegal register %d in raw_shll_b\n",r);
2624     abort();
2625     }
2626     raw_shll_b_rr(d,r) ;
2627     unlock2(r);
2628     unlock2(d);
2629     }
2630     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2631    
2632    
2633     MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2634     {
2635     if (!i && !needflags)
2636     return;
2637     CLOBBER_ROR;
2638     r=rmw(r,1,1);
2639     raw_ror_b_ri(r,i);
2640     unlock2(r);
2641     }
2642     MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2643    
2644     MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2645     {
2646     if (!i && !needflags)
2647     return;
2648     CLOBBER_ROR;
2649     r=rmw(r,2,2);
2650     raw_ror_w_ri(r,i);
2651     unlock2(r);
2652     }
2653     MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2654    
2655     MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2656     {
2657     if (!i && !needflags)
2658     return;
2659     CLOBBER_ROR;
2660     r=rmw(r,4,4);
2661     raw_ror_l_ri(r,i);
2662     unlock2(r);
2663     }
2664     MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2665    
2666     MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2667     {
2668     if (isconst(r)) {
2669     COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2670     return;
2671     }
2672     CLOBBER_ROR;
2673     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2674     d=rmw(d,4,4);
2675     raw_ror_l_rr(d,r) ;
2676     unlock2(r);
2677     unlock2(d);
2678     }
2679     MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2680    
2681     MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2682     {
2683     if (isconst(r)) {
2684     COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2685     return;
2686     }
2687     CLOBBER_ROR;
2688     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2689     d=rmw(d,2,2);
2690     raw_ror_w_rr(d,r) ;
2691     unlock2(r);
2692     unlock2(d);
2693     }
2694     MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2695    
2696     MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2697     {
2698     if (isconst(r)) {
2699     COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2700     return;
2701     }
2702    
2703     CLOBBER_ROR;
2704     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2705     d=rmw(d,1,1);
2706     raw_ror_b_rr(d,r) ;
2707     unlock2(r);
2708     unlock2(d);
2709     }
2710     MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2711    
2712     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2713     {
2714     if (isconst(r)) {
2715     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2716     return;
2717     }
2718     CLOBBER_SHRL;
2719     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2720     d=rmw(d,4,4);
2721     Dif (r!=1) {
2722     write_log("Illegal register %d in raw_shrl_l\n",r);
2723     abort();
2724     }
2725     raw_shrl_l_rr(d,r) ;
2726     unlock2(r);
2727     unlock2(d);
2728     }
2729     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2730    
2731     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2732     { /* Can only do this with r==1, i.e. cl */
2733    
2734     if (isconst(r)) {
2735     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2736     return;
2737     }
2738     CLOBBER_SHRL;
2739     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2740     d=rmw(d,2,2);
2741     Dif (r!=1) {
2742     write_log("Illegal register %d in raw_shrl_w\n",r);
2743     abort();
2744     }
2745     raw_shrl_w_rr(d,r) ;
2746     unlock2(r);
2747     unlock2(d);
2748     }
2749     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2750    
2751     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2752     { /* Can only do this with r==1, i.e. cl */
2753    
2754     if (isconst(r)) {
2755     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2756     return;
2757     }
2758    
2759     CLOBBER_SHRL;
2760     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2761     d=rmw(d,1,1);
2762     Dif (r!=1) {
2763     write_log("Illegal register %d in raw_shrl_b\n",r);
2764     abort();
2765     }
2766     raw_shrl_b_rr(d,r) ;
2767     unlock2(r);
2768     unlock2(d);
2769     }
2770     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2771    
2772    
2773    
2774     MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2775     {
2776     if (!i && !needflags)
2777     return;
2778     if (isconst(r) && !needflags) {
2779     live.state[r].val<<=i;
2780     return;
2781     }
2782     CLOBBER_SHLL;
2783     r=rmw(r,4,4);
2784     raw_shll_l_ri(r,i);
2785     unlock2(r);
2786     }
2787     MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2788    
2789     MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2790     {
2791     if (!i && !needflags)
2792     return;
2793     CLOBBER_SHLL;
2794     r=rmw(r,2,2);
2795     raw_shll_w_ri(r,i);
2796     unlock2(r);
2797     }
2798     MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2799    
2800     MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2801     {
2802     if (!i && !needflags)
2803     return;
2804     CLOBBER_SHLL;
2805     r=rmw(r,1,1);
2806     raw_shll_b_ri(r,i);
2807     unlock2(r);
2808     }
2809     MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2810    
2811     MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2812     {
2813     if (!i && !needflags)
2814     return;
2815     if (isconst(r) && !needflags) {
2816     live.state[r].val>>=i;
2817     return;
2818     }
2819     CLOBBER_SHRL;
2820     r=rmw(r,4,4);
2821     raw_shrl_l_ri(r,i);
2822     unlock2(r);
2823     }
2824     MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2825    
2826     MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2827     {
2828     if (!i && !needflags)
2829     return;
2830     CLOBBER_SHRL;
2831     r=rmw(r,2,2);
2832     raw_shrl_w_ri(r,i);
2833     unlock2(r);
2834     }
2835     MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2836    
2837     MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2838     {
2839     if (!i && !needflags)
2840     return;
2841     CLOBBER_SHRL;
2842     r=rmw(r,1,1);
2843     raw_shrl_b_ri(r,i);
2844     unlock2(r);
2845     }
2846     MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2847    
2848     MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2849     {
2850     if (!i && !needflags)
2851     return;
2852     CLOBBER_SHRA;
2853     r=rmw(r,4,4);
2854     raw_shra_l_ri(r,i);
2855     unlock2(r);
2856     }
2857     MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2858    
2859     MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2860     {
2861     if (!i && !needflags)
2862     return;
2863     CLOBBER_SHRA;
2864     r=rmw(r,2,2);
2865     raw_shra_w_ri(r,i);
2866     unlock2(r);
2867     }
2868     MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2869    
2870     MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2871     {
2872     if (!i && !needflags)
2873     return;
2874     CLOBBER_SHRA;
2875     r=rmw(r,1,1);
2876     raw_shra_b_ri(r,i);
2877     unlock2(r);
2878     }
2879     MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2880    
2881     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2882     {
2883     if (isconst(r)) {
2884     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2885     return;
2886     }
2887     CLOBBER_SHRA;
2888     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2889     d=rmw(d,4,4);
2890     Dif (r!=1) {
2891     write_log("Illegal register %d in raw_shra_l\n",r);
2892     abort();
2893     }
2894     raw_shra_l_rr(d,r) ;
2895     unlock2(r);
2896     unlock2(d);
2897     }
2898     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2899    
2900     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2901     { /* Can only do this with r==1, i.e. cl */
2902    
2903     if (isconst(r)) {
2904     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2905     return;
2906     }
2907     CLOBBER_SHRA;
2908     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2909     d=rmw(d,2,2);
2910     Dif (r!=1) {
2911     write_log("Illegal register %d in raw_shra_w\n",r);
2912     abort();
2913     }
2914     raw_shra_w_rr(d,r) ;
2915     unlock2(r);
2916     unlock2(d);
2917     }
2918     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2919    
2920     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2921     { /* Can only do this with r==1, i.e. cl */
2922    
2923     if (isconst(r)) {
2924     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2925     return;
2926     }
2927    
2928     CLOBBER_SHRA;
2929     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2930     d=rmw(d,1,1);
2931     Dif (r!=1) {
2932     write_log("Illegal register %d in raw_shra_b\n",r);
2933     abort();
2934     }
2935     raw_shra_b_rr(d,r) ;
2936     unlock2(r);
2937     unlock2(d);
2938     }
2939     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2940    
2941    
2942     MIDFUNC(2,setcc,(W1 d, IMM cc))
2943     {
2944     CLOBBER_SETCC;
2945     d=writereg(d,1);
2946     raw_setcc(d,cc);
2947     unlock2(d);
2948     }
2949     MENDFUNC(2,setcc,(W1 d, IMM cc))
2950    
2951     MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2952     {
2953     CLOBBER_SETCC;
2954     raw_setcc_m(d,cc);
2955     }
2956     MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2957    
2958 gbeauche 1.42 MIDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
2959     {
2960     if (d==s)
2961     return;
2962     CLOBBER_CMOV;
2963     s=readreg(s,1);
2964     d=rmw(d,1,1);
2965     raw_cmov_b_rr(d,s,cc);
2966     unlock2(s);
2967     unlock2(d);
2968     }
2969     MENDFUNC(3,cmov_b_rr,(RW1 d, R1 s, IMM cc))
2970    
2971     MIDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
2972     {
2973     if (d==s)
2974     return;
2975     CLOBBER_CMOV;
2976     s=readreg(s,2);
2977     d=rmw(d,2,2);
2978     raw_cmov_w_rr(d,s,cc);
2979     unlock2(s);
2980     unlock2(d);
2981     }
2982     MENDFUNC(3,cmov_w_rr,(RW2 d, R2 s, IMM cc))
2983    
2984 gbeauche 1.1 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2985     {
2986     if (d==s)
2987     return;
2988     CLOBBER_CMOV;
2989     s=readreg(s,4);
2990     d=rmw(d,4,4);
2991     raw_cmov_l_rr(d,s,cc);
2992     unlock2(s);
2993     unlock2(d);
2994     }
2995     MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2996    
2997     MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2998     {
2999     CLOBBER_CMOV;
3000     d=rmw(d,4,4);
3001     raw_cmov_l_rm(d,s,cc);
3002     unlock2(d);
3003     }
3004     MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
3005    
3006 gbeauche 1.26 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
3007     {
3008     CLOBBER_BSF;
3009     s = readreg(s, 4);
3010     d = writereg(d, 4);
3011     raw_bsf_l_rr(d, s);
3012     unlock2(s);
3013     unlock2(d);
3014     }
3015     MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
3016    
3017     /* Set the Z flag depending on the value in s. Note that the
3018     value has to be 0 or -1 (or, more precisely, for non-zero
3019     values, bit 14 must be set)! */
3020     MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3021 gbeauche 1.1 {
3022 gbeauche 1.26 CLOBBER_BSF;
3023     s=rmw_specific(s,4,4,FLAG_NREG3);
3024     tmp=writereg(tmp,4);
3025     raw_flags_set_zero(s, tmp);
3026     unlock2(tmp);
3027     unlock2(s);
3028 gbeauche 1.1 }
3029 gbeauche 1.26 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
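/* Usage note (hedged): with the [LS]AHF flag scheme the low flags byte is
   kept in AH, so Z sits at bit 6 of AH, i.e. bit 14 of the wider register,
   which is presumably why the comment above singles out bit 14: a value of
   -1 has that bit set, a value of 0 does not, and anything in between would
   leave Z in an unrelated state. */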
3030 gbeauche 1.1
3031     MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3032     {
3033     CLOBBER_MUL;
3034     s=readreg(s,4);
3035     d=rmw(d,4,4);
3036     raw_imul_32_32(d,s);
3037     unlock2(s);
3038     unlock2(d);
3039     }
3040     MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3041    
3042     MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3043     {
3044     CLOBBER_MUL;
3045     s=rmw_specific(s,4,4,MUL_NREG2);
3046     d=rmw_specific(d,4,4,MUL_NREG1);
3047     raw_imul_64_32(d,s);
3048     unlock2(s);
3049     unlock2(d);
3050     }
3051     MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3052    
3053     MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3054     {
3055     CLOBBER_MUL;
3056     s=rmw_specific(s,4,4,MUL_NREG2);
3057     d=rmw_specific(d,4,4,MUL_NREG1);
3058     raw_mul_64_32(d,s);
3059     unlock2(s);
3060     unlock2(d);
3061     }
3062     MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3063    
3064     MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3065     {
3066     CLOBBER_MUL;
3067     s=readreg(s,4);
3068     d=rmw(d,4,4);
3069     raw_mul_32_32(d,s);
3070     unlock2(s);
3071     unlock2(d);
3072     }
3073     MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3074    
3075 gbeauche 1.24 #if SIZEOF_VOID_P == 8
3076     MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3077     {
3078     int isrmw;
3079    
3080     if (isconst(s)) {
3081     set_const(d,(uae_s32)live.state[s].val);
3082     return;
3083     }
3084    
3085     CLOBBER_SE32;
3086     isrmw=(s==d);
3087     if (!isrmw) {
3088     s=readreg(s,4);
3089     d=writereg(d,4);
3090     }
3091     else { /* If we try to lock this twice, with different sizes, we
3092     are in trouble! */
3093     s=d=rmw(s,4,4);
3094     }
3095     raw_sign_extend_32_rr(d,s);
3096     if (!isrmw) {
3097     unlock2(d);
3098     unlock2(s);
3099     }
3100     else {
3101     unlock2(s);
3102     }
3103     }
3104     MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3105     #endif
3106    
3107 gbeauche 1.1 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3108     {
3109     int isrmw;
3110    
3111     if (isconst(s)) {
3112     set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3113     return;
3114     }
3115    
3116     CLOBBER_SE16;
3117     isrmw=(s==d);
3118     if (!isrmw) {
3119     s=readreg(s,2);
3120     d=writereg(d,4);
3121     }
3122     else { /* If we try to lock this twice, with different sizes, we
3123     are in trouble! */
3124     s=d=rmw(s,4,2);
3125     }
3126     raw_sign_extend_16_rr(d,s);
3127     if (!isrmw) {
3128     unlock2(d);
3129     unlock2(s);
3130     }
3131     else {
3132     unlock2(s);
3133     }
3134     }
3135     MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
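/* Worked example of the constant fold above: a constant s of 0x8765 has
   bit 15 set, so (uae_s32)(uae_s16)0x8765 sign-extends to 0xffff8765,
   while 0x1234 stays 0x00001234; no code is emitted in either case. */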
3136    
3137     MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3138     {
3139     int isrmw;
3140    
3141     if (isconst(s)) {
3142     set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3143     return;
3144     }
3145    
3146     isrmw=(s==d);
3147     CLOBBER_SE8;
3148     if (!isrmw) {
3149     s=readreg(s,1);
3150     d=writereg(d,4);
3151     }
3152     else { /* If we try to lock this twice, with different sizes, we
3153     are in trouble! */
3154     s=d=rmw(s,4,1);
3155     }
3156    
3157     raw_sign_extend_8_rr(d,s);
3158    
3159     if (!isrmw) {
3160     unlock2(d);
3161     unlock2(s);
3162     }
3163     else {
3164     unlock2(s);
3165     }
3166     }
3167     MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3168    
3169    
3170     MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3171     {
3172     int isrmw;
3173    
3174     if (isconst(s)) {
3175     set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3176     return;
3177     }
3178    
3179     isrmw=(s==d);
3180     CLOBBER_ZE16;
3181     if (!isrmw) {
3182     s=readreg(s,2);
3183     d=writereg(d,4);
3184     }
3185     else { /* If we try to lock this twice, with different sizes, we
3186     are in trouble! */
3187     s=d=rmw(s,4,2);
3188     }
3189     raw_zero_extend_16_rr(d,s);
3190     if (!isrmw) {
3191     unlock2(d);
3192     unlock2(s);
3193     }
3194     else {
3195     unlock2(s);
3196     }
3197     }
3198     MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3199    
3200     MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3201     {
3202     int isrmw;
3203     if (isconst(s)) {
3204     set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3205     return;
3206     }
3207    
3208     isrmw=(s==d);
3209     CLOBBER_ZE8;
3210     if (!isrmw) {
3211     s=readreg(s,1);
3212     d=writereg(d,4);
3213     }
3214     else { /* If we try to lock this twice, with different sizes, we
3215     are in trouble! */
3216     s=d=rmw(s,4,1);
3217     }
3218    
3219     raw_zero_extend_8_rr(d,s);
3220    
3221     if (!isrmw) {
3222     unlock2(d);
3223     unlock2(s);
3224     }
3225     else {
3226     unlock2(s);
3227     }
3228     }
3229     MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3230    
3231     MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3232     {
3233     if (d==s)
3234     return;
3235     if (isconst(s)) {
3236     COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3237     return;
3238     }
3239    
3240     CLOBBER_MOV;
3241     s=readreg(s,1);
3242     d=writereg(d,1);
3243     raw_mov_b_rr(d,s);
3244     unlock2(d);
3245     unlock2(s);
3246     }
3247     MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3248    
3249     MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3250     {
3251     if (d==s)
3252     return;
3253     if (isconst(s)) {
3254     COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3255     return;
3256     }
3257    
3258     CLOBBER_MOV;
3259     s=readreg(s,2);
3260     d=writereg(d,2);
3261     raw_mov_w_rr(d,s);
3262     unlock2(d);
3263     unlock2(s);
3264     }
3265     MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3266    
3267    
3268     MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3269     {
3270     CLOBBER_MOV;
3271     baser=readreg(baser,4);
3272     index=readreg(index,4);
3273     d=writereg(d,4);
3274    
3275     raw_mov_l_rrm_indexed(d,baser,index,factor);
3276     unlock2(d);
3277     unlock2(baser);
3278     unlock2(index);
3279     }
3280     MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3281    
3282     MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3283     {
3284     CLOBBER_MOV;
3285     baser=readreg(baser,4);
3286     index=readreg(index,4);
3287     d=writereg(d,2);
3288    
3289     raw_mov_w_rrm_indexed(d,baser,index,factor);
3290     unlock2(d);
3291     unlock2(baser);
3292     unlock2(index);
3293     }
3294     MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3295    
3296     MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3297     {
3298     CLOBBER_MOV;
3299     baser=readreg(baser,4);
3300     index=readreg(index,4);
3301     d=writereg(d,1);
3302    
3303     raw_mov_b_rrm_indexed(d,baser,index,factor);
3304    
3305     unlock2(d);
3306     unlock2(baser);
3307     unlock2(index);
3308     }
3309     MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3310    
3311    
3312     MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3313     {
3314     CLOBBER_MOV;
3315     baser=readreg(baser,4);
3316     index=readreg(index,4);
3317     s=readreg(s,4);
3318    
3319     Dif (baser==s || index==s)
3320     abort();
3321    
3322    
3323     raw_mov_l_mrr_indexed(baser,index,factor,s);
3324     unlock2(s);
3325     unlock2(baser);
3326     unlock2(index);
3327     }
3328     MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3329    
3330     MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3331     {
3332     CLOBBER_MOV;
3333     baser=readreg(baser,4);
3334     index=readreg(index,4);
3335     s=readreg(s,2);
3336    
3337     raw_mov_w_mrr_indexed(baser,index,factor,s);
3338     unlock2(s);
3339     unlock2(baser);
3340     unlock2(index);
3341     }
3342     MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3343    
3344     MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3345     {
3346     CLOBBER_MOV;
3347     s=readreg(s,1);
3348     baser=readreg(baser,4);
3349     index=readreg(index,4);
3350    
3351     raw_mov_b_mrr_indexed(baser,index,factor,s);
3352     unlock2(s);
3353     unlock2(baser);
3354     unlock2(index);
3355     }
3356     MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3357    
3358    
3359     MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3360     {
3361     int basereg=baser;
3362     int indexreg=index;
3363    
3364     CLOBBER_MOV;
3365     s=readreg(s,4);
3366     baser=readreg_offset(baser,4);
3367     index=readreg_offset(index,4);
3368    
3369     base+=get_offset(basereg);
3370     base+=factor*get_offset(indexreg);
3371    
3372     raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3373     unlock2(s);
3374     unlock2(baser);
3375     unlock2(index);
3376     }
3377     MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
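/* Offset-folding idiom (applies to all the *_bmrr_indexed and
   *_brrm_indexed variants in this file): readreg_offset() locks a register
   without flushing any lazy offset accumulated on it (see add_offset in
   lea_l_brr below), and get_offset() fetches that pending offset so it can
   be folded into the immediate displacement `base` at translation time
   instead of being applied to the register with an extra instruction. */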
3378    
3379     MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3380     {
3381     int basereg=baser;
3382     int indexreg=index;
3383    
3384     CLOBBER_MOV;
3385     s=readreg(s,2);
3386     baser=readreg_offset(baser,4);
3387     index=readreg_offset(index,4);
3388    
3389     base+=get_offset(basereg);
3390     base+=factor*get_offset(indexreg);
3391    
3392     raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3393     unlock2(s);
3394     unlock2(baser);
3395     unlock2(index);
3396     }
3397     MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3398    
3399     MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3400     {
3401     int basereg=baser;
3402     int indexreg=index;
3403    
3404     CLOBBER_MOV;
3405     s=readreg(s,1);
3406     baser=readreg_offset(baser,4);
3407     index=readreg_offset(index,4);
3408    
3409     base+=get_offset(basereg);
3410     base+=factor*get_offset(indexreg);
3411    
3412     raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3413     unlock2(s);
3414     unlock2(baser);
3415     unlock2(index);
3416     }
3417     MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3418    
3419    
3420    
3421     /* Read a long from base+baser+factor*index */
3422     MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3423     {
3424     int basereg=baser;
3425     int indexreg=index;
3426    
3427     CLOBBER_MOV;
3428     baser=readreg_offset(baser,4);
3429     index=readreg_offset(index,4);
3430     base+=get_offset(basereg);
3431     base+=factor*get_offset(indexreg);
3432     d=writereg(d,4);
3433     raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3434     unlock2(d);
3435     unlock2(baser);
3436     unlock2(index);
3437     }
3438     MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3439    
3440    
3441     MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3442     {
3443     int basereg=baser;
3444     int indexreg=index;
3445    
3446     CLOBBER_MOV;
3447     remove_offset(d,-1);
3448     baser=readreg_offset(baser,4);
3449     index=readreg_offset(index,4);
3450     base+=get_offset(basereg);
3451     base+=factor*get_offset(indexreg);
3452     d=writereg(d,2);
3453     raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3454     unlock2(d);
3455     unlock2(baser);
3456     unlock2(index);
3457     }
3458     MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3459    
3460    
3461     MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3462     {
3463     int basereg=baser;
3464     int indexreg=index;
3465    
3466     CLOBBER_MOV;
3467     remove_offset(d,-1);
3468     baser=readreg_offset(baser,4);
3469     index=readreg_offset(index,4);
3470     base+=get_offset(basereg);
3471     base+=factor*get_offset(indexreg);
3472     d=writereg(d,1);
3473     raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3474     unlock2(d);
3475     unlock2(baser);
3476     unlock2(index);
3477     }
3478     MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3479    
3480     /* Read a long from base+factor*index */
3481     MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3482     {
3483     int indexreg=index;
3484    
3485     if (isconst(index)) {
3486     COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3487     return;
3488     }
3489    
3490     CLOBBER_MOV;
3491     index=readreg_offset(index,4);
3492     base+=get_offset(indexreg)*factor;
3493     d=writereg(d,4);
3494    
3495     raw_mov_l_rm_indexed(d,base,index,factor);
3496     unlock2(index);
3497     unlock2(d);
3498     }
3499     MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3500    
3501    
3502     /* read the long at the address contained in s+offset and store in d */
3503     MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3504     {
3505     if (isconst(s)) {
3506     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3507     return;
3508     }
3509     CLOBBER_MOV;
3510     s=readreg(s,4);
3511     d=writereg(d,4);
3512    
3513     raw_mov_l_rR(d,s,offset);
3514     unlock2(d);
3515     unlock2(s);
3516     }
3517     MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3518    
3519     /* read the word at the address contained in s+offset and store in d */
3520     MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3521     {
3522     if (isconst(s)) {
3523     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3524     return;
3525     }
3526     CLOBBER_MOV;
3527     s=readreg(s,4);
3528     d=writereg(d,2);
3529    
3530     raw_mov_w_rR(d,s,offset);
3531     unlock2(d);
3532     unlock2(s);
3533     }
3534     MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3535    
3536     /* read the byte at the address contained in s+offset and store in d */
3537     MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3538     {
3539     if (isconst(s)) {
3540     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3541     return;
3542     }
3543     CLOBBER_MOV;
3544     s=readreg(s,4);
3545     d=writereg(d,1);
3546    
3547     raw_mov_b_rR(d,s,offset);
3548     unlock2(d);
3549     unlock2(s);
3550     }
3551     MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3552    
3553     /* read the long at the address contained in s+offset and store in d */
3554     MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3555     {
3556     int sreg=s;
3557     if (isconst(s)) {
3558     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3559     return;
3560     }
3561     CLOBBER_MOV;
3562     s=readreg_offset(s,4);
3563     offset+=get_offset(sreg);
3564     d=writereg(d,4);
3565    
3566     raw_mov_l_brR(d,s,offset);
3567     unlock2(d);
3568     unlock2(s);
3569     }
3570     MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3571    
3572     /* read the word at the address contained in s+offset and store in d */
3573     MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3574     {
3575     int sreg=s;
3576     if (isconst(s)) {
3577     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3578     return;
3579     }
3580     CLOBBER_MOV;
3581     remove_offset(d,-1);
3582     s=readreg_offset(s,4);
3583     offset+=get_offset(sreg);
3584     d=writereg(d,2);
3585    
3586     raw_mov_w_brR(d,s,offset);
3587     unlock2(d);
3588     unlock2(s);
3589     }
3590     MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3591    
3592     /* read the byte at the address contained in s+offset and store in d */
3593     MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3594     {
3595     int sreg=s;
3596     if (isconst(s)) {
3597     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3598     return;
3599     }
3600     CLOBBER_MOV;
3601     remove_offset(d,-1);
3602     s=readreg_offset(s,4);
3603     offset+=get_offset(sreg);
3604     d=writereg(d,1);
3605    
3606     raw_mov_b_brR(d,s,offset);
3607     unlock2(d);
3608     unlock2(s);
3609     }
3610     MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3611    
3612     MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3613     {
3614     int dreg=d;
3615     if (isconst(d)) {
3616     COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3617     return;
3618     }
3619    
3620     CLOBBER_MOV;
3621     d=readreg_offset(d,4);
3622     offset+=get_offset(dreg);
3623     raw_mov_l_Ri(d,i,offset);
3624     unlock2(d);
3625     }
3626     MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3627    
3628     MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3629     {
3630     int dreg=d;
3631     if (isconst(d)) {
3632     COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3633     return;
3634     }
3635    
3636     CLOBBER_MOV;
3637     d=readreg_offset(d,4);
3638     offset+=get_offset(dreg);
3639     raw_mov_w_Ri(d,i,offset);
3640     unlock2(d);
3641     }
3642     MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3643    
3644     MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3645     {
3646     int dreg=d;
3647     if (isconst(d)) {
3648     COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3649     return;
3650     }
3651    
3652     CLOBBER_MOV;
3653     d=readreg_offset(d,4);
3654     offset+=get_offset(dreg);
3655     raw_mov_b_Ri(d,i,offset);
3656     unlock2(d);
3657     }
3658     MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3659    
3660     /* Warning! OFFSET is byte sized only! */
3661     MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3662     {
3663     if (isconst(d)) {
3664     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3665     return;
3666     }
3667     if (isconst(s)) {
3668     COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3669     return;
3670     }
3671    
3672     CLOBBER_MOV;
3673     s=readreg(s,4);
3674     d=readreg(d,4);
3675    
3676     raw_mov_l_Rr(d,s,offset);
3677     unlock2(d);
3678     unlock2(s);
3679     }
3680     MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3681    
3682     MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3683     {
3684     if (isconst(d)) {
3685     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3686     return;
3687     }
3688     if (isconst(s)) {
3689     COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3690     return;
3691     }
3692    
3693     CLOBBER_MOV;
3694     s=readreg(s,2);
3695     d=readreg(d,4);
3696     raw_mov_w_Rr(d,s,offset);
3697     unlock2(d);
3698     unlock2(s);
3699     }
3700     MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3701    
3702     MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3703     {
3704     if (isconst(d)) {
3705     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3706     return;
3707     }
3708     if (isconst(s)) {
3709     COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3710     return;
3711     }
3712    
3713     CLOBBER_MOV;
3714     s=readreg(s,1);
3715     d=readreg(d,4);
3716     raw_mov_b_Rr(d,s,offset);
3717     unlock2(d);
3718     unlock2(s);
3719     }
3720     MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3721    
3722     MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3723     {
3724     if (isconst(s)) {
3725     COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3726     return;
3727     }
3728     #if USE_OFFSET
3729     if (d==s) {
3730     add_offset(d,offset);
3731     return;
3732     }
3733     #endif
3734     CLOBBER_LEA;
3735     s=readreg(s,4);
3736     d=writereg(d,4);
3737     raw_lea_l_brr(d,s,offset);
3738     unlock2(d);
3739     unlock2(s);
3740     }
3741     MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3742    
3743     MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3744     {
3745     if (!offset) {
3746     COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3747     return;
3748     }
3749     CLOBBER_LEA;
3750     s=readreg(s,4);
3751     index=readreg(index,4);
3752     d=writereg(d,4);
3753    
3754     raw_lea_l_brr_indexed(d,s,index,factor,offset);
3755     unlock2(d);
3756     unlock2(index);
3757     unlock2(s);
3758     }
3759     MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3760    
3761     MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3762     {
3763     CLOBBER_LEA;
3764     s=readreg(s,4);
3765     index=readreg(index,4);
3766     d=writereg(d,4);
3767    
3768     raw_lea_l_rr_indexed(d,s,index,factor);
3769     unlock2(d);
3770     unlock2(index);
3771     unlock2(s);
3772     }
3773     MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3774    
3775     /* write s to the long at the address contained in d+offset */
3776     MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3777     {
3778     int dreg=d;
3779     if (isconst(d)) {
3780     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3781     return;
3782     }
3783    
3784     CLOBBER_MOV;
3785     s=readreg(s,4);
3786     d=readreg_offset(d,4);
3787     offset+=get_offset(dreg);
3788    
3789     raw_mov_l_bRr(d,s,offset);
3790     unlock2(d);
3791     unlock2(s);
3792     }
3793     MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3794    
3795     /* write s to the word at the address contained in d+offset */
3796     MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3797     {
3798     int dreg=d;
3799    
3800     if (isconst(d)) {
3801     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3802     return;
3803     }
3804    
3805     CLOBBER_MOV;
3806     s=readreg(s,2);
3807     d=readreg_offset(d,4);
3808     offset+=get_offset(dreg);
3809     raw_mov_w_bRr(d,s,offset);
3810     unlock2(d);
3811     unlock2(s);
3812     }
3813     MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3814    
3815     MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3816     {
3817     int dreg=d;
3818     if (isconst(d)) {
3819     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3820     return;
3821     }
3822    
3823     CLOBBER_MOV;
3824     s=readreg(s,1);
3825     d=readreg_offset(d,4);
3826     offset+=get_offset(dreg);
3827     raw_mov_b_bRr(d,s,offset);
3828     unlock2(d);
3829     unlock2(s);
3830     }
3831     MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3832    
3833     MIDFUNC(1,bswap_32,(RW4 r))
3834     {
3835     int reg=r;
3836    
3837     if (isconst(r)) {
3838     uae_u32 oldv=live.state[r].val;
3839     live.state[r].val=reverse32(oldv);
3840     return;
3841     }
3842    
3843     CLOBBER_SW32;
3844     r=rmw(r,4,4);
3845     raw_bswap_32(r);
3846     unlock2(r);
3847     }
3848     MENDFUNC(1,bswap_32,(RW4 r))
3849    
3850     MIDFUNC(1,bswap_16,(RW2 r))
3851     {
3852     if (isconst(r)) {
3853     uae_u32 oldv=live.state[r].val;
3854     live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3855     (oldv&0xffff0000);
3856     return;
3857     }
3858    
3859     CLOBBER_SW16;
3860     r=rmw(r,2,2);
3861    
3862     raw_bswap_16(r);
3863     unlock2(r);
3864     }
3865     MENDFUNC(1,bswap_16,(RW2 r))
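/* Worked example of the constant fold above: oldv = 0x12345678 gives
   ((oldv>>8)&0xff) = 0x56, ((oldv<<8)&0xff00) = 0x7800, and the upper half
   0x12340000 is kept, so the register becomes the constant 0x12347856. */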
3866    
3867    
3868    
3869     MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3870     {
3871     int olds;
3872    
3873     if (d==s) { /* How pointless! */
3874     return;
3875     }
3876     if (isconst(s)) {
3877     COMPCALL(mov_l_ri)(d,live.state[s].val);
3878     return;
3879     }
3880     olds=s;
3881     disassociate(d);
3882     s=readreg_offset(s,4);
3883     live.state[d].realreg=s;
3884     live.state[d].realind=live.nat[s].nholds;
3885     live.state[d].val=live.state[olds].val;
3886     live.state[d].validsize=4;
3887     live.state[d].dirtysize=4;
3888     set_status(d,DIRTY);
3889    
3890     live.nat[s].holds[live.nat[s].nholds]=d;
3891     live.nat[s].nholds++;
3892     log_clobberreg(d);
3893     /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3894     d,s,live.state[d].realind,live.nat[s].nholds); */
3895     unlock2(s);
3896     }
3897     MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3898    
3899     MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3900     {
3901     if (isconst(s)) {
3902     COMPCALL(mov_l_mi)(d,live.state[s].val);
3903     return;
3904     }
3905     CLOBBER_MOV;
3906     s=readreg(s,4);
3907    
3908     raw_mov_l_mr(d,s);
3909     unlock2(s);
3910     }
3911     MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3912    
3913    
3914     MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3915     {
3916     if (isconst(s)) {
3917     COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3918     return;
3919     }
3920     CLOBBER_MOV;
3921     s=readreg(s,2);
3922    
3923     raw_mov_w_mr(d,s);
3924     unlock2(s);
3925     }
3926     MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3927    
3928     MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3929     {
3930     CLOBBER_MOV;
3931     d=writereg(d,2);
3932    
3933     raw_mov_w_rm(d,s);
3934     unlock2(d);
3935     }
3936     MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3937    
3938     MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3939     {
3940     if (isconst(s)) {
3941     COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3942     return;
3943     }
3944    
3945     CLOBBER_MOV;
3946     s=readreg(s,1);
3947    
3948     raw_mov_b_mr(d,s);
3949     unlock2(s);
3950     }
3951     MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3952    
3953     MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3954     {
3955     CLOBBER_MOV;
3956     d=writereg(d,1);
3957    
3958     raw_mov_b_rm(d,s);
3959     unlock2(d);
3960     }
3961     MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3962    
3963     MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3964     {
3965     set_const(d,s);
3966     return;
3967     }
3968     MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3969    
3970     MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3971     {
3972     CLOBBER_MOV;
3973     d=writereg(d,2);
3974    
3975     raw_mov_w_ri(d,s);
3976     unlock2(d);
3977     }
3978     MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3979    
3980     MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3981     {
3982     CLOBBER_MOV;
3983     d=writereg(d,1);
3984    
3985     raw_mov_b_ri(d,s);
3986     unlock2(d);
3987     }
3988     MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3989    
3990    
3991     MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3992     {
3993     CLOBBER_ADD;
3994     raw_add_l_mi(d,s);
3995     }
3996     MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3997    
3998     MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3999     {
4000     CLOBBER_ADD;
4001     raw_add_w_mi(d,s);
4002     }
4003     MENDFUNC(2,add_w_mi,(IMM d, IMM s))
4004    
4005     MIDFUNC(2,add_b_mi,(IMM d, IMM s))
4006     {
4007     CLOBBER_ADD;
4008     raw_add_b_mi(d,s);
4009     }
4010     MENDFUNC(2,add_b_mi,(IMM d, IMM s))
4011    
4012    
4013     MIDFUNC(2,test_l_ri,(R4 d, IMM i))
4014     {
4015     CLOBBER_TEST;
4016     d=readreg(d,4);
4017    
4018     raw_test_l_ri(d,i);
4019     unlock2(d);
4020     }
4021     MENDFUNC(2,test_l_ri,(R4 d, IMM i))
4022    
4023     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
4024     {
4025     CLOBBER_TEST;
4026     d=readreg(d,4);
4027     s=readreg(s,4);
4028    
4029     raw_test_l_rr(d,s);
4030     unlock2(d);
4031     unlock2(s);
4032     }
4033     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4034    
4035     MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4036     {
4037     CLOBBER_TEST;
4038     d=readreg(d,2);
4039     s=readreg(s,2);
4040    
4041     raw_test_w_rr(d,s);
4042     unlock2(d);
4043     unlock2(s);
4044     }
4045     MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4046    
4047     MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4048     {
4049     CLOBBER_TEST;
4050     d=readreg(d,1);
4051     s=readreg(s,1);
4052    
4053     raw_test_b_rr(d,s);
4054     unlock2(d);
4055     unlock2(s);
4056     }
4057     MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4058    
4059    
4060     MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
4061     {
4062     if (isconst(d) && !needflags) {
4063     live.state[d].val &= i;
4064     return;
4065     }
4066    
4067     CLOBBER_AND;
4068     d=rmw(d,4,4);
4069    
4070     raw_and_l_ri(d,i);
4071     unlock2(d);
4072     }
4073     MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4074    
4075     MIDFUNC(2,and_l,(RW4 d, R4 s))
4076     {
4077     CLOBBER_AND;
4078     s=readreg(s,4);
4079     d=rmw(d,4,4);
4080    
4081     raw_and_l(d,s);
4082     unlock2(d);
4083     unlock2(s);
4084     }
4085     MENDFUNC(2,and_l,(RW4 d, R4 s))
4086    
4087     MIDFUNC(2,and_w,(RW2 d, R2 s))
4088     {
4089     CLOBBER_AND;
4090     s=readreg(s,2);
4091     d=rmw(d,2,2);
4092    
4093     raw_and_w(d,s);
4094     unlock2(d);
4095     unlock2(s);
4096     }
4097     MENDFUNC(2,and_w,(RW2 d, R2 s))
4098    
4099     MIDFUNC(2,and_b,(RW1 d, R1 s))
4100     {
4101     CLOBBER_AND;
4102     s=readreg(s,1);
4103     d=rmw(d,1,1);
4104    
4105     raw_and_b(d,s);
4106     unlock2(d);
4107     unlock2(s);
4108     }
4109     MENDFUNC(2,and_b,(RW1 d, R1 s))
4110    
4111     // gb-- used for making an fpcr value in compemu_fpp.cpp
4112     MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4113     {
4114     CLOBBER_OR;
4115     d=rmw(d,4,4);
4116    
4117     raw_or_l_rm(d,s);
4118     unlock2(d);
4119     }
4120     MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4121    
4122     MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4123     {
4124     if (isconst(d) && !needflags) {
4125     live.state[d].val|=i;
4126     return;
4127     }
4128     CLOBBER_OR;
4129     d=rmw(d,4,4);
4130    
4131     raw_or_l_ri(d,i);
4132     unlock2(d);
4133     }
4134     MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4135    
4136     MIDFUNC(2,or_l,(RW4 d, R4 s))
4137     {
4138     if (isconst(d) && isconst(s) && !needflags) {
4139     live.state[d].val|=live.state[s].val;
4140     return;
4141     }
4142     CLOBBER_OR;
4143     s=readreg(s,4);
4144     d=rmw(d,4,4);
4145    
4146     raw_or_l(d,s);
4147     unlock2(d);
4148     unlock2(s);
4149     }
4150     MENDFUNC(2,or_l,(RW4 d, R4 s))
4151    
4152     MIDFUNC(2,or_w,(RW2 d, R2 s))
4153     {
4154     CLOBBER_OR;
4155     s=readreg(s,2);
4156     d=rmw(d,2,2);
4157    
4158     raw_or_w(d,s);
4159     unlock2(d);
4160     unlock2(s);
4161     }
4162     MENDFUNC(2,or_w,(RW2 d, R2 s))
4163    
4164     MIDFUNC(2,or_b,(RW1 d, R1 s))
4165     {
4166     CLOBBER_OR;
4167     s=readreg(s,1);
4168     d=rmw(d,1,1);
4169    
4170     raw_or_b(d,s);
4171     unlock2(d);
4172     unlock2(s);
4173     }
4174     MENDFUNC(2,or_b,(RW1 d, R1 s))
4175    
4176     MIDFUNC(2,adc_l,(RW4 d, R4 s))
4177     {
4178     CLOBBER_ADC;
4179     s=readreg(s,4);
4180     d=rmw(d,4,4);
4181    
4182     raw_adc_l(d,s);
4183    
4184     unlock2(d);
4185     unlock2(s);
4186     }
4187     MENDFUNC(2,adc_l,(RW4 d, R4 s))
4188    
4189     MIDFUNC(2,adc_w,(RW2 d, R2 s))
4190     {
4191     CLOBBER_ADC;
4192     s=readreg(s,2);
4193     d=rmw(d,2,2);
4194    
4195     raw_adc_w(d,s);
4196     unlock2(d);
4197     unlock2(s);
4198     }
4199     MENDFUNC(2,adc_w,(RW2 d, R2 s))
4200    
4201     MIDFUNC(2,adc_b,(RW1 d, R1 s))
4202     {
4203     CLOBBER_ADC;
4204     s=readreg(s,1);
4205     d=rmw(d,1,1);
4206    
4207     raw_adc_b(d,s);
4208     unlock2(d);
4209     unlock2(s);
4210     }
4211     MENDFUNC(2,adc_b,(RW1 d, R1 s))
4212    
4213     MIDFUNC(2,add_l,(RW4 d, R4 s))
4214     {
4215     if (isconst(s)) {
4216     COMPCALL(add_l_ri)(d,live.state[s].val);
4217     return;
4218     }
4219    
4220     CLOBBER_ADD;
4221     s=readreg(s,4);
4222     d=rmw(d,4,4);
4223    
4224     raw_add_l(d,s);
4225    
4226     unlock2(d);
4227     unlock2(s);
4228     }
4229     MENDFUNC(2,add_l,(RW4 d, R4 s))
4230    
4231     MIDFUNC(2,add_w,(RW2 d, R2 s))
4232     {
4233     if (isconst(s)) {
4234     COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4235     return;
4236     }
4237    
4238     CLOBBER_ADD;
4239     s=readreg(s,2);
4240     d=rmw(d,2,2);
4241    
4242     raw_add_w(d,s);
4243     unlock2(d);
4244     unlock2(s);
4245     }
4246     MENDFUNC(2,add_w,(RW2 d, R2 s))
4247    
4248     MIDFUNC(2,add_b,(RW1 d, R1 s))
4249     {
4250     if (isconst(s)) {
4251     COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4252     return;
4253     }
4254    
4255     CLOBBER_ADD;
4256     s=readreg(s,1);
4257     d=rmw(d,1,1);
4258    
4259     raw_add_b(d,s);
4260     unlock2(d);
4261     unlock2(s);
4262     }
4263     MENDFUNC(2,add_b,(RW1 d, R1 s))
4264    
4265     MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4266     {
4267     if (!i && !needflags)
4268     return;
4269     if (isconst(d) && !needflags) {
4270     live.state[d].val-=i;
4271     return;
4272     }
4273     #if USE_OFFSET
4274     if (!needflags) {
4275     add_offset(d,-i);
4276     return;
4277     }
4278     #endif
4279    
4280     CLOBBER_SUB;
4281     d=rmw(d,4,4);
4282    
4283     raw_sub_l_ri(d,i);
4284     unlock2(d);
4285     }
4286     MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4287    
4288     MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4289     {
4290     if (!i && !needflags)
4291     return;
4292    
4293     CLOBBER_SUB;
4294     d=rmw(d,2,2);
4295    
4296     raw_sub_w_ri(d,i);
4297     unlock2(d);
4298     }
4299     MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4300    
4301     MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4302     {
4303     if (!i && !needflags)
4304     return;
4305    
4306     CLOBBER_SUB;
4307     d=rmw(d,1,1);
4308    
4309     raw_sub_b_ri(d,i);
4310    
4311     unlock2(d);
4312     }
4313     MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4314    
4315     MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4316     {
4317     if (!i && !needflags)
4318     return;
4319     if (isconst(d) && !needflags) {
4320     live.state[d].val+=i;
4321     return;
4322     }
4323     #if USE_OFFSET
4324     if (!needflags) {
4325     add_offset(d,i);
4326     return;
4327     }
4328     #endif
4329     CLOBBER_ADD;
4330     d=rmw(d,4,4);
4331     raw_add_l_ri(d,i);
4332     unlock2(d);
4333     }
4334     MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
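/* Sketch of the "lazy constant offsetting" fast path above (USE_OFFSET):
   when flags are not needed, add_l_ri()/sub_l_ri() emit no host code at
   all; the immediate is merely accumulated via add_offset(). A sequence
   such as

       add_l_ri(r,4);
       add_l_ri(r,4);

   therefore costs nothing until the register is flushed, at which point
   a single combined adjustment (+8 here) is written back. */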
4335    
4336     MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4337     {
4338     if (!i && !needflags)
4339     return;
4340    
4341     CLOBBER_ADD;
4342     d=rmw(d,2,2);
4343    
4344     raw_add_w_ri(d,i);
4345     unlock2(d);
4346     }
4347     MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4348    
4349     MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4350     {
4351     if (!i && !needflags)
4352     return;
4353    
4354     CLOBBER_ADD;
4355     d=rmw(d,1,1);
4356    
4357     raw_add_b_ri(d,i);
4358    
4359     unlock2(d);
4360     }
4361     MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4362    
4363     MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4364     {
4365     CLOBBER_SBB;
4366     s=readreg(s,4);
4367     d=rmw(d,4,4);
4368    
4369     raw_sbb_l(d,s);
4370     unlock2(d);
4371     unlock2(s);
4372     }
4373     MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4374    
4375     MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4376     {
4377     CLOBBER_SBB;
4378     s=readreg(s,2);
4379     d=rmw(d,2,2);
4380    
4381     raw_sbb_w(d,s);
4382     unlock2(d);
4383     unlock2(s);
4384     }
4385     MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4386    
4387     MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4388     {
4389     CLOBBER_SBB;
4390     s=readreg(s,1);
4391     d=rmw(d,1,1);
4392    
4393     raw_sbb_b(d,s);
4394     unlock2(d);
4395     unlock2(s);
4396     }
4397     MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4398    
4399     MIDFUNC(2,sub_l,(RW4 d, R4 s))
4400     {
4401     if (isconst(s)) {
4402     COMPCALL(sub_l_ri)(d,live.state[s].val);
4403     return;
4404     }
4405    
4406     CLOBBER_SUB;
4407     s=readreg(s,4);
4408     d=rmw(d,4,4);
4409    
4410     raw_sub_l(d,s);
4411     unlock2(d);
4412     unlock2(s);
4413     }
4414     MENDFUNC(2,sub_l,(RW4 d, R4 s))
4415    
4416     MIDFUNC(2,sub_w,(RW2 d, R2 s))
4417     {
4418     if (isconst(s)) {
4419     COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4420     return;
4421     }
4422    
4423     CLOBBER_SUB;
4424     s=readreg(s,2);
4425     d=rmw(d,2,2);
4426    
4427     raw_sub_w(d,s);
4428     unlock2(d);
4429     unlock2(s);
4430     }
4431     MENDFUNC(2,sub_w,(RW2 d, R2 s))
4432    
4433     MIDFUNC(2,sub_b,(RW1 d, R1 s))
4434     {
4435     if (isconst(s)) {
4436     COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4437     return;
4438     }
4439    
4440     CLOBBER_SUB;
4441     s=readreg(s,1);
4442     d=rmw(d,1,1);
4443    
4444     raw_sub_b(d,s);
4445     unlock2(d);
4446     unlock2(s);
4447     }
4448     MENDFUNC(2,sub_b,(RW1 d, R1 s))
4449    
4450     MIDFUNC(2,cmp_l,(R4 d, R4 s))
4451     {
4452     CLOBBER_CMP;
4453     s=readreg(s,4);
4454     d=readreg(d,4);
4455    
4456     raw_cmp_l(d,s);
4457     unlock2(d);
4458     unlock2(s);
4459     }
4460     MENDFUNC(2,cmp_l,(R4 d, R4 s))
4461    
4462     MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4463     {
4464     CLOBBER_CMP;
4465     r=readreg(r,4);
4466    
4467     raw_cmp_l_ri(r,i);
4468     unlock2(r);
4469     }
4470     MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4471    
4472     MIDFUNC(2,cmp_w,(R2 d, R2 s))
4473     {
4474     CLOBBER_CMP;
4475     s=readreg(s,2);
4476     d=readreg(d,2);
4477    
4478     raw_cmp_w(d,s);
4479     unlock2(d);
4480     unlock2(s);
4481     }
4482     MENDFUNC(2,cmp_w,(R2 d, R2 s))
4483    
4484     MIDFUNC(2,cmp_b,(R1 d, R1 s))
4485     {
4486     CLOBBER_CMP;
4487     s=readreg(s,1);
4488     d=readreg(d,1);
4489    
4490     raw_cmp_b(d,s);
4491     unlock2(d);
4492     unlock2(s);
4493     }
4494     MENDFUNC(2,cmp_b,(R1 d, R1 s))
4495    
4496    
4497     MIDFUNC(2,xor_l,(RW4 d, R4 s))
4498     {
4499     CLOBBER_XOR;
4500     s=readreg(s,4);
4501     d=rmw(d,4,4);
4502    
4503     raw_xor_l(d,s);
4504     unlock2(d);
4505     unlock2(s);
4506     }
4507     MENDFUNC(2,xor_l,(RW4 d, R4 s))
4508    
4509     MIDFUNC(2,xor_w,(RW2 d, R2 s))
4510     {
4511     CLOBBER_XOR;
4512     s=readreg(s,2);
4513     d=rmw(d,2,2);
4514    
4515     raw_xor_w(d,s);
4516     unlock2(d);
4517     unlock2(s);
4518     }
4519     MENDFUNC(2,xor_w,(RW2 d, R2 s))
4520    
4521     MIDFUNC(2,xor_b,(RW1 d, R1 s))
4522     {
4523     CLOBBER_XOR;
4524     s=readreg(s,1);
4525     d=rmw(d,1,1);
4526    
4527     raw_xor_b(d,s);
4528     unlock2(d);
4529     unlock2(s);
4530     }
4531     MENDFUNC(2,xor_b,(RW1 d, R1 s))
4532    
4533     MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4534     {
4535     clobber_flags();
4536     remove_all_offsets();
4537     if (osize==4) {
4538     if (out1!=in1 && out1!=r) {
4539     COMPCALL(forget_about)(out1);
4540     }
4541     }
4542     else {
4543     tomem_c(out1);
4544     }
4545    
4546     in1=readreg_specific(in1,isize,REG_PAR1);
4547     r=readreg(r,4);
4548     prepare_for_call_1(); /* This should ensure that there won't be
4549     any need for swapping nregs in prepare_for_call_2
4550     */
4551     #if USE_NORMAL_CALLING_CONVENTION
4552     raw_push_l_r(in1);
4553     #endif
4554     unlock2(in1);
4555     unlock2(r);
4556    
4557     prepare_for_call_2();
4558     raw_call_r(r);
4559    
4560     #if USE_NORMAL_CALLING_CONVENTION
4561     raw_inc_sp(4);
4562     #endif
4563    
4564    
4565     live.nat[REG_RESULT].holds[0]=out1;
4566     live.nat[REG_RESULT].nholds=1;
4567     live.nat[REG_RESULT].touched=touchcnt++;
4568    
4569     live.state[out1].realreg=REG_RESULT;
4570     live.state[out1].realind=0;
4571     live.state[out1].val=0;
4572     live.state[out1].validsize=osize;
4573     live.state[out1].dirtysize=osize;
4574     set_status(out1,DIRTY);
4575     }
4576     MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4577    
4578     MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4579     {
4580     clobber_flags();
4581     remove_all_offsets();
4582     in1=readreg_specific(in1,isize1,REG_PAR1);
4583     in2=readreg_specific(in2,isize2,REG_PAR2);
4584     r=readreg(r,4);
4585     prepare_for_call_1(); /* This should ensure that there won't be
4586     any need for swapping nregs in prepare_for_call_2
4587     */
4588     #if USE_NORMAL_CALLING_CONVENTION
4589     raw_push_l_r(in2);
4590     raw_push_l_r(in1);
4591     #endif
4592     unlock2(r);
4593     unlock2(in1);
4594     unlock2(in2);
4595     prepare_for_call_2();
4596     raw_call_r(r);
4597     #if USE_NORMAL_CALLING_CONVENTION
4598     raw_inc_sp(8);
4599     #endif
4600     }
4601     MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4602    
4603     /* forget_about() takes a mid-layer register */
4604     MIDFUNC(1,forget_about,(W4 r))
4605     {
4606     if (isinreg(r))
4607     disassociate(r);
4608     live.state[r].val=0;
4609     set_status(r,UNDEF);
4610     }
4611     MENDFUNC(1,forget_about,(W4 r))
4612    
4613     MIDFUNC(0,nop,(void))
4614     {
4615     raw_nop();
4616     }
4617     MENDFUNC(0,nop,(void))
4618    
4619    
4620     MIDFUNC(1,f_forget_about,(FW r))
4621     {
4622     if (f_isinreg(r))
4623     f_disassociate(r);
4624     live.fate[r].status=UNDEF;
4625     }
4626     MENDFUNC(1,f_forget_about,(FW r))
4627    
4628     MIDFUNC(1,fmov_pi,(FW r))
4629     {
4630     r=f_writereg(r);
4631     raw_fmov_pi(r);
4632     f_unlock(r);
4633     }
4634     MENDFUNC(1,fmov_pi,(FW r))
4635    
4636     MIDFUNC(1,fmov_log10_2,(FW r))
4637     {
4638     r=f_writereg(r);
4639     raw_fmov_log10_2(r);
4640     f_unlock(r);
4641     }
4642     MENDFUNC(1,fmov_log10_2,(FW r))
4643    
4644     MIDFUNC(1,fmov_log2_e,(FW r))
4645     {
4646     r=f_writereg(r);
4647     raw_fmov_log2_e(r);
4648     f_unlock(r);
4649     }
4650     MENDFUNC(1,fmov_log2_e,(FW r))
4651    
4652     MIDFUNC(1,fmov_loge_2,(FW r))
4653     {
4654     r=f_writereg(r);
4655     raw_fmov_loge_2(r);
4656     f_unlock(r);
4657     }
4658     MENDFUNC(1,fmov_loge_2,(FW r))
4659    
4660     MIDFUNC(1,fmov_1,(FW r))
4661     {
4662     r=f_writereg(r);
4663     raw_fmov_1(r);
4664     f_unlock(r);
4665     }
4666     MENDFUNC(1,fmov_1,(FW r))
4667    
4668     MIDFUNC(1,fmov_0,(FW r))
4669     {
4670     r=f_writereg(r);
4671     raw_fmov_0(r);
4672     f_unlock(r);
4673     }
4674     MENDFUNC(1,fmov_0,(FW r))
4675    
4676     MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4677     {
4678     r=f_writereg(r);
4679     raw_fmov_rm(r,m);
4680     f_unlock(r);
4681     }
4682     MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4683    
4684     MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4685     {
4686     r=f_writereg(r);
4687     raw_fmovi_rm(r,m);
4688     f_unlock(r);
4689     }
4690     MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4691    
4692     MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4693     {
4694     r=f_readreg(r);
4695     raw_fmovi_mr(m,r);
4696     f_unlock(r);
4697     }
4698     MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4699    
4700     MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4701     {
4702     r=f_writereg(r);
4703     raw_fmovs_rm(r,m);
4704     f_unlock(r);
4705     }
4706     MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4707    
4708     MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4709     {
4710     r=f_readreg(r);
4711     raw_fmovs_mr(m,r);
4712     f_unlock(r);
4713     }
4714     MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4715    
4716     MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4717     {
4718     r=f_readreg(r);
4719     raw_fmov_ext_mr(m,r);
4720     f_unlock(r);
4721     }
4722     MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4723    
4724     MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4725     {
4726     r=f_readreg(r);
4727     raw_fmov_mr(m,r);
4728     f_unlock(r);
4729     }
4730     MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4731    
4732     MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4733     {
4734     r=f_writereg(r);
4735     raw_fmov_ext_rm(r,m);
4736     f_unlock(r);
4737     }
4738     MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4739    
4740     MIDFUNC(2,fmov_rr,(FW d, FR s))
4741     {
4742     if (d==s) { /* How pointless! */
4743     return;
4744     }
4745     #if USE_F_ALIAS
4746     f_disassociate(d);
4747     s=f_readreg(s);
4748     live.fate[d].realreg=s;
4749     live.fate[d].realind=live.fat[s].nholds;
4750     live.fate[d].status=DIRTY;
4751     live.fat[s].holds[live.fat[s].nholds]=d;
4752     live.fat[s].nholds++;
4753     f_unlock(s);
4754     #else
4755     s=f_readreg(s);
4756     d=f_writereg(d);
4757     raw_fmov_rr(d,s);
4758     f_unlock(s);
4759     f_unlock(d);
4760     #endif
4761     }
4762     MENDFUNC(2,fmov_rr,(FW d, FR s))
4763    
4764     MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4765     {
4766     index=readreg(index,4);
4767    
4768     raw_fldcw_m_indexed(index,base);
4769     unlock2(index);
4770     }
4771     MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4772    
4773     MIDFUNC(1,ftst_r,(FR r))
4774     {
4775     r=f_readreg(r);
4776     raw_ftst_r(r);
4777     f_unlock(r);
4778     }
4779     MENDFUNC(1,ftst_r,(FR r))
4780    
4781     MIDFUNC(0,dont_care_fflags,(void))
4782     {
4783     f_disassociate(FP_RESULT);
4784     }
4785     MENDFUNC(0,dont_care_fflags,(void))
4786    
4787     MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4788     {
4789     s=f_readreg(s);
4790     d=f_writereg(d);
4791     raw_fsqrt_rr(d,s);
4792     f_unlock(s);
4793     f_unlock(d);
4794     }
4795     MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4796    
4797     MIDFUNC(2,fabs_rr,(FW d, FR s))
4798     {
4799     s=f_readreg(s);
4800     d=f_writereg(d);
4801     raw_fabs_rr(d,s);
4802     f_unlock(s);
4803     f_unlock(d);
4804     }
4805     MENDFUNC(2,fabs_rr,(FW d, FR s))
4806    
4807     MIDFUNC(2,fsin_rr,(FW d, FR s))
4808     {
4809     s=f_readreg(s);
4810     d=f_writereg(d);
4811     raw_fsin_rr(d,s);
4812     f_unlock(s);
4813     f_unlock(d);
4814     }
4815     MENDFUNC(2,fsin_rr,(FW d, FR s))
4816    
4817     MIDFUNC(2,fcos_rr,(FW d, FR s))
4818     {
4819     s=f_readreg(s);
4820     d=f_writereg(d);
4821     raw_fcos_rr(d,s);
4822     f_unlock(s);
4823     f_unlock(d);
4824     }
4825     MENDFUNC(2,fcos_rr,(FW d, FR s))
4826    
4827     MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4828     {
4829     s=f_readreg(s);
4830     d=f_writereg(d);
4831     raw_ftwotox_rr(d,s);
4832     f_unlock(s);
4833     f_unlock(d);
4834     }
4835     MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4836    
4837     MIDFUNC(2,fetox_rr,(FW d, FR s))
4838     {
4839     s=f_readreg(s);
4840     d=f_writereg(d);
4841     raw_fetox_rr(d,s);
4842     f_unlock(s);
4843     f_unlock(d);
4844     }
4845     MENDFUNC(2,fetox_rr,(FW d, FR s))
4846    
4847     MIDFUNC(2,frndint_rr,(FW d, FR s))
4848     {
4849     s=f_readreg(s);
4850     d=f_writereg(d);
4851     raw_frndint_rr(d,s);
4852     f_unlock(s);
4853     f_unlock(d);
4854     }
4855     MENDFUNC(2,frndint_rr,(FW d, FR s))
4856    
4857     MIDFUNC(2,flog2_rr,(FW d, FR s))
4858     {
4859     s=f_readreg(s);
4860     d=f_writereg(d);
4861     raw_flog2_rr(d,s);
4862     f_unlock(s);
4863     f_unlock(d);
4864     }
4865     MENDFUNC(2,flog2_rr,(FW d, FR s))
4866    
4867     MIDFUNC(2,fneg_rr,(FW d, FR s))
4868     {
4869     s=f_readreg(s);
4870     d=f_writereg(d);
4871     raw_fneg_rr(d,s);
4872     f_unlock(s);
4873     f_unlock(d);
4874     }
4875     MENDFUNC(2,fneg_rr,(FW d, FR s))
4876    
4877     MIDFUNC(2,fadd_rr,(FRW d, FR s))
4878     {
4879     s=f_readreg(s);
4880     d=f_rmw(d);
4881     raw_fadd_rr(d,s);
4882     f_unlock(s);
4883     f_unlock(d);
4884     }
4885     MENDFUNC(2,fadd_rr,(FRW d, FR s))
4886    
4887     MIDFUNC(2,fsub_rr,(FRW d, FR s))
4888     {
4889     s=f_readreg(s);
4890     d=f_rmw(d);
4891     raw_fsub_rr(d,s);
4892     f_unlock(s);
4893     f_unlock(d);
4894     }
4895     MENDFUNC(2,fsub_rr,(FRW d, FR s))
4896    
4897     MIDFUNC(2,fcmp_rr,(FR d, FR s))
4898     {
4899     d=f_readreg(d);
4900     s=f_readreg(s);
4901     raw_fcmp_rr(d,s);
4902     f_unlock(s);
4903     f_unlock(d);
4904     }
4905     MENDFUNC(2,fcmp_rr,(FR d, FR s))
4906    
4907     MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4908     {
4909     s=f_readreg(s);
4910     d=f_rmw(d);
4911     raw_fdiv_rr(d,s);
4912     f_unlock(s);
4913     f_unlock(d);
4914     }
4915     MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4916    
4917     MIDFUNC(2,frem_rr,(FRW d, FR s))
4918     {
4919     s=f_readreg(s);
4920     d=f_rmw(d);
4921     raw_frem_rr(d,s);
4922     f_unlock(s);
4923     f_unlock(d);
4924     }
4925     MENDFUNC(2,frem_rr,(FRW d, FR s))
4926    
4927     MIDFUNC(2,frem1_rr,(FRW d, FR s))
4928     {
4929     s=f_readreg(s);
4930     d=f_rmw(d);
4931     raw_frem1_rr(d,s);
4932     f_unlock(s);
4933     f_unlock(d);
4934     }
4935     MENDFUNC(2,frem1_rr,(FRW d, FR s))
4936    
4937     MIDFUNC(2,fmul_rr,(FRW d, FR s))
4938     {
4939     s=f_readreg(s);
4940     d=f_rmw(d);
4941     raw_fmul_rr(d,s);
4942     f_unlock(s);
4943     f_unlock(d);
4944     }
4945     MENDFUNC(2,fmul_rr,(FRW d, FR s))
4946    
4947     /********************************************************************
4948     * Support functions exposed to gencomp. CREATE time *
4949     ********************************************************************/
4950    
4951 gbeauche 1.26 void set_zero(int r, int tmp)
4952     {
4953     if (setzflg_uses_bsf)
4954     bsf_l_rr(r,r);
4955     else
4956     simulate_bsf(tmp,r);
4957     }
4958    
4959 gbeauche 1.1 int kill_rodent(int r)
4960     {
4961     return KILLTHERAT &&
4962     have_rat_stall &&
4963     (live.state[r].status==INMEM ||
4964     live.state[r].status==CLEAN ||
4965     live.state[r].status==ISCONST ||
4966     live.state[r].dirtysize==4);
4967     }
4968    
4969     uae_u32 get_const(int r)
4970     {
4971     Dif (!isconst(r)) {
4972     write_log("Register %d should be constant, but isn't\n",r);
4973     abort();
4974     }
4975     return live.state[r].val;
4976     }
4977    
4978     void sync_m68k_pc(void)
4979     {
4980     if (m68k_pc_offset) {
4981     add_l_ri(PC_P,m68k_pc_offset);
4982     comp_pc_p+=m68k_pc_offset;
4983     m68k_pc_offset=0;
4984     }
4985     }
4986    
4987     /********************************************************************
4988     * Scratch registers management *
4989     ********************************************************************/
4990    
4991     struct scratch_t {
4992     uae_u32 regs[VREGS];
4993     fpu_register fregs[VFREGS];
4994     };
4995    
4996     static scratch_t scratch;
4997    
4998     /********************************************************************
4999     * Support functions exposed to newcpu *
5000     ********************************************************************/
5001    
5002     static inline const char *str_on_off(bool b)
5003     {
5004     return b ? "on" : "off";
5005     }
5006    
5007     void compiler_init(void)
5008     {
5009     static bool initialized = false;
5010     if (initialized)
5011     return;
5012 gbeauche 1.24
5013 gbeauche 1.1 #if JIT_DEBUG
5014     // JIT debug mode ?
5015     JITDebug = PrefsFindBool("jitdebug");
5016     #endif
5017     write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
5018    
5019     #ifdef USE_JIT_FPU
5020     // Use JIT compiler for FPU instructions ?
5021     avoid_fpu = !PrefsFindBool("jitfpu");
5022     #else
5023     // JIT FPU is always disabled
5024     avoid_fpu = true;
5025     #endif
5026     write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
5027    
5028     // Get size of the translation cache (in KB)
5029     cache_size = PrefsFindInt32("jitcachesize");
5030     write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
5031    
5032     // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
5033     raw_init_cpu();
5034 gbeauche 1.15 setzflg_uses_bsf = target_check_bsf();
5035 gbeauche 1.1 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
5036     write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
5037 gbeauche 1.5 write_log("<JIT compiler> : alignments for loops and jumps are %d, %d\n", align_loops, align_jumps);
5038 gbeauche 1.1
5039     // Translation cache flush mechanism
5040     lazy_flush = PrefsFindBool("jitlazyflush");
5041     write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
5042     flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
5043    
5044     // Compiler features
5045     write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
5046     write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
5047     write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
5048 gbeauche 1.33 #if USE_INLINING
5049     follow_const_jumps = PrefsFindBool("jitinline");
5050     #endif
5051     write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
5052 gbeauche 1.1 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
5053    
5054     // Build compiler tables
5055     build_comp();
5056    
5057     initialized = true;
5058    
5059 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
5060     write_log("<JIT compiler> : gather statistics on untranslated insn counts\n");
5061     #endif
5062    
5063 gbeauche 1.1 #if PROFILE_COMPILE_TIME
5064     write_log("<JIT compiler> : gather statistics on translation time\n");
5065     emul_start_time = clock();
5066     #endif
5067     }
5068    
5069     void compiler_exit(void)
5070     {
5071     #if PROFILE_COMPILE_TIME
5072     emul_end_time = clock();
5073     #endif
5074    
5075     // Deallocate translation cache
5076     if (compiled_code) {
5077 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5078 gbeauche 1.1 compiled_code = 0;
5079     }
5080 gbeauche 1.24
5081     // Deallocate popallspace
5082     if (popallspace) {
5083     vm_release(popallspace, POPALLSPACE_SIZE);
5084     popallspace = 0;
5085     }
5086 gbeauche 1.1
5087     #if PROFILE_COMPILE_TIME
5088     write_log("### Compile Block statistics\n");
5089     write_log("Number of calls to compile_block : %d\n", compile_count);
5090     uae_u32 emul_time = emul_end_time - emul_start_time;
5091     write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5092     write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5093     100.0*double(compile_time)/double(emul_time));
5094     write_log("\n");
5095     #endif
5096 gbeauche 1.9
5097     #if PROFILE_UNTRANSLATED_INSNS
5098     uae_u64 untranslated_count = 0;
5099     for (int i = 0; i < 65536; i++) {
5100     opcode_nums[i] = i;
5101     untranslated_count += raw_cputbl_count[i];
5102     }
5103     write_log("Sorting untranslated instruction counts...\n");
5104     qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5105     write_log("\nRank Opc Count Name\n");
5106     for (int i = 0; i < untranslated_top_ten; i++) {
5107     uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5108     struct instr *dp;
5109     struct mnemolookup *lookup;
5110     if (!count)
5111     break;
5112     dp = table68k + opcode_nums[i];
5113     for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5114     ;
5115     write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
5116     }
5117     #endif
5118 gbeauche 1.28
5119     #if RECORD_REGISTER_USAGE
5120     int reg_count_ids[16];
5121     uint64 tot_reg_count = 0;
5122     for (int i = 0; i < 16; i++) {
5123     reg_count_ids[i] = i;
5124     tot_reg_count += reg_count[i];
5125     }
5126     qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
5127     uint64 cum_reg_count = 0;
5128     for (int i = 0; i < 16; i++) {
5129     int r = reg_count_ids[i];
5130     cum_reg_count += reg_count[r];
5131     printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
5132     reg_count[r],
5133     100.0*double(reg_count[r])/double(tot_reg_count),
5134     100.0*double(cum_reg_count)/double(tot_reg_count));
5135     }
5136     #endif
5137 gbeauche 1.1 }
5138    
5139     bool compiler_use_jit(void)
5140     {
5141     // Check for the "jit" prefs item
5142     if (!PrefsFindBool("jit"))
5143     return false;
5144    
5145     // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5146     if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5147     write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5148     return false;
5149     }
5150    
5151 gbeauche 1.41 // Enable JIT for 68020+ emulation only
5152     if (CPUType < 2) {
5153     write_log("<JIT compiler> : JIT is not supported in 680%d0 emulation mode, disabling.\n", CPUType);
5154 gbeauche 1.1 return false;
5155     }
5156    
5157     return true;
5158     }
5159    
5160     void init_comp(void)
5161     {
5162     int i;
5163     uae_s8* cb=can_byte;
5164     uae_s8* cw=can_word;
5165     uae_s8* au=always_used;
5166    
5167 gbeauche 1.28 #if RECORD_REGISTER_USAGE
5168     for (i=0;i<16;i++)
5169     reg_count_local[i] = 0;
5170     #endif
5171    
5172 gbeauche 1.1 for (i=0;i<VREGS;i++) {
5173     live.state[i].realreg=-1;
5174     live.state[i].needflush=NF_SCRATCH;
5175     live.state[i].val=0;
5176     set_status(i,UNDEF);
5177     }
5178    
5179     for (i=0;i<VFREGS;i++) {
5180     live.fate[i].status=UNDEF;
5181     live.fate[i].realreg=-1;
5182     live.fate[i].needflush=NF_SCRATCH;
5183     }
5184    
5185     for (i=0;i<VREGS;i++) {
5186     if (i<16) { /* First 16 registers map to 68k registers */
5187     live.state[i].mem=((uae_u32*)&regs)+i;
5188     live.state[i].needflush=NF_TOMEM;
5189     set_status(i,INMEM);
5190     }
5191     else
5192     live.state[i].mem=scratch.regs+i;
5193     }
5194     live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5195     live.state[PC_P].needflush=NF_TOMEM;
5196 gbeauche 1.24 set_const(PC_P,(uintptr)comp_pc_p);
5197 gbeauche 1.1
5198 gbeauche 1.24 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5199 gbeauche 1.1 live.state[FLAGX].needflush=NF_TOMEM;
5200     set_status(FLAGX,INMEM);
5201    
5202 gbeauche 1.24 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5203 gbeauche 1.1 live.state[FLAGTMP].needflush=NF_TOMEM;
5204     set_status(FLAGTMP,INMEM);
5205    
5206     live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5207     set_status(NEXT_HANDLER,UNDEF);
5208    
5209     for (i=0;i<VFREGS;i++) {
5210     if (i<8) { /* First 8 registers map to 68k FPU registers */
5211     live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5212     live.fate[i].needflush=NF_TOMEM;
5213     live.fate[i].status=INMEM;
5214     }
5215     else if (i==FP_RESULT) {
5216     live.fate[i].mem=(uae_u32*)(&fpu.result);
5217     live.fate[i].needflush=NF_TOMEM;
5218     live.fate[i].status=INMEM;
5219     }
5220     else
5221 gbeauche 1.25 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5222 gbeauche 1.1 }
5223    
5224    
5225     for (i=0;i<N_REGS;i++) {
5226     live.nat[i].touched=0;
5227     live.nat[i].nholds=0;
5228     live.nat[i].locked=0;
5229     if (*cb==i) {
5230     live.nat[i].canbyte=1; cb++;
5231     } else live.nat[i].canbyte=0;
5232     if (*cw==i) {
5233     live.nat[i].canword=1; cw++;
5234     } else live.nat[i].canword=0;
5235     if (*au==i) {
5236     live.nat[i].locked=1; au++;
5237     }
5238     }
5239    
5240     for (i=0;i<N_FREGS;i++) {
5241     live.fat[i].touched=0;
5242     live.fat[i].nholds=0;
5243     live.fat[i].locked=0;
5244     }
5245    
5246     touchcnt=1;
5247     m68k_pc_offset=0;
5248     live.flags_in_flags=TRASH;
5249     live.flags_on_stack=VALID;
5250     live.flags_are_important=1;
5251    
5252     raw_fp_init();
5253     }
5254    
5255     /* Only do this if you really mean it! The next call should be to init! */
5256     void flush(int save_regs)
5257     {
5258     int i;
5259    
5260     log_flush();
5261     flush_flags(); /* low level */
5262     sync_m68k_pc(); /* mid level */
5263    
5264     if (save_regs) {
5265     for (i=0;i<VFREGS;i++) {
5266     if (live.fate[i].needflush==NF_SCRATCH ||
5267     live.fate[i].status==CLEAN) {
5268     f_disassociate(i);
5269     }
5270     }
5271     for (i=0;i<VREGS;i++) {
5272     if (live.state[i].needflush==NF_TOMEM) {
5273     switch(live.state[i].status) {
5274     case INMEM:
5275     if (live.state[i].val) {
5276 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5277 gbeauche 1.1 log_vwrite(i);
5278     live.state[i].val=0;
5279     }
5280     break;
5281     case CLEAN:
5282     case DIRTY:
5283     remove_offset(i,-1); tomem(i); break;
5284     case ISCONST:
5285     if (i!=PC_P)
5286     writeback_const(i);
5287     break;
5288     default: break;
5289     }
5290     Dif (live.state[i].val && i!=PC_P) {
5291     write_log("Register %d still has val %x\n",
5292     i,live.state[i].val);
5293     }
5294     }
5295     }
5296     for (i=0;i<VFREGS;i++) {
5297     if (live.fate[i].needflush==NF_TOMEM &&
5298     live.fate[i].status==DIRTY) {
5299     f_evict(i);
5300     }
5301     }
5302     raw_fp_cleanup_drop();
5303     }
5304     if (needflags) {
5305     write_log("Warning! flush with needflags=1!\n");
5306     }
5307     }
5308    
5309     static void flush_keepflags(void)
5310     {
5311     int i;
5312    
5313     for (i=0;i<VFREGS;i++) {
5314     if (live.fate[i].needflush==NF_SCRATCH ||
5315     live.fate[i].status==CLEAN) {
5316     f_disassociate(i);
5317     }
5318     }
5319     for (i=0;i<VREGS;i++) {
5320     if (live.state[i].needflush==NF_TOMEM) {
5321     switch(live.state[i].status) {
5322     case INMEM:
5323     /* Can't adjust the offset here --- that needs "add" */
5324     break;
5325     case CLEAN:
5326     case DIRTY:
5327     remove_offset(i,-1); tomem(i); break;
5328     case ISCONST:
5329     if (i!=PC_P)
5330     writeback_const(i);
5331     break;
5332     default: break;
5333     }
5334     }
5335     }
5336     for (i=0;i<VFREGS;i++) {
5337     if (live.fate[i].needflush==NF_TOMEM &&
5338     live.fate[i].status==DIRTY) {
5339     f_evict(i);
5340     }
5341     }
5342     raw_fp_cleanup_drop();
5343     }
5344    
5345     void freescratch(void)
5346     {
5347     int i;
5348     for (i=0;i<N_REGS;i++)
5349     if (live.nat[i].locked && i!=4)
5350     write_log("Warning! %d is locked\n",i);
5351    
5352     for (i=0;i<VREGS;i++)
5353     if (live.state[i].needflush==NF_SCRATCH) {
5354     forget_about(i);
5355     }
5356    
5357     for (i=0;i<VFREGS;i++)
5358     if (live.fate[i].needflush==NF_SCRATCH) {
5359     f_forget_about(i);
5360     }
5361     }
5362    
5363     /********************************************************************
5364     * Support functions, internal *
5365     ********************************************************************/
5366    
5367    
5368     static void align_target(uae_u32 a)
5369     {
5370 gbeauche 1.14 if (!a)
5371     return;
5372    
5373 gbeauche 1.12 if (tune_nop_fillers)
5374 gbeauche 1.24 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5375 gbeauche 1.12 else {
5376     /* Fill with NOPs --- makes debugging with gdb easier */
5377 gbeauche 1.24 while ((uintptr)target&(a-1))
5378 gbeauche 1.12 *target++=0x90;
5379     }
5380 gbeauche 1.1 }
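/* Example: if target currently ends in 0xc and a == 16, the code above
   emits 4 bytes of filler, either as an optimized multi-byte NOP sequence
   (tune_nop_fillers) or as four plain one-byte 0x90 NOPs. */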
5381    
5382     static __inline__ int isinrom(uintptr addr)
5383     {
5384     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5385     }
5386    
5387     static void flush_all(void)
5388     {
5389     int i;
5390    
5391     log_flush();
5392     for (i=0;i<VREGS;i++)
5393     if (live.state[i].status==DIRTY) {
5394     if (!call_saved[live.state[i].realreg]) {
5395     tomem(i);
5396     }
5397     }
5398     for (i=0;i<VFREGS;i++)
5399     if (f_isinreg(i))
5400     f_evict(i);
5401     raw_fp_cleanup_drop();
5402     }
5403    
5404     /* Make sure all registers that will get clobbered by a call are
5405     safe and sound in memory */
5406     static void prepare_for_call_1(void)
5407     {
5408     flush_all(); /* If there are registers that don't get clobbered,
5409     * we should be a bit more selective here */
5410     }
5411    
5412     /* We will call a C routine in a moment. That will clobber all registers,
5413     so we need to disassociate everything */
5414     static void prepare_for_call_2(void)
5415     {
5416     int i;
5417     for (i=0;i<N_REGS;i++)
5418     if (!call_saved[i] && live.nat[i].nholds>0)
5419     free_nreg(i);
5420    
5421     for (i=0;i<N_FREGS;i++)
5422     if (live.fat[i].nholds>0)
5423     f_free_nreg(i);
5424    
5425     live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5426     flags at the very start of the call_r
5427     functions! */
5428     }
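/* Typical calling sequence for the two helpers above, as used by
   call_r_11()/call_r_02() earlier in this file (sketch):

       prepare_for_call_1();   // flush everything while args are still locked
       unlock2(in1); unlock2(r);
       prepare_for_call_2();   // free the caller-saved native registers
       raw_call_r(r);          // safe now: the callee may clobber anything
*/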
5429    
5430     /********************************************************************
5431     * Memory access and related functions, CREATE time *
5432     ********************************************************************/
5433    
5434     void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5435     {
5436     next_pc_p=not_taken;
5437     taken_pc_p=taken;
5438     branch_cc=cond;
5439     }
5440    
5441    
5442     static uae_u32 get_handler_address(uae_u32 addr)
5443     {
5445 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5446     return (uintptr)&(bi->direct_handler_to_use);
5447 gbeauche 1.1 }
5448    
5449     static uae_u32 get_handler(uae_u32 addr)
5450     {
5452 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5453     return (uintptr)bi->direct_handler_to_use;
5454 gbeauche 1.1 }
5455    
5456     static void load_handler(int reg, uae_u32 addr)
5457     {
5458     mov_l_rm(reg,get_handler_address(addr));
5459     }
5460    
5461     /* This version assumes that it is writing *real* memory, and *will* fail
5462     * if that assumption is wrong! No branches, no second chances, just
5463     * straight go-for-it attitude */
5464    
5465 gbeauche 1.24 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5466 gbeauche 1.1 {
5467     int f=tmp;
5468    
5469     if (clobber)
5470     f=source;
5471 gbeauche 1.24
5472 gbeauche 1.1 switch(size) {
5473     case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5474     case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5475     case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5476     }
5477     forget_about(tmp);
5478     forget_about(f);
5479     }
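/* Sketch of what the 4-byte case above boils down to on the host,
   assuming DIRECT_ADDRESSING (MEMBaseDiff being the guest-to-host
   offset); the bswap is needed because the 68k is big-endian:

       uae_u32 v = value;                                   // mov_l_rr(f,source)
       v = bswap_host_32(v);                                // bswap_32(f)
       *(uae_u32 *)(MEMBaseDiff + (uintptr)address) = v;    // mov_l_bRr

   bswap_host_32 here is shorthand for the host's byte-swap instruction. */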
5480    
5481     void writebyte(int address, int source, int tmp)
5482     {
5483 gbeauche 1.24 writemem_real(address,source,1,tmp,0);
5484 gbeauche 1.1 }
5485    
5486     static __inline__ void writeword_general(int address, int source, int tmp,
5487     int clobber)
5488     {
5489 gbeauche 1.24 writemem_real(address,source,2,tmp,clobber);
5490 gbeauche 1.1 }
5491    
5492     void writeword_clobber(int address, int source, int tmp)
5493     {
5494     writeword_general(address,source,tmp,1);
5495     }
5496    
5497     void writeword(int address, int source, int tmp)
5498     {
5499     writeword_general(address,source,tmp,0);
5500     }
5501    
5502     static __inline__ void writelong_general(int address, int source, int tmp,
5503     int clobber)
5504     {
5505 gbeauche 1.24 writemem_real(address,source,4,tmp,clobber);
5506 gbeauche 1.1 }
5507    
5508     void writelong_clobber(int address, int source, int tmp)
5509     {
5510     writelong_general(address,source,tmp,1);
5511     }
5512    
5513     void writelong(int address, int source, int tmp)
5514     {
5515     writelong_general(address,source,tmp,0);
5516     }
5517    
5518    
5519    
5520     /* This version assumes that it is reading *real* memory, and *will* fail
5521     * if that assumption is wrong! No branches, no second chances, just
5522     * straight go-for-it attitude */
5523    
5524 gbeauche 1.24 static void readmem_real(int address, int dest, int size, int tmp)
5525 gbeauche 1.1 {
5526     int f=tmp;
5527    
5528     if (size==4 && address!=dest)
5529     f=dest;
5530    
5531     switch(size) {
5532     case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5533     case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5534     case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5535     }
5536     forget_about(tmp);
5537     }
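/* Mirror-image sketch of the 4-byte read path, again assuming
   DIRECT_ADDRESSING:

       uae_u32 v = *(uae_u32 *)(MEMBaseDiff + (uintptr)address); // mov_l_brR
       dest = bswap_host_32(v);                                  // bswap_32

   Byte-sized reads need no swap, which is why the size==1 case omits it. */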
5538    
5539     void readbyte(int address, int dest, int tmp)
5540     {
5541 gbeauche 1.24 readmem_real(address,dest,1,tmp);
5542 gbeauche 1.1 }
5543    
5544     void readword(int address, int dest, int tmp)
5545     {
5546 gbeauche 1.24 readmem_real(address,dest,2,tmp);
5547 gbeauche 1.1 }
5548    
5549     void readlong(int address, int dest, int tmp)
5550     {
5551 gbeauche 1.24 readmem_real(address,dest,4,tmp);
5552 gbeauche 1.1 }
5553    
5554     void get_n_addr(int address, int dest, int tmp)
5555     {
5556     // a is the register containing the virtual address
5557     // after the offset has been fetched
5558     int a=tmp;
5559    
5560     // f is the register that will contain the offset
5561     int f=tmp;
5562    
5563     // a == f == tmp if (address == dest)
5564     if (address!=dest) {
5565     a=address;
5566     f=dest;
5567     }
5568    
5569     #if REAL_ADDRESSING
5570     mov_l_rr(dest, address);
5571     #elif DIRECT_ADDRESSING
5572     lea_l_brr(dest,address,MEMBaseDiff);
5573     #endif
5574     forget_about(tmp);
5575     }
5576    
5577     void get_n_addr_jmp(int address, int dest, int tmp)
5578     {
5579     /* For this, we need to get the same address as the rest of UAE
5580     would --- otherwise we end up translating everything twice */
5581     get_n_addr(address,dest,tmp);
5582     }
5583    
5584    
5585     /* base is a register, but dp is an actual value.
5586     target is a register, as is tmp */
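/* Layout of the 68020 extension word dp, as decoded below (derived from
   the bit tests in this function):
       bits 15-12 : index register number
       bit  11    : index size (0 = sign-extended word, 1 = long)
       bits 10-9  : index scale (shift count: x1/x2/x4/x8)
       bit  8     : 1 = full extension word (memory indirect modes)
   full format only:
       bit  7     : suppress base      bit 6      : suppress index
       bits 5-4   : base displacement size (0x20 = word, 0x30 = long)
       bit  2     : post-indexed       bits 1-0   : outer displacement size */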
5587     void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5588     {
5589     int reg = (dp >> 12) & 15;
5590     int regd_shift=(dp >> 9) & 3;
5591    
5592     if (dp & 0x100) {
5593     int ignorebase=(dp&0x80);
5594     int ignorereg=(dp&0x40);
5595     int addbase=0;
5596     int outer=0;
5597    
5598     if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5599     if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5600    
5601     if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5602     if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5603    
5604     if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5605     if (!ignorereg) {
5606     if ((dp & 0x800) == 0)
5607     sign_extend_16_rr(target,reg);
5608     else
5609     mov_l_rr(target,reg);
5610     shll_l_ri(target,regd_shift);
5611     }
5612     else
5613     mov_l_ri(target,0);
5614    
5615     /* target is now regd */
5616     if (!ignorebase)
5617     add_l(target,base);
5618     add_l_ri(target,addbase);
5619     if (dp&0x03) readlong(target,target,tmp);
5620     } else { /* do the getlong first, then add regd */
5621     if (!ignorebase) {
5622     mov_l_rr(target,base);
5623     add_l_ri(target,addbase);
5624     }
5625     else
5626     mov_l_ri(target,addbase);
5627     if (dp&0x03) readlong(target,target,tmp);
5628    
5629     if (!ignorereg) {
5630     if ((dp & 0x800) == 0)
5631     sign_extend_16_rr(tmp,reg);
5632     else
5633     mov_l_rr(tmp,reg);
5634     shll_l_ri(tmp,regd_shift);
5635     /* tmp is now regd */
5636     add_l(target,tmp);
5637     }
5638     }
5639     add_l_ri(target,outer);
5640     }
5641     else { /* 68000 version */
5642     if ((dp & 0x800) == 0) { /* Sign extend */
5643     sign_extend_16_rr(target,reg);
5644     lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5645     }
5646     else {
5647     lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5648     }
5649     }
5650     forget_about(tmp);
5651     }
5652    
5653    
5654    
5655    
5656    
5657     void set_cache_state(int enabled)
5658     {
5659     if (enabled!=letit)
5660     flush_icache_hard(77);
5661     letit=enabled;
5662     }
5663    
5664     int get_cache_state(void)
5665     {
5666     return letit;
5667     }
5668    
5669     uae_u32 get_jitted_size(void)
5670     {
5671     if (compiled_code)
5672     return current_compile_p-compiled_code;
5673     return 0;
5674     }
5675    
5676 gbeauche 1.20 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5677     const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5678    
5679     static uint8 *do_alloc_code(uint32 size, int depth)
5680     {
5681     #if defined(__linux__) && 0
5682     /*
5683     This is a really awful hack that is known to work on Linux at
5684     least.
5685    
5686     The trick here is to make sure the allocated cache is near the
5687     code segment, and more precisely in the positive half of a
5688     32-bit address space. i.e. addr < 0x80000000. Actually, it
5689     turned out that a 32-bit binary run on AMD64 yields a cache
5690     allocated around 0xa0000000, thus causing trouble when
5691     translating addresses from m68k to x86.
5692     */
5693     static uint8 * code_base = NULL;
5694     if (code_base == NULL) {
5695     uintptr page_size = getpagesize();
5696     uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5697     if (boundaries < page_size)
5698     boundaries = page_size;
5699     code_base = (uint8 *)sbrk(0);
5700     for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5701     if (vm_acquire_fixed(code_base, size) == 0) {
5702     uint8 *code = code_base;
5703     code_base += size;
5704     return code;
5705     }
5706     code_base += boundaries;
5707     }
5708     return NULL;
5709     }
5710    
5711     if (vm_acquire_fixed(code_base, size) == 0) {
5712     uint8 *code = code_base;
5713     code_base += size;
5714     return code;
5715     }
5716    
5717     if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5718     return NULL;
5719    
5720     return do_alloc_code(size, depth + 1);
5721     #else
5722     uint8 *code = (uint8 *)vm_acquire(size);
5723     return code == VM_MAP_FAILED ? NULL : code;
5724     #endif
5725     }
5726    
5727     static inline uint8 *alloc_code(uint32 size)
5728     {
5729 gbeauche 1.31 uint8 *ptr = do_alloc_code(size, 0);
5730     /* allocated code must fit in 32-bit boundaries */
5731     assert((uintptr)ptr <= 0xffffffff);
5732     return ptr;
5733 gbeauche 1.20 }
5734    
5735 gbeauche 1.1 void alloc_cache(void)
5736     {
5737     if (compiled_code) {
5738     flush_icache_hard(6);
5739 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5740 gbeauche 1.1 compiled_code = 0;
5741     }
5742    
5743     if (cache_size == 0)
5744     return;
5745    
5746     while (!compiled_code && cache_size) {
5747 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5748 gbeauche 1.1 compiled_code = 0;
5749     cache_size /= 2;
5750     }
5751     }
5752 gbeauche 1.1
5753     if (compiled_code) {
5754 gbeauche 1.25 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5755     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5756     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5757     current_compile_p = compiled_code;
5758     current_cache_size = 0;
5759     }
5760     }
5761    
5762    
5763    
5764 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5765 gbeauche 1.1
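/* calc_checksum() computes a cheap (sum, xor) pair over the 68k code a
   block was translated from; block_check_checksum() further down compares
   it against the values cached at compile time to detect self-modifying
   code. Conceptually, for each 32-bit word w of the source range:

       k1 += w;   // additive checksum
       k2 ^= w;   // xor checksum

   Two independent mixes make accidental collisions less likely than
   either check alone. */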
5766 gbeauche 1.8 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5767 gbeauche 1.1 {
5768 gbeauche 1.8 uae_u32 k1 = 0;
5769     uae_u32 k2 = 0;
5770    
5771     #if USE_CHECKSUM_INFO
5772     checksum_info *csi = bi->csi;
5773     Dif(!csi) abort();
5774     while (csi) {
5775     uae_s32 len = csi->length;
5776 gbeauche 1.24 uintptr tmp = (uintptr)csi->start_p;
5777 gbeauche 1.8 #else
5778     uae_s32 len = bi->len;
5779 gbeauche 1.24 uintptr tmp = (uintptr)bi->min_pcp;
5780 gbeauche 1.8 #endif
5781     uae_u32*pos;
5782 gbeauche 1.1
5783 gbeauche 1.8 len += (tmp & 3);
5784 gbeauche 1.24 tmp &= ~((uintptr)3);
5785 gbeauche 1.8 pos = (uae_u32 *)tmp;
5786    
5787     if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5788     while (len > 0) {
5789     k1 += *pos;
5790     k2 ^= *pos;
5791     pos++;
5792     len -= 4;
5793     }
5794     }
5795 gbeauche 1.1
5796 gbeauche 1.8 #if USE_CHECKSUM_INFO
5797     csi = csi->next;
5798 gbeauche 1.1 }
5799 gbeauche 1.8 #endif
5800    
5801     *c1 = k1;
5802     *c2 = k2;
5803 gbeauche 1.1 }
5804    
5805 gbeauche 1.8 #if 0
5806 gbeauche 1.7 static void show_checksum(CSI_TYPE* csi)
5807 gbeauche 1.1 {
5808     uae_u32 k1=0;
5809     uae_u32 k2=0;
5810 gbeauche 1.7 uae_s32 len=CSI_LENGTH(csi);
5811 gbeauche 1.24 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5812 gbeauche 1.1 uae_u32* pos;
5813    
5814     len+=(tmp&3);
5815     tmp&=(~3);
5816     pos=(uae_u32*)tmp;
5817    
5818     if (len<0 || len>MAX_CHECKSUM_LEN) {
5819     return;
5820     }
5821     else {
5822     while (len>0) {
5823     write_log("%08x ",*pos);
5824     pos++;
5825     len-=4;
5826     }
5827     write_log(" bla\n");
5828     }
5829     }
5830 gbeauche 1.8 #endif
5831 gbeauche 1.1
5832    
5833     int check_for_cache_miss(void)
5834     {
5835     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5836    
5837     if (bi) {
5838     int cl=cacheline(regs.pc_p);
5839     if (bi!=cache_tags[cl+1].bi) {
5840     raise_in_cl_list(bi);
5841     return 1;
5842     }
5843     }
5844     return 0;
5845     }
5846    
5847    
5848     static void recompile_block(void)
5849     {
5850     /* An existing block's countdown code has expired. We need to make
5851     sure that execute_normal doesn't refuse to recompile due to a
5852     perceived cache miss... */
5853     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5854    
5855     Dif (!bi)
5856     abort();
5857     raise_in_cl_list(bi);
5858     execute_normal();
5859     return;
5860     }
5861     static void cache_miss(void)
5862     {
5863     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5864     uae_u32 cl=cacheline(regs.pc_p);
5865     blockinfo* bi2=get_blockinfo(cl);
5866    
5867     if (!bi) {
5868     execute_normal(); /* Compile this block now */
5869     return;
5870     }
5871     Dif (!bi2 || bi==bi2) {
5872     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5873     abort();
5874     }
5875     raise_in_cl_list(bi);
5876     return;
5877     }
5878    
5879     static int called_check_checksum(blockinfo* bi);
5880    
5881     static inline int block_check_checksum(blockinfo* bi)
5882     {
5883     uae_u32 c1,c2;
5884 gbeauche 1.7 bool isgood;
5885 gbeauche 1.1
5886     if (bi->status!=BI_NEED_CHECK)
5887     return 1; /* This block is in a checked state */
5888    
5889     checksum_count++;
5890 gbeauche 1.7
5891 gbeauche 1.1 if (bi->c1 || bi->c2)
5892     calc_checksum(bi,&c1,&c2);
5893     else {
5894     c1=c2=1; /* Make sure it doesn't match */
5895 gbeauche 1.7 }
5896 gbeauche 1.1
5897     isgood=(c1==bi->c1 && c2==bi->c2);
5898 gbeauche 1.7
5899 gbeauche 1.1 if (isgood) {
5900     /* This block is still OK. So we reactivate. Of course, that
5901     means we have to move it into the needs-to-be-flushed list */
5902     bi->handler_to_use=bi->handler;
5903     set_dhtu(bi,bi->direct_handler);
5904     bi->status=BI_CHECKING;
5905     isgood=called_check_checksum(bi);
5906     }
5907     if (isgood) {
5908     /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5909     c1,c2,bi->c1,bi->c2);*/
5910     remove_from_list(bi);
5911     add_to_active(bi);
5912     raise_in_cl_list(bi);
5913     bi->status=BI_ACTIVE;
5914     }
5915     else {
5916     /* This block actually changed. We need to invalidate it,
5917     and set it up to be recompiled */
5918     /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5919     c1,c2,bi->c1,bi->c2); */
5920     invalidate_block(bi);
5921     raise_in_cl_list(bi);
5922     }
5923     return isgood;
5924     }
5925    
5926     static int called_check_checksum(blockinfo* bi)
5927     {
5929     int isgood=1;
5930     int i;
5931    
5932     for (i=0;i<2 && isgood;i++) {
5933     if (bi->dep[i].jmp_off) {
5934     isgood=block_check_checksum(bi->dep[i].target);
5935     }
5936     }
5937     return isgood;
5938     }
5939    
5940     static void check_checksum(void)
5941     {
5942     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5943     uae_u32 cl=cacheline(regs.pc_p);
5944     blockinfo* bi2=get_blockinfo(cl);
5945    
5946     /* These are not the droids you are looking for... */
5947     if (!bi) {
5948     /* Whoever is the primary target is in a dormant state, but
5949     calling it was accidental, and we should just compile this
5950     new block */
5951     execute_normal();
5952     return;
5953     }
5954     if (bi!=bi2) {
5955     /* The block was hit accidentally, but it does exist. Cache miss */
5956     cache_miss();
5957     return;
5958     }
5959    
5960     if (!block_check_checksum(bi))
5961     execute_normal();
5962     }
5963    
5964     static __inline__ void match_states(blockinfo* bi)
5965     {
5966     int i;
5967     smallstate* s=&(bi->env);
5968    
5969     if (bi->status==BI_NEED_CHECK) {
5970     block_check_checksum(bi);
5971     }
5972     if (bi->status==BI_ACTIVE ||
5973     bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5974     block makes (about not using
5975     certain vregs) */
5976     for (i=0;i<16;i++) {
5977     if (s->virt[i]==L_UNNEEDED) {
5978     // write_log("unneeded reg %d at %p\n",i,target);
5979     COMPCALL(forget_about)(i); // FIXME
5980     }
5981     }
5982     }
5983     flush(1);
5984    
5985     /* And now deal with the *demands* the block makes */
5986     for (i=0;i<N_REGS;i++) {
5987     int v=s->nat[i];
5988     if (v>=0) {
5989     // printf("Loading reg %d into %d at %p\n",v,i,target);
5990     readreg_specific(v,4,i);
5991     // do_load_reg(i,v);
5992     // setlock(i);
5993     }
5994     }
5995     for (i=0;i<N_REGS;i++) {
5996     int v=s->nat[i];
5997     if (v>=0) {
5998     unlock2(i);
5999     }
6000     }
6001     }
6002    
6003     static __inline__ void create_popalls(void)
6004     {
6005     int i,r;
6006    
6007 gbeauche 1.24 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
6008     write_log("FATAL: Could not allocate popallspace!\n");
6009     abort();
6010     }
6011     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
6012    
6013 gbeauche 1.35 int stack_space = STACK_OFFSET;
6014     for (i=0;i<N_REGS;i++) {
6015     if (need_to_preserve[i])
6016     stack_space += sizeof(void *);
6017     }
6018     stack_space %= STACK_ALIGN;
6019     if (stack_space)
6020     stack_space = STACK_ALIGN - stack_space;
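    /* Worked example, assuming STACK_ALIGN==16 and STACK_OFFSET==4 (the
       return address the caller pushed): preserving three 4-byte
       registers gives 4+3*4 == 16, so no padding is needed; with two
       registers it is 4+2*4 == 12, and raw_dec_sp(4) below restores the
       frame to a 16-byte boundary. */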
6021    
6022 gbeauche 1.1 current_compile_p=popallspace;
6023     set_target(current_compile_p);
6024 gbeauche 1.36
6025     /* We need to guarantee 16-byte stack alignment on x86 at any point
6026     within the JIT generated code. There are multiple possible exit
6027     points but only a single entry point. A "jmp" is used so that we
6028     don't have to generate stack alignment in generated code that has
6029     to call external functions (e.g. a generic instruction handler).
6030    
6031     In summary, JIT generated code is not a leaf function, so we have
6032     to deal with stack alignment here. */
6033     align_target(align_jumps);
6034     current_compile_p=get_target();
6035     pushall_call_handler=get_target();
6036     for (i=N_REGS;i--;) {
6037     if (need_to_preserve[i])
6038     raw_push_l_r(i);
6039     }
6040     raw_dec_sp(stack_space);
6041     r=REG_PC_TMP;
6042     raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6043     raw_and_l_ri(r,TAGMASK);
6044     raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6045    
6046     /* now the exit points */
6047 gbeauche 1.5 align_target(align_jumps);
6048     popall_do_nothing=get_target();
6049 gbeauche 1.35 raw_inc_sp(stack_space);
6050 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6051     if (need_to_preserve[i])
6052     raw_pop_l_r(i);
6053     }
6054 gbeauche 1.24 raw_jmp((uintptr)do_nothing);
6055 gbeauche 1.1
6056 gbeauche 1.5 align_target(align_jumps);
6057 gbeauche 1.1 popall_execute_normal=get_target();
6058 gbeauche 1.35 raw_inc_sp(stack_space);
6059 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6060     if (need_to_preserve[i])
6061     raw_pop_l_r(i);
6062     }
6063 gbeauche 1.24 raw_jmp((uintptr)execute_normal);
6064 gbeauche 1.1
6065 gbeauche 1.5 align_target(align_jumps);
6066 gbeauche 1.1 popall_cache_miss=get_target();
6067 gbeauche 1.35 raw_inc_sp(stack_space);
6068 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6069     if (need_to_preserve[i])
6070     raw_pop_l_r(i);
6071     }
6072 gbeauche 1.24 raw_jmp((uintptr)cache_miss);
6073 gbeauche 1.1
6074 gbeauche 1.5 align_target(align_jumps);
6075 gbeauche 1.1 popall_recompile_block=get_target();
6076 gbeauche 1.35 raw_inc_sp(stack_space);
6077 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6078     if (need_to_preserve[i])
6079     raw_pop_l_r(i);
6080     }
6081 gbeauche 1.24 raw_jmp((uintptr)recompile_block);
6082 gbeauche 1.5
6083     align_target(align_jumps);
6084 gbeauche 1.1 popall_exec_nostats=get_target();
6085 gbeauche 1.35 raw_inc_sp(stack_space);
6086 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6087     if (need_to_preserve[i])
6088     raw_pop_l_r(i);
6089     }
6090 gbeauche 1.24 raw_jmp((uintptr)exec_nostats);
6091 gbeauche 1.5
6092     align_target(align_jumps);
6093 gbeauche 1.1 popall_check_checksum=get_target();
6094 gbeauche 1.35 raw_inc_sp(stack_space);
6095 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
6096     if (need_to_preserve[i])
6097     raw_pop_l_r(i);
6098     }
6099 gbeauche 1.24 raw_jmp((uintptr)check_checksum);
6100 gbeauche 1.5
6101 gbeauche 1.24 // no need to further write into popallspace
6102     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6103 gbeauche 1.1 }
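    /* Roughly, the trampolines generated above behave like this
       (pseudo-x86 sketch):

       pushall_call_handler:              ; the single entry point
           push <callee-saved registers>
           sub  sp, stack_space           ; keep 16-byte alignment
           r = regs.pc_p & TAGMASK
           jmp  cache_tags[r]             ; block code or popall_execute_normal

       popall_<X>:                        ; one stub per exit reason
           add  sp, stack_space
           pop  <callee-saved registers>
           jmp  <X>                       ; do_nothing, execute_normal, ... */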
6104    
6105     static __inline__ void reset_lists(void)
6106     {
6107     int i;
6108    
6109     for (i=0;i<MAX_HOLD_BI;i++)
6110     hold_bi[i]=NULL;
6111     active=NULL;
6112     dormant=NULL;
6113     }
6114    
6115     static void prepare_block(blockinfo* bi)
6116     {
6117     int i;
6118    
6119     set_target(current_compile_p);
6120 gbeauche 1.5 align_target(align_jumps);
6121 gbeauche 1.1 bi->direct_pen=(cpuop_func *)get_target();
6122 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6123     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6124     raw_jmp((uintptr)popall_execute_normal);
6125 gbeauche 1.1
6126 gbeauche 1.5 align_target(align_jumps);
6127 gbeauche 1.1 bi->direct_pcc=(cpuop_func *)get_target();
6128 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6129     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6130     raw_jmp((uintptr)popall_check_checksum);
6131 gbeauche 1.1 current_compile_p=get_target();
6132    
6133     bi->deplist=NULL;
6134     for (i=0;i<2;i++) {
6135     bi->dep[i].prev_p=NULL;
6136     bi->dep[i].next=NULL;
6137     }
6138     bi->env=default_ss;
6139     bi->status=BI_INVALID;
6140     bi->havestate=0;
6141     //bi->env=empty_ss;
6142     }
6143    
6144 gbeauche 1.21 // OPCODE is in big-endian format; use cft_map() beforehand if needed.
6145     static inline void reset_compop(int opcode)
6146 gbeauche 1.17 {
6147 gbeauche 1.21 compfunctbl[opcode] = NULL;
6148     nfcompfunctbl[opcode] = NULL;
6149     }
6150    
6151     static int read_opcode(const char *p)
6152     {
6153     int opcode = 0;
6154     for (int i = 0; i < 4; i++) {
6155     int op = p[i];
6156     switch (op) {
6157     case '0': case '1': case '2': case '3': case '4':
6158     case '5': case '6': case '7': case '8': case '9':
6159     opcode = (opcode << 4) | (op - '0');
6160     break;
6161     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6162     opcode = (opcode << 4) | ((op - 'a') + 10);
6163     break;
6164     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6165     opcode = (opcode << 4) | ((op - 'A') + 10);
6166     break;
6167     default:
6168     return -1;
6169     }
6170     }
6171     return opcode;
6172     }
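    /* e.g. read_opcode("f210-f21f") parses the first four hex digits and
       returns 0xf210; fewer than four hex digits yields -1. */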
6173    
6174     static bool merge_blacklist()
6175     {
6176     const char *blacklist = PrefsFindString("jitblacklist");
6177     if (blacklist) {
6178     const char *p = blacklist;
6179     for (;;) {
6180     if (*p == 0)
6181     return true;
6182    
6183     int opcode1 = read_opcode(p);
6184     if (opcode1 < 0)
6185     return false;
6186     p += 4;
6187    
6188     int opcode2 = opcode1;
6189     if (*p == '-') {
6190     p++;
6191     opcode2 = read_opcode(p);
6192     if (opcode2 < 0)
6193     return false;
6194     p += 4;
6195     }
6196    
6197 gbeauche 1.40 if (*p == 0 || *p == ',' || *p == ';') {
6198 gbeauche 1.21 write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6199     for (int opcode = opcode1; opcode <= opcode2; opcode++)
6200     reset_compop(cft_map(opcode));
6201    
6202 gbeauche 1.40 if (*p == ',' || *p == ';') {
6203 gbeauche 1.21 p++; /* skip the separator before parsing the next entry */
     continue;
     }
6204    
6205     return true;
6206     }
6207    
6208     return false;
6209     }
6210     }
6211     return true;
6212 gbeauche 1.17 }
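    /* The blacklist is given as 4-digit hex opcodes or opcode ranges
       separated by ';' or ',', e.g. a prefs line like

           jitblacklist f200-f2ff;123c

       (values here purely illustrative) makes those opcodes fall back
       to the interpretive handlers instead of being compiled. */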
6213    
6214 gbeauche 1.1 void build_comp(void)
6215     {
6216     int i;
6218     unsigned long opcode;
6219     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6220     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6221     int count;
6222     int cpu_level = 0; // 68000 (default)
6223     if (CPUType == 4)
6224     cpu_level = 4; // 68040 with FPU
6225     else {
6226     if (FPUType)
6227     cpu_level = 3; // 68020 with FPU
6228     else if (CPUType >= 2)
6229     cpu_level = 2; // 68020
6230     else if (CPUType == 1)
6231     cpu_level = 1;
6232     }
6233     struct cputbl *nfctbl = (
6234     cpu_level == 4 ? op_smalltbl_0_nf
6235     : cpu_level == 3 ? op_smalltbl_1_nf
6236     : cpu_level == 2 ? op_smalltbl_2_nf
6237     : cpu_level == 1 ? op_smalltbl_3_nf
6238     : op_smalltbl_4_nf);
6239    
6240     write_log ("<JIT compiler> : building compiler function tables\n");
6241    
6242     for (opcode = 0; opcode < 65536; opcode++) {
6243 gbeauche 1.21 reset_compop(opcode);
6244 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6245     prop[opcode].use_flags = 0x1f;
6246     prop[opcode].set_flags = 0x1f;
6247     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6248     }
6249    
6250     for (i = 0; tbl[i].opcode < 65536; i++) {
6251     int cflow = table68k[tbl[i].opcode].cflow;
6252 gbeauche 1.33 if (follow_const_jumps && (tbl[i].specific & 16))
6253 gbeauche 1.10 cflow = fl_const_jump;
6254 gbeauche 1.8 else
6255 gbeauche 1.10 cflow &= ~fl_const_jump;
6256     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6257 gbeauche 1.1
6258     int uses_fpu = tbl[i].specific & 32;
6259 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6260 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6261     else
6262     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6263     }
6264 gbeauche 1.8
6265 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6266     int uses_fpu = nftbl[i].specific & 32;
6267 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6268 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6269     else
6270     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6271    
6272     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6273     }
6274    
6275     for (i = 0; nfctbl[i].handler; i++) {
6276     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6277     }
6278    
6279     for (opcode = 0; opcode < 65536; opcode++) {
6280     compop_func *f;
6281     compop_func *nff;
6282     cpuop_func *nfcf;
6283     int isaddx,cflow;
6284    
6285     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6286     continue;
6287    
6288     if (table68k[opcode].handler != -1) {
6289     f = compfunctbl[cft_map(table68k[opcode].handler)];
6290     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6291     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6292     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6293     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6294     prop[cft_map(opcode)].cflow = cflow;
6295     prop[cft_map(opcode)].is_addx = isaddx;
6296     compfunctbl[cft_map(opcode)] = f;
6297     nfcompfunctbl[cft_map(opcode)] = nff;
6298     Dif (nfcf == op_illg_1)
6299     abort();
6300     nfcpufunctbl[cft_map(opcode)] = nfcf;
6301     }
6302     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6303     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6304 gbeauche 1.33 /* Unconditional jumps don't evaluate condition codes, so they
6305     * don't actually use any flags themselves */
6306     if (prop[cft_map(opcode)].cflow & fl_const_jump)
6307     prop[cft_map(opcode)].use_flags = 0;
6308 gbeauche 1.1 }
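    /* In effect, table68k[opcode].handler aliases many concrete opcodes
       onto one generic opcode, so the loop above copies the compiled
       handlers and the flag/cflow properties from the generic entry to
       each concrete opcode. */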
6309     for (i = 0; nfctbl[i].handler != NULL; i++) {
6310     if (nfctbl[i].specific)
6311     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6312     }
6313 gbeauche 1.21
6314     /* Merge in blacklist */
6315     if (!merge_blacklist())
6316     write_log("<JIT compiler> : blacklist merge failure!\n");
6317 gbeauche 1.1
6318     count=0;
6319     for (opcode = 0; opcode < 65536; opcode++) {
6320     if (compfunctbl[cft_map(opcode)])
6321     count++;
6322     }
6323     write_log("<JIT compiler> : supposedly %d compilable opcodes!\n",count);
6324    
6325     /* Initialise state */
6326     create_popalls();
6327     alloc_cache();
6328     reset_lists();
6329    
6330     for (i=0;i<TAGSIZE;i+=2) {
6331     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6332     cache_tags[i+1].bi=NULL;
6333     }
6334    
6335     #if 0
6336     for (i=0;i<N_REGS;i++) {
6337     empty_ss.nat[i].holds=-1;
6338     empty_ss.nat[i].validsize=0;
6339     empty_ss.nat[i].dirtysize=0;
6340     }
6341     #endif
6342     for (i=0;i<VREGS;i++) {
6343     empty_ss.virt[i]=L_NEEDED;
6344     }
6345     for (i=0;i<N_REGS;i++) {
6346     empty_ss.nat[i]=L_UNKNOWN;
6347     }
6348     default_ss=empty_ss;
6349     }
6350    
6351    
6352     static void flush_icache_none(int n)
6353     {
6354     /* Nothing to do. */
6355     }
6356    
6357     static void flush_icache_hard(int n)
6358     {
6360     blockinfo* bi, *dbi;
6361    
6362     hard_flush_count++;
6363     #if 0
6364     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6365     n,regs.pc,regs.pc_p,current_cache_size/1024);
6366     current_cache_size = 0;
6367     #endif
6368     bi=active;
6369     while(bi) {
6370     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6371     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6372     dbi=bi; bi=bi->next;
6373     free_blockinfo(dbi);
6374     }
6375     bi=dormant;
6376     while(bi) {
6377     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6378     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6379     dbi=bi; bi=bi->next;
6380     free_blockinfo(dbi);
6381     }
6382    
6383     reset_lists();
6384     if (!compiled_code)
6385     return;
6386     current_compile_p=compiled_code;
6387     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6388     }
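    /* A hard flush frees every blockinfo and rewinds current_compile_p,
       so all code is retranslated from scratch; the soft flush below
       only re-queues blocks for checksum verification. */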
6389    
6390    
6391     /* "Soft flushing" --- instead of actually throwing everything away,
6392     we simply mark everything as "needs to be checked".
6393     */
6394    
6395     static inline void flush_icache_lazy(int n)
6396     {
6398     blockinfo* bi;
6399     blockinfo* bi2;
6400    
6401     soft_flush_count++;
6402     if (!active)
6403     return;
6404    
6405     bi=active;
6406     while (bi) {
6407     uae_u32 cl=cacheline(bi->pc_p);
6408     if (bi->status==BI_INVALID ||
6409     bi->status==BI_NEED_RECOMP) {
6410     if (bi==cache_tags[cl+1].bi)
6411     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6412     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6413     set_dhtu(bi,bi->direct_pen);
6414     bi->status=BI_INVALID;
6415     }
6416     else {
6417     if (bi==cache_tags[cl+1].bi)
6418     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6419     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6420     set_dhtu(bi,bi->direct_pcc);
6421     bi->status=BI_NEED_CHECK;
6422     }
6423     bi2=bi;
6424     bi=bi->next;
6425     }
6426     /* bi2 is now the last entry in the active list */
6427     bi2->next=dormant;
6428     if (dormant)
6429     dormant->prev_p=&(bi2->next);
6430    
6431     dormant=active;
6432     active->prev_p=&dormant;
6433     active=NULL;
6434 gbeauche 1.22 }
6435    
6436 gbeauche 1.38 void flush_icache_range(uae_u8 *start_p, uae_u32 length)
6437 gbeauche 1.22 {
6438     if (!active)
6439     return;
6440    
6441     #if LAZY_FLUSH_ICACHE_RANGE
6442     blockinfo *bi = active;
6443     while (bi) {
6444     #if USE_CHECKSUM_INFO
6445 gbeauche 1.38 bool candidate = false;
6446     for (checksum_info *csi = bi->csi; csi; csi = csi->next) {
6447     if (((start_p - csi->start_p) < csi->length) ||
6448     ((csi->start_p - start_p) < length)) {
6449     candidate = true;
6450     break;
6451     }
6452     }
6453 gbeauche 1.22 #else
6454     // Assume system is consistent and would invalidate the right range
6455 gbeauche 1.38 const bool candidate = (bi->pc_p - start_p) < length;
6456 gbeauche 1.22 #endif
6457 gbeauche 1.38 blockinfo *dbi = bi;
6458     bi = bi->next;
6459     if (candidate) {
6460     uae_u32 cl = cacheline(dbi->pc_p);
6461     if (dbi->status == BI_INVALID || dbi->status == BI_NEED_RECOMP) {
6462     if (dbi == cache_tags[cl+1].bi)
6463 gbeauche 1.22 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6464 gbeauche 1.38 dbi->handler_to_use = (cpuop_func *)popall_execute_normal;
6465     set_dhtu(dbi, dbi->direct_pen);
6466     dbi->status = BI_INVALID;
6467     }
6468     else {
6469     if (dbi == cache_tags[cl+1].bi)
6470     cache_tags[cl].handler = (cpuop_func *)popall_check_checksum;
6471     dbi->handler_to_use = (cpuop_func *)popall_check_checksum;
6472     set_dhtu(dbi, dbi->direct_pcc);
6473     dbi->status = BI_NEED_CHECK;
6474     }
6475     remove_from_list(dbi);
6476     add_to_dormant(dbi);
6477 gbeauche 1.22 }
6478     }
6479     return;
6480     #endif
6481     flush_icache(-1);
6482 gbeauche 1.1 }
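    /* A block is a flush candidate when its recorded source range
       overlaps [start_p, start_p+length); the two unsigned comparisons
       above rely on wrap-around, so each is only true when one range's
       start lies inside the other range. */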
6483    
6484     static void catastrophe(void)
6485     {
6486     abort();
6487     }
6488    
6489     int failure;
6490    
6491     #define TARGET_M68K 0
6492     #define TARGET_POWERPC 1
6493     #define TARGET_X86 2
6494 gbeauche 1.24 #define TARGET_X86_64 3
6495 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6496     #define TARGET_NATIVE TARGET_X86
6497     #endif
6498     #if defined(powerpc) || defined(__powerpc__)
6499     #define TARGET_NATIVE TARGET_POWERPC
6500     #endif
6501 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6502     #define TARGET_NATIVE TARGET_X86_64
6503     #endif
6504 gbeauche 1.1
6505     #ifdef ENABLE_MON
6506 gbeauche 1.24 static uae_u32 mon_read_byte_jit(uintptr addr)
6507 gbeauche 1.1 {
6508     uae_u8 *m = (uae_u8 *)addr;
6509 gbeauche 1.24 return (uintptr)(*m);
6510 gbeauche 1.1 }
6511    
6512 gbeauche 1.24 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6513 gbeauche 1.1 {
6514     uae_u8 *m = (uae_u8 *)addr;
6515     *m = b;
6516     }
6517     #endif
6518    
6519     void disasm_block(int target, uint8 * start, size_t length)
6520     {
6521     if (!JITDebug)
6522     return;
6523    
6524     #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6525     char disasm_str[200];
6526     sprintf(disasm_str, "%s $%x $%x",
6527     target == TARGET_M68K ? "d68" :
6528     target == TARGET_X86 ? "d86" :
6529 gbeauche 1.24 target == TARGET_X86_64 ? "d8664" :
6530 gbeauche 1.1 target == TARGET_POWERPC ? "d" : "x",
6531     start, start + length - 1);
6532    
6533 gbeauche 1.24 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6534     void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6535 gbeauche 1.1
6536     mon_read_byte = mon_read_byte_jit;
6537     mon_write_byte = mon_write_byte_jit;
6538    
6539     char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6540     mon(4, arg);
6541    
6542     mon_read_byte = old_mon_read_byte;
6543     mon_write_byte = old_mon_write_byte;
6544     #endif
6545     }
6546    
6547 gbeauche 1.24 static void disasm_native_block(uint8 *start, size_t length)
6548 gbeauche 1.1 {
6549     disasm_block(TARGET_NATIVE, start, length);
6550     }
6551    
6552 gbeauche 1.24 static void disasm_m68k_block(uint8 *start, size_t length)
6553 gbeauche 1.1 {
6554     disasm_block(TARGET_M68K, start, length);
6555     }
6556    
6557     #ifdef HAVE_GET_WORD_UNSWAPPED
6558     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6559     #else
6560     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6561     #endif
6562    
6563     #if JIT_DEBUG
6564     static uae_u8 *last_regs_pc_p = 0;
6565     static uae_u8 *last_compiled_block_addr = 0;
6566    
6567     void compiler_dumpstate(void)
6568     {
6569     if (!JITDebug)
6570     return;
6571    
6572     write_log("### Host addresses\n");
6573     write_log("MEM_BASE : %x\n", MEMBaseDiff);
6574     write_log("PC_P : %p\n", &regs.pc_p);
6575     write_log("SPCFLAGS : %p\n", &regs.spcflags);
6576     write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6577     write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6578     write_log("\n");
6579    
6580     write_log("### M68k processor state\n");
6581     m68k_dumpstate(0);
6582     write_log("\n");
6583    
6584     write_log("### Block in Mac address space\n");
6585     write_log("M68K block : %p\n",
6586 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6587 gbeauche 1.1 write_log("Native block : %p (%d bytes)\n",
6588 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6589 gbeauche 1.1 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6590     write_log("\n");
6591     }
6592     #endif
6593    
6594     static void compile_block(cpu_history* pc_hist, int blocklen)
6595     {
6596     if (letit && compiled_code) {
6597     #if PROFILE_COMPILE_TIME
6598     compile_count++;
6599     clock_t start_time = clock();
6600     #endif
6601     #if JIT_DEBUG
6602     bool disasm_block = false;
6603     #endif
6604    
6605     /* OK, here we need to 'compile' a block */
6606     int i;
6607     int r;
6608     int was_comp=0;
6609     uae_u8 liveflags[MAXRUN+1];
6610 gbeauche 1.8 #if USE_CHECKSUM_INFO
6611     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6612 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6613     uintptr min_pcp=max_pcp;
6614 gbeauche 1.8 #else
6615 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[0].location;
6616     uintptr min_pcp=max_pcp;
6617 gbeauche 1.8 #endif
6618 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6619     void* specflags=(void*)&regs.spcflags;
6620     blockinfo* bi=NULL;
6621     blockinfo* bi2;
6622     int extra_len=0;
6623    
6624     redo_current_block=0;
6625     if (current_compile_p>=max_compile_start)
6626     flush_icache_hard(7);
6627    
6628     alloc_blockinfos();
6629    
6630     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6631     bi2=get_blockinfo(cl);
6632    
6633     optlev=bi->optlevel;
6634     if (bi->status!=BI_INVALID) {
6635     Dif (bi!=bi2) {
6636     /* I don't think it can happen anymore. Shouldn't, in
6637     any case. So let's make sure... */
6638     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6639     bi->count,bi->optlevel,bi->handler_to_use,
6640     cache_tags[cl].handler);
6641     abort();
6642     }
6643    
6644     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6645     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6646     /* What the heck? We are not supposed to be here! */
6647     abort();
6648     }
6649     }
6650     if (bi->count==-1) {
6651     optlev++;
6652     while (!optcount[optlev])
6653     optlev++;
6654     bi->count=optcount[optlev]-1;
6655     }
6656 gbeauche 1.24 current_block_pc_p=(uintptr)pc_hist[0].location;
6657 gbeauche 1.1
6658     remove_deps(bi); /* We are about to create new code */
6659     bi->optlevel=optlev;
6660     bi->pc_p=(uae_u8*)pc_hist[0].location;
6661 gbeauche 1.8 #if USE_CHECKSUM_INFO
6662     free_checksum_info_chain(bi->csi);
6663     bi->csi = NULL;
6664     #endif
6665 gbeauche 1.1
6666     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6667     i=blocklen;
6668     while (i--) {
6669     uae_u16* currpcp=pc_hist[i].location;
6670     uae_u32 op=DO_GET_OPCODE(currpcp);
6671    
6672 gbeauche 1.8 #if USE_CHECKSUM_INFO
6673     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6674 gbeauche 1.33 if (follow_const_jumps && is_const_jump(op)) {
6675 gbeauche 1.8 checksum_info *csi = alloc_checksum_info();
6676     csi->start_p = (uae_u8 *)min_pcp;
6677     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6678     csi->next = bi->csi;
6679     bi->csi = csi;
6680 gbeauche 1.24 max_pcp = (uintptr)currpcp;
6681 gbeauche 1.8 }
6682 gbeauche 1.24 min_pcp = (uintptr)currpcp;
6683 gbeauche 1.8 #else
6684 gbeauche 1.24 if ((uintptr)currpcp<min_pcp)
6685     min_pcp=(uintptr)currpcp;
6686     if ((uintptr)currpcp>max_pcp)
6687     max_pcp=(uintptr)currpcp;
6688 gbeauche 1.8 #endif
6689 gbeauche 1.1
6690     liveflags[i]=((liveflags[i+1]&
6691     (~prop[op].set_flags))|
6692     prop[op].use_flags);
6693     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6694     liveflags[i]&= ~FLAG_Z;
6695     }
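    /* Backward liveness: the flag outputs of instruction i are dead
       when liveflags[i+1] & prop[op].set_flags == 0; for those
       instructions the translation loop below picks the nf ("no
       flags") handler and the generated code skips flag computation
       entirely. */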
6696    
6697 gbeauche 1.8 #if USE_CHECKSUM_INFO
6698     checksum_info *csi = alloc_checksum_info();
6699     csi->start_p = (uae_u8 *)min_pcp;
6700     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6701     csi->next = bi->csi;
6702     bi->csi = csi;
6703     #endif
6704    
6705 gbeauche 1.1 bi->needed_flags=liveflags[0];
6706    
6707 gbeauche 1.5 align_target(align_loops);
6708 gbeauche 1.1 was_comp=0;
6709    
6710     bi->direct_handler=(cpuop_func *)get_target();
6711     set_dhtu(bi,bi->direct_handler);
6712     bi->status=BI_COMPILING;
6713 gbeauche 1.24 current_block_start_target=(uintptr)get_target();
6714 gbeauche 1.1
6715     log_startblock();
6716    
6717     if (bi->count>=0) { /* Need to generate countdown code */
6718 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6719     raw_sub_l_mi((uintptr)&(bi->count),1);
6720     raw_jl((uintptr)popall_recompile_block);
6721 gbeauche 1.1 }
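    /* This countdown makes blocks "warm up": each execution decrements
       bi->count, and once it goes negative the block jumps to
       popall_recompile_block to be retranslated at the next
       optimization level (see the bi->count==-1 handling above). */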
6722     if (optlev==0) { /* No need to actually translate */
6723     /* Execute normally without keeping stats */
6724 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6725     raw_jmp((uintptr)popall_exec_nostats);
6726 gbeauche 1.1 }
6727     else {
6728     reg_alloc_run=0;
6729     next_pc_p=0;
6730     taken_pc_p=0;
6731     branch_cc=0;
6732    
6733     comp_pc_p=(uae_u8*)pc_hist[0].location;
6734     init_comp();
6735     was_comp=1;
6736    
6737 gbeauche 1.34 #ifdef USE_CPU_EMUL_SERVICES
6738     raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
6739     raw_jcc_b_oponly(NATIVE_CC_GT);
6740     uae_s8 *branchadd=(uae_s8*)get_target();
6741     emit_byte(0);
6742     raw_call((uintptr)cpu_do_check_ticks);
6743     *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
6744     #endif
6745    
6746 gbeauche 1.1 #if JIT_DEBUG
6747     if (JITDebug) {
6748 gbeauche 1.24 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6749     raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6750 gbeauche 1.1 }
6751     #endif
6752    
6753     for (i=0;i<blocklen &&
6754     get_target_noopt()<max_compile_start;i++) {
6755     cpuop_func **cputbl;
6756     compop_func **comptbl;
6757     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6758     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6759     if (!needed_flags) {
6760     cputbl=nfcpufunctbl;
6761     comptbl=nfcompfunctbl;
6762     }
6763     else {
6764     cputbl=cpufunctbl;
6765     comptbl=compfunctbl;
6766     }
6767 gbeauche 1.32
6768     #if FLIGHT_RECORDER
6769     {
6770     mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
6771     clobber_flags();
6772     remove_all_offsets();
6773     int arg = readreg_specific(S1,4,REG_PAR1);
6774     prepare_for_call_1();
6775     unlock2(arg);
6776     prepare_for_call_2();
6777     raw_call((uintptr)m68k_record_step);
6778     }
6779     #endif
6780 gbeauche 1.1
6781     failure = 1; // gb-- defaults to failure state
6782     if (comptbl[opcode] && optlev>1) {
6783     failure=0;
6784     if (!was_comp) {
6785     comp_pc_p=(uae_u8*)pc_hist[i].location;
6786     init_comp();
6787     }
6788 gbeauche 1.18 was_comp=1;
6789 gbeauche 1.1
6790     comptbl[opcode](opcode);
6791     freescratch();
6792     if (!(liveflags[i+1] & FLAG_CZNV)) {
6793     /* We can forget about flags */
6794     dont_care_flags();
6795     }
6796     #if INDIVIDUAL_INST
6797     flush(1);
6798     nop();
6799     flush(1);
6800     was_comp=0;
6801     #endif
6802     }
6803    
6804     if (failure) {
6805     if (was_comp) {
6806     flush(1);
6807     was_comp=0;
6808     }
6809     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6810     #if USE_NORMAL_CALLING_CONVENTION
6811     raw_push_l_r(REG_PAR1);
6812     #endif
6813 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,
6814     (uintptr)pc_hist[i].location);
6815     raw_call((uintptr)cputbl[opcode]);
6816 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6817     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6818 gbeauche 1.24 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6819 gbeauche 1.9 #endif
6820 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6821     raw_inc_sp(4);
6822     #endif
6823    
6824     if (i < blocklen - 1) {
6825     uae_s8* branchadd;
6826    
6827 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)specflags);
6828 gbeauche 1.1 raw_test_l_rr(0,0);
6829     raw_jz_b_oponly();
6830     branchadd=(uae_s8 *)get_target();
6831     emit_byte(0);
6832 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6833     *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6834 gbeauche 1.1 }
6835     }
6836     }
6837     #if 1 /* This isn't completely kosher yet; it really needs to
6838     be integrated into a general inter-block-dependency scheme */
6839     if (next_pc_p && taken_pc_p &&
6840     was_comp && taken_pc_p==current_block_pc_p) {
6841     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6842     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6843     uae_u8 x=bi1->needed_flags;
6844    
6845     if (x==0xff || 1) { /* To be on the safe side */
6846     uae_u16* next=(uae_u16*)next_pc_p;
6847     uae_u32 op=DO_GET_OPCODE(next);
6848    
6849     x=0x1f;
6850     x&=(~prop[op].set_flags);
6851     x|=prop[op].use_flags;
6852     }
6853    
6854     x|=bi2->needed_flags;
6855     if (!(x & FLAG_CZNV)) {
6856     /* We can forget about flags */
6857     dont_care_flags();
6858     extra_len+=2; /* The next instruction now is part of this
6859     block */
6860     }
6861    
6862     }
6863     #endif
6864     log_flush();
6865    
6866     if (next_pc_p) { /* A branch was registered */
6867 gbeauche 1.24 uintptr t1=next_pc_p;
6868     uintptr t2=taken_pc_p;
6869 gbeauche 1.1 int cc=branch_cc;
6870    
6871     uae_u32* branchadd;
6872     uae_u32* tba;
6873     bigstate tmp;
6874     blockinfo* tbi;
6875    
6876     if (taken_pc_p<next_pc_p) {
6877     /* backward branch. Optimize for the "taken" case ---
6878     which means the raw_jcc should fall through when
6879     the 68k branch is taken. */
6880     t1=taken_pc_p;
6881     t2=next_pc_p;
6882     cc=branch_cc^1;
6883     }
6884    
6885     tmp=live; /* ouch! This is big... */
6886     raw_jcc_l_oponly(cc);
6887     branchadd=(uae_u32*)get_target();
6888     emit_long(0);
6889    
6890     /* predicted outcome */
6891     tbi=get_blockinfo_addr_new((void*)t1,1);
6892     match_states(tbi);
6893 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6894 gbeauche 1.1 raw_jcc_l_oponly(4);
6895     tba=(uae_u32*)get_target();
6896 gbeauche 1.24 emit_long(get_handler(t1)-((uintptr)tba+4));
6897     raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6898 gbeauche 1.28 flush_reg_count();
6899 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6900 gbeauche 1.1 create_jmpdep(bi,0,tba,t1);
6901    
6902 gbeauche 1.5 align_target(align_jumps);
6903 gbeauche 1.1 /* not-predicted outcome */
6904 gbeauche 1.24 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6905 gbeauche 1.1 live=tmp; /* Ouch again */
6906     tbi=get_blockinfo_addr_new((void*)t2,1);
6907     match_states(tbi);
6908    
6909     //flush(1); /* Can only get here if was_comp==1 */
6910 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6911 gbeauche 1.1 raw_jcc_l_oponly(4);
6912     tba=(uae_u32*)get_target();
6913 gbeauche 1.24 emit_long(get_handler(t2)-((uintptr)tba+4));
6914     raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6915 gbeauche 1.28 flush_reg_count();
6916 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6917 gbeauche 1.1 create_jmpdep(bi,1,tba,t2);
6918     }
6919     else
6920     {
6921     if (was_comp) {
6922     flush(1);
6923     }
6924 gbeauche 1.28 flush_reg_count();
6925 gbeauche 1.1
6926     /* Let's find out where next_handler is... */
6927     if (was_comp && isinreg(PC_P)) {
6928     r=live.state[PC_P].realreg;
6929     raw_and_l_ri(r,TAGMASK);
6930     int r2 = (r==0) ? 1 : 0;
6931 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6932     raw_cmp_l_mi((uintptr)specflags,0);
6933 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6934 gbeauche 1.1 raw_jmp_r(r2);
6935     }
6936     else if (was_comp && isconst(PC_P)) {
6937     uae_u32 v=live.state[PC_P].val;
6938     uae_u32* tba;
6939     blockinfo* tbi;
6940    
6941 gbeauche 1.24 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6942 gbeauche 1.1 match_states(tbi);
6943    
6944 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6945 gbeauche 1.1 raw_jcc_l_oponly(4);
6946     tba=(uae_u32*)get_target();
6947 gbeauche 1.24 emit_long(get_handler(v)-((uintptr)tba+4));
6948     raw_mov_l_mi((uintptr)&regs.pc_p,v);
6949     raw_jmp((uintptr)popall_do_nothing);
6950 gbeauche 1.1 create_jmpdep(bi,0,tba,v);
6951     }
6952     else {
6953     r=REG_PC_TMP;
6954 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6955 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6956     int r2 = (r==0) ? 1 : 0;
6957 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6958     raw_cmp_l_mi((uintptr)specflags,0);
6959 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6960 gbeauche 1.1 raw_jmp_r(r2);
6961     }
6962     }
6963     }
6964    
6965     #if USE_MATCH
6966     if (callers_need_recompile(&live,&(bi->env))) {
6967     mark_callers_recompile(bi);
6968     }
6969    
6970     big_to_small_state(&live,&(bi->env));
6971     #endif
6972    
6973 gbeauche 1.8 #if USE_CHECKSUM_INFO
6974     remove_from_list(bi);
6975     if (trace_in_rom) {
6976     // No need to checksum that block trace on cache invalidation
6977     free_checksum_info_chain(bi->csi);
6978     bi->csi = NULL;
6979     add_to_dormant(bi);
6980     }
6981     else {
6982     calc_checksum(bi,&(bi->c1),&(bi->c2));
6983     add_to_active(bi);
6984     }
6985     #else
6986 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
6987     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6988     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6989     else
6990     max_pcp+=LONGEST_68K_INST;
6991 gbeauche 1.7
6992 gbeauche 1.1 bi->len=max_pcp-min_pcp;
6993     bi->min_pcp=min_pcp;
6994 gbeauche 1.7
6995 gbeauche 1.1 remove_from_list(bi);
6996     if (isinrom(min_pcp) && isinrom(max_pcp)) {
6997     add_to_dormant(bi); /* No need to checksum it on cache flush.
6998     Please don't start changing ROMs in
6999     flight! */
7000     }
7001     else {
7002     calc_checksum(bi,&(bi->c1),&(bi->c2));
7003     add_to_active(bi);
7004     }
7005 gbeauche 1.8 #endif
7006 gbeauche 1.1
7007     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
7008    
7009     #if JIT_DEBUG
7010     if (JITDebug)
7011     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
7012    
7013     if (JITDebug && disasm_block) {
7014     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
7015     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
7016     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
7017     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
7018     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
7019     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
7020     getchar();
7021     }
7022     #endif
7023    
7024     log_dump();
7025 gbeauche 1.5 align_target(align_jumps);
7026 gbeauche 1.1
7027     /* This is the non-direct handler */
7028     bi->handler=
7029     bi->handler_to_use=(cpuop_func *)get_target();
7030 gbeauche 1.24 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
7031     raw_jnz((uintptr)popall_cache_miss);
7032 gbeauche 1.1 comp_pc_p=(uae_u8*)pc_hist[0].location;
7033    
7034     bi->status=BI_FINALIZING;
7035     init_comp();
7036     match_states(bi);
7037     flush(1);
7038    
7039 gbeauche 1.24 raw_jmp((uintptr)bi->direct_handler);
7040 gbeauche 1.1
7041     current_compile_p=get_target();
7042     raise_in_cl_list(bi);
7043    
7044     /* We will flush soon, anyway, so let's do it now */
7045     if (current_compile_p>=max_compile_start)
7046     flush_icache_hard(7);
7047    
7048     bi->status=BI_ACTIVE;
7049     if (redo_current_block)
7050     block_need_recompile(bi);
7051    
7052     #if PROFILE_COMPILE_TIME
7053     compile_time += (clock() - start_time);
7054     #endif
7055     }
7056 gbeauche 1.34
7057     /* Account for compilation time */
7058     cpu_do_check_ticks();
7059 gbeauche 1.1 }
7060    
7061     void do_nothing(void)
7062     {
7063     /* What did you expect this to do? */
7064     }
7065    
7066     void exec_nostats(void)
7067     {
7068     for (;;) {
7069     uae_u32 opcode = GET_OPCODE;
7070 gbeauche 1.32 #if FLIGHT_RECORDER
7071     m68k_record_step(m68k_getpc());
7072     #endif
7073 gbeauche 1.1 (*cpufunctbl[opcode])(opcode);
7074 gbeauche 1.34 cpu_check_ticks();
7075 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7076     return; /* We will deal with the spcflags in the caller */
7077     }
7078     }
7079     }
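/* exec_nostats() interprets up to the end of a block without keeping a
   history; execute_normal() below additionally records each location
   into pc_hist[] so the trace can be handed to compile_block(). */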
7080    
7081     void execute_normal(void)
7082     {
7083     if (!check_for_cache_miss()) {
7084     cpu_history pc_hist[MAXRUN];
7085     int blocklen = 0;
7086     #if REAL_ADDRESSING || DIRECT_ADDRESSING
7087     start_pc_p = regs.pc_p;
7088     start_pc = get_virtual_address(regs.pc_p);
7089     #else
7090     start_pc_p = regs.pc_oldp;
7091     start_pc = regs.pc;
7092     #endif
7093     for (;;) { /* Take note: This is the do-it-normal loop */
7094     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7095     uae_u32 opcode = GET_OPCODE;
7096     #if FLIGHT_RECORDER
7097     m68k_record_step(m68k_getpc());
7098     #endif
7099     (*cpufunctbl[opcode])(opcode);
7100 gbeauche 1.34 cpu_check_ticks();
7101 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7102     compile_block(pc_hist, blocklen);
7103     return; /* We will deal with the spcflags in the caller */
7104     }
7105     /* No need to check regs.spcflags, because if they were set,
7106     we'd have ended up inside that "if" */
7107     }
7108     }
7109     }
7110    
7111     typedef void (*compiled_handler)(void);
7112    
7113 gbeauche 1.36 static void m68k_do_compile_execute(void)
7114 gbeauche 1.1 {
7115     for (;;) {
7116     ((compiled_handler)(pushall_call_handler))();
7117     /* Whenever we return from that, we should check spcflags */
7118     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7119     if (m68k_do_specialties ())
7120     return;
7121     }
7122     }
7123     }
7124 gbeauche 1.35
7125     void m68k_compile_execute (void)
7126     {
7127     for (;;) {
7128     if (quit_program)
7129     break;
7130     m68k_do_compile_execute();
7131     }
7132     }