root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.27
Committed: 2004-11-08T23:24:54Z by gbeauche
Branch: MAIN
Changes since 1.26: +2 -2 lines
Log Message:
fix inline dispatcher to really generate a cmove on x86-64 (silly bug!)

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 cebix 1.23 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 gbeauche 1.11 * Gwenole Beauchesne
8     *
9 cebix 1.23 * Basilisk II (C) 1997-2004 Christian Bauer
10 gbeauche 1.11 *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.24 /* NOTE: support for AMD64 assumes translation cache and other code
35     * buffers are allocated into a 32-bit address space because (i) B2/JIT
36     * code is not 64-bit clean and (ii) it's faster to resolve branches
37     * that way.
38     */
39     #if !defined(__i386__) && !defined(__x86_64__)
40     #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41     #endif
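/* Editor's sketch: one way to satisfy the 32-bit constraint described in
 * the NOTE above on Linux/x86-64, assuming MAP_32BIT is available. B2
 * itself allocates through vm_alloc.h; this is illustration only:
 *
 *   #include <sys/mman.h>
 *   static uae_u8 *alloc_code_buffer_low(size_t size)
 *   {
 *       void *p = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
 *                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
 *       return (p == MAP_FAILED) ? NULL : (uae_u8 *)p;
 *   }
 */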
42    
43 gbeauche 1.1 #define USE_MATCH 0
44    
45     /* kludge for Brian, so he can compile under MSVC++ */
46     #define USE_NORMAL_CALLING_CONVENTION 0
47    
48     #ifndef WIN32
49 gbeauche 1.20 #include <unistd.h>
50 gbeauche 1.1 #include <sys/types.h>
51     #include <sys/mman.h>
52     #endif
53    
54     #include <stdlib.h>
55     #include <fcntl.h>
56     #include <errno.h>
57    
58     #include "sysdeps.h"
59     #include "cpu_emulation.h"
60     #include "main.h"
61     #include "prefs.h"
62     #include "user_strings.h"
63 gbeauche 1.2 #include "vm_alloc.h"
64 gbeauche 1.1
65     #include "m68k.h"
66     #include "memory.h"
67     #include "readcpu.h"
68     #include "newcpu.h"
69     #include "comptbl.h"
70     #include "compiler/compemu.h"
71     #include "fpu/fpu.h"
72     #include "fpu/flags.h"
73    
74     #define DEBUG 1
75     #include "debug.h"
76    
77     #ifdef ENABLE_MON
78     #include "mon.h"
79     #endif
80    
81     #ifndef WIN32
82 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
83     #define PROFILE_UNTRANSLATED_INSNS 1
84 gbeauche 1.1 #endif
85    
86     #ifdef WIN32
87     #undef write_log
88     #define write_log dummy_write_log
89     static void dummy_write_log(const char *, ...) { }
90     #endif
91    
92     #if JIT_DEBUG
93     #undef abort
94     #define abort() do { \
95     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
96     exit(EXIT_FAILURE); \
97     } while (0)
98     #endif
99    
100     #if PROFILE_COMPILE_TIME
101     #include <time.h>
102     static uae_u32 compile_count = 0;
103     static clock_t compile_time = 0;
104     static clock_t emul_start_time = 0;
105     static clock_t emul_end_time = 0;
106     #endif
107    
108 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
109     const int untranslated_top_ten = 20; /* despite the name, report the top 20 */
110     static uae_u32 raw_cputbl_count[65536] = { 0, };
111     static uae_u16 opcode_nums[65536];
112    
113     static int untranslated_compfn(const void *e1, const void *e2)
114     {
115     uae_u32 a = raw_cputbl_count[*(const uae_u16 *)e1];
        uae_u32 b = raw_cputbl_count[*(const uae_u16 *)e2];
        /* sort descending by count; return -1/0/+1 so qsort gets a consistent comparator */
        return (b > a) - (b < a);
116     }
117     #endif
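/* Editor's sketch of how the comparator above is meant to be used at
 * shutdown (assumed usage; the actual report code lives further down in
 * the full file, outside this excerpt):
 *
 *   for (int i = 0; i < 65536; i++) opcode_nums[i] = i;
 *   qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
 *   // opcode_nums[] is now ordered by descending execution count of
 *   // untranslated opcodes, ready for write_log() reporting.
 */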
118    
119 gbeauche 1.24 static compop_func *compfunctbl[65536];
120     static compop_func *nfcompfunctbl[65536];
121     static cpuop_func *nfcpufunctbl[65536];
122 gbeauche 1.1 uae_u8* comp_pc_p;
123    
124 gbeauche 1.26 // From main_unix.cpp
125     extern bool ThirtyThreeBitAddressing;
126    
127 gbeauche 1.6 // From newcpu.cpp
128     extern bool quit_program;
129    
130 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
131     #if JIT_DEBUG
132     static bool JITDebug = false; // Enable runtime disassemblers through mon?
133     #else
134     const bool JITDebug = false; // Don't use JIT debug mode at all
135     #endif
136    
137 gbeauche 1.22 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
138 gbeauche 1.1 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
139 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
140 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
141     static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions ?
142     static bool have_cmov = false; // target has CMOV instructions ?
143     static bool have_rat_stall = true; // target has partial register stalls ?
144 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
145     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
146 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
147 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
148     static int align_jumps = 32; // Align the start of jumps
149 gbeauche 1.1 static int optcount[10] = {
150     10, // How often a block has to be executed before it is translated
151     0, // How often to use naive translation
152     0, 0, 0, 0,
153     -1, -1, -1, -1
154     };
155    
156     struct op_properties {
157     uae_u8 use_flags;
158     uae_u8 set_flags;
159     uae_u8 is_addx;
160     uae_u8 cflow;
161     };
162     static op_properties prop[65536];
163    
164     static inline int end_block(uae_u32 opcode)
165     {
166     return (prop[opcode].cflow & fl_end_block);
167     }
168    
169 gbeauche 1.8 static inline bool is_const_jump(uae_u32 opcode)
170     {
171     return (prop[opcode].cflow == fl_const_jump);
172     }
173    
174 gbeauche 1.18 static inline bool may_trap(uae_u32 opcode)
175     {
176     return (prop[opcode].cflow & fl_trap);
177     }
178    
179     static inline unsigned int cft_map (unsigned int f)
180     {
181     #ifndef HAVE_GET_WORD_UNSWAPPED
182     return f;
183     #else
184     return ((f >> 8) & 255) | ((f & 255) << 8);
185     #endif
186     }
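/* Editor's example: with HAVE_GET_WORD_UNSWAPPED, opcode words sit
 * byte-swapped in memory, so cft_map(0x4e75), the RTS opcode, yields
 * 0x754e for indexing the swapped tables; otherwise it is the identity. */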
187    
188 gbeauche 1.1 uae_u8* start_pc_p;
189     uae_u32 start_pc;
190     uae_u32 current_block_pc_p;
191 gbeauche 1.24 static uintptr current_block_start_target;
192 gbeauche 1.1 uae_u32 needed_flags;
193 gbeauche 1.24 static uintptr next_pc_p;
194     static uintptr taken_pc_p;
195 gbeauche 1.1 static int branch_cc;
196     static int redo_current_block;
197    
198     int segvcount=0;
199     int soft_flush_count=0;
200     int hard_flush_count=0;
201     int checksum_count=0;
202     static uae_u8* current_compile_p=NULL;
203     static uae_u8* max_compile_start;
204     static uae_u8* compiled_code=NULL;
205     static uae_s32 reg_alloc_run;
206 gbeauche 1.24 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
207     static uae_u8* popallspace=NULL;
208 gbeauche 1.1
209     void* pushall_call_handler=NULL;
210     static void* popall_do_nothing=NULL;
211     static void* popall_exec_nostats=NULL;
212     static void* popall_execute_normal=NULL;
213     static void* popall_cache_miss=NULL;
214     static void* popall_recompile_block=NULL;
215     static void* popall_check_checksum=NULL;
216    
217     /* The 68k only ever executes from even addresses. So right now, we
218     * waste half the entries in this array.
219     * UPDATE: We now use those entries to store the start of the linked
220     * lists that we maintain for each hash result.
221     */
222     cacheline cache_tags[TAGSIZE];
223     int letit=0;
224     blockinfo* hold_bi[MAX_HOLD_BI];
225     blockinfo* active;
226     blockinfo* dormant;
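/* Editor's note (inferred from the list helpers below): cache_tags is
 * used in adjacent pairs, with cache_tags[cl].handler holding the
 * executable handler for hash bucket cl and cache_tags[cl+1].bi holding
 * the head of that bucket's blockinfo chain. */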
227    
228     /* 68040 */
229     extern struct cputbl op_smalltbl_0_nf[];
230     extern struct comptbl op_smalltbl_0_comp_nf[];
231     extern struct comptbl op_smalltbl_0_comp_ff[];
232    
233     /* 68020 + 68881 */
234     extern struct cputbl op_smalltbl_1_nf[];
235    
236     /* 68020 */
237     extern struct cputbl op_smalltbl_2_nf[];
238    
239     /* 68010 */
240     extern struct cputbl op_smalltbl_3_nf[];
241    
242     /* 68000 */
243     extern struct cputbl op_smalltbl_4_nf[];
244    
245     /* 68000 slow but compatible. */
246     extern struct cputbl op_smalltbl_5_nf[];
247    
248     static void flush_icache_hard(int n);
249     static void flush_icache_lazy(int n);
250     static void flush_icache_none(int n);
251     void (*flush_icache)(int n) = flush_icache_none;
252    
253    
254    
255     bigstate live;
256     smallstate empty_ss;
257     smallstate default_ss;
258     static int optlev;
259    
260     static int writereg(int r, int size);
261     static void unlock2(int r);
262     static void setlock(int r);
263     static int readreg_specific(int r, int size, int spec);
264     static int writereg_specific(int r, int size, int spec);
265     static void prepare_for_call_1(void);
266     static void prepare_for_call_2(void);
267     static void align_target(uae_u32 a);
268    
269     static uae_s32 nextused[VREGS];
270    
271     uae_u32 m68k_pc_offset;
272    
273     /* Some arithmetic operations can be optimized away if the operands
274     * are known to be constant. But that's only a good idea when the
275     * side effects they would have on the flags are not important. This
276     * variable indicates whether we need the side effects or not
277     */
278     uae_u32 needflags=0;
279    
280     /* Flag handling is complicated.
281     *
282     * x86 instructions create flags, which quite often are exactly what we
283     * want. So at times, the "68k" flags are actually in the x86 flags.
284     *
285     * Then again, sometimes we do x86 instructions that clobber the x86
286     * flags, but don't represent a corresponding m68k instruction. In that
287     * case, we have to save them.
288     *
289     * We used to save them to the stack, but now store them back directly
290     * into the regflags.cznv of the traditional emulation. Thus some odd
291     * names.
292     *
293     * So flags can be in either of two places (used to be three; boy were
294     * things complicated back then!); and either place can contain either
295     * valid flags or invalid trash (and on the stack, there was also the
296     * option of "nothing at all", now gone). A couple of variables keep
297     * track of the respective states.
298     *
299     * To make things worse, we might or might not be interested in the flags.
300     * By default, we are, but a call to dont_care_flags can change that
301     * until the next call to live_flags. If we are not, pretty much whatever
302     * is in the register and/or the native flags is seen as valid.
303     */
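/* Editor's summary of the above, for quick reference:
 *   live.flags_in_flags == VALID  -> 68k flags currently live in x86 EFLAGS
 *   live.flags_on_stack == VALID  -> 68k flags saved in regflags.cznv
 * make_flags_live_internal() below moves them back into EFLAGS;
 * flags_to_stack() saves them out. Either copy may also be TRASH. */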
304    
305     static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
306     {
307     return cache_tags[cl+1].bi;
308     }
309    
310     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
311     {
312     blockinfo* bi=get_blockinfo(cacheline(addr));
313    
314     while (bi) {
315     if (bi->pc_p==addr)
316     return bi;
317     bi=bi->next_same_cl;
318     }
319     return NULL;
320     }
321    
322    
323     /*******************************************************************
324     * All sorts of list related functions for all of the lists *
325     *******************************************************************/
326    
327     static __inline__ void remove_from_cl_list(blockinfo* bi)
328     {
329     uae_u32 cl=cacheline(bi->pc_p);
330    
331     if (bi->prev_same_cl_p)
332     *(bi->prev_same_cl_p)=bi->next_same_cl;
333     if (bi->next_same_cl)
334     bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
335     if (cache_tags[cl+1].bi)
336     cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
337     else
338     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
339     }
340    
341     static __inline__ void remove_from_list(blockinfo* bi)
342     {
343     if (bi->prev_p)
344     *(bi->prev_p)=bi->next;
345     if (bi->next)
346     bi->next->prev_p=bi->prev_p;
347     }
348    
349     static __inline__ void remove_from_lists(blockinfo* bi)
350     {
351     remove_from_list(bi);
352     remove_from_cl_list(bi);
353     }
354    
355     static __inline__ void add_to_cl_list(blockinfo* bi)
356     {
357     uae_u32 cl=cacheline(bi->pc_p);
358    
359     if (cache_tags[cl+1].bi)
360     cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
361     bi->next_same_cl=cache_tags[cl+1].bi;
362    
363     cache_tags[cl+1].bi=bi;
364     bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
365    
366     cache_tags[cl].handler=bi->handler_to_use;
367     }
368    
369     static __inline__ void raise_in_cl_list(blockinfo* bi)
370     {
371     remove_from_cl_list(bi);
372     add_to_cl_list(bi);
373     }
374    
375     static __inline__ void add_to_active(blockinfo* bi)
376     {
377     if (active)
378     active->prev_p=&(bi->next);
379     bi->next=active;
380    
381     active=bi;
382     bi->prev_p=&active;
383     }
384    
385     static __inline__ void add_to_dormant(blockinfo* bi)
386     {
387     if (dormant)
388     dormant->prev_p=&(bi->next);
389     bi->next=dormant;
390    
391     dormant=bi;
392     bi->prev_p=&dormant;
393     }
394    
395     static __inline__ void remove_dep(dependency* d)
396     {
397     if (d->prev_p)
398     *(d->prev_p)=d->next;
399     if (d->next)
400     d->next->prev_p=d->prev_p;
401     d->prev_p=NULL;
402     d->next=NULL;
403     }
404    
405     /* This block's code is about to be thrown away, so it no longer
406     depends on anything else */
407     static __inline__ void remove_deps(blockinfo* bi)
408     {
409     remove_dep(&(bi->dep[0]));
410     remove_dep(&(bi->dep[1]));
411     }
412    
413     static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
414     {
415     *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
416     }
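/* Editor's note: d->jmp_off points at the 4-byte rel32 displacement field
 * of an already-emitted jump, so the store above re-aims that jump at
 * handler a; x86 measures the displacement from the end of the field,
 * hence the +4. */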
417    
418     /********************************************************************
419     * Soft flush handling support functions *
420     ********************************************************************/
421    
422     static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
423     {
424     //write_log("bi is %p\n",bi);
425     if (dh!=bi->direct_handler_to_use) {
426     dependency* x=bi->deplist;
427     //write_log("bi->deplist=%p\n",bi->deplist);
428     while (x) {
429     //write_log("x is %p\n",x);
430     //write_log("x->next is %p\n",x->next);
431     //write_log("x->prev_p is %p\n",x->prev_p);
432    
433     if (x->jmp_off) {
434     adjust_jmpdep(x,dh);
435     }
436     x=x->next;
437     }
438     bi->direct_handler_to_use=dh;
439     }
440     }
441    
442     static __inline__ void invalidate_block(blockinfo* bi)
443     {
444     int i;
445    
446     bi->optlevel=0;
447     bi->count=optcount[0]-1;
448     bi->handler=NULL;
449     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
450     bi->direct_handler=NULL;
451     set_dhtu(bi,bi->direct_pen);
452     bi->needed_flags=0xff;
453     bi->status=BI_INVALID;
454     for (i=0;i<2;i++) {
455     bi->dep[i].jmp_off=NULL;
456     bi->dep[i].target=NULL;
457     }
458     remove_deps(bi);
459     }
460    
461     static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
462     {
463 gbeauche 1.24 blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
464 gbeauche 1.1
465     Dif(!tbi) {
466     write_log("Could not create jmpdep!\n");
467     abort();
468     }
469     bi->dep[i].jmp_off=jmpaddr;
470     bi->dep[i].source=bi;
471     bi->dep[i].target=tbi;
472     bi->dep[i].next=tbi->deplist;
473     if (bi->dep[i].next)
474     bi->dep[i].next->prev_p=&(bi->dep[i].next);
475     bi->dep[i].prev_p=&(tbi->deplist);
476     tbi->deplist=&(bi->dep[i]);
477     }
478    
479     static __inline__ void block_need_recompile(blockinfo * bi)
480     {
481     uae_u32 cl = cacheline(bi->pc_p);
482    
483     set_dhtu(bi, bi->direct_pen);
484     bi->direct_handler = bi->direct_pen;
485    
486     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
487     bi->handler = (cpuop_func *)popall_execute_normal;
488     if (bi == cache_tags[cl + 1].bi)
489     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
490     bi->status = BI_NEED_RECOMP;
491     }
492    
493     static __inline__ void mark_callers_recompile(blockinfo * bi)
494     {
495     dependency *x = bi->deplist;
496    
497     while (x) {
498     dependency *next = x->next; /* This disappears when we mark for
499     * recompilation and thus remove the
500     * blocks from the lists */
501     if (x->jmp_off) {
502     blockinfo *cbi = x->source;
503    
504     Dif(cbi->status == BI_INVALID) {
505     // write_log("invalid block in dependency list\n"); // FIXME?
506     // abort();
507     }
508     if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
509     block_need_recompile(cbi);
510     mark_callers_recompile(cbi);
511     }
512     else if (cbi->status == BI_COMPILING) {
513     redo_current_block = 1;
514     }
515     else if (cbi->status == BI_NEED_RECOMP) {
516     /* nothing */
517     }
518     else {
519     //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
520     }
521     }
522     x = next;
523     }
524     }
525    
526     static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
527     {
528     blockinfo* bi=get_blockinfo_addr(addr);
529     int i;
530    
531     if (!bi) {
532     for (i=0;i<MAX_HOLD_BI && !bi;i++) {
533     if (hold_bi[i]) {
534     uae_u32 cl=cacheline(addr);
535    
536     bi=hold_bi[i];
537     hold_bi[i]=NULL;
538     bi->pc_p=(uae_u8 *)addr;
539     invalidate_block(bi);
540     add_to_active(bi);
541     add_to_cl_list(bi);
542    
543     }
544     }
545     }
546     if (!bi) {
547     write_log("Looking for blockinfo, can't find free one\n");
548     abort();
549     }
550     return bi;
551     }
552    
553     static void prepare_block(blockinfo* bi);
554    
555     /* Management of blockinfos.
556    
557     A blockinfo struct is allocated whenever a new block has to be
558     compiled. If the list of free blockinfos is empty, we allocate a new
559     pool of blockinfos and link the newly created blockinfos together
560     into the list of free blockinfos. Otherwise, we simply pop a structure
561 gbeauche 1.7 off the free list.
562 gbeauche 1.1
563     Blockinfos are lazily deallocated, i.e. chained together in the
564     list of free blockinfos whenever a translation cache flush (hard or
565     soft) request occurs.
566     */
567    
568 gbeauche 1.7 template< class T >
569     class LazyBlockAllocator
570     {
571     enum {
572     kPoolSize = 1 + 4096 / sizeof(T)
573     };
574     struct Pool {
575     T chunk[kPoolSize];
576     Pool * next;
577     };
578     Pool * mPools;
579     T * mChunks;
580     public:
581     LazyBlockAllocator() : mPools(0), mChunks(0) { }
582     ~LazyBlockAllocator();
583     T * acquire();
584     void release(T * const);
585 gbeauche 1.1 };
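/* Editor's note: kPoolSize makes each malloc'ed Pool hold one 4KB page's
 * worth of T's plus one, and free chunks are threaded through a T::next
 * field, so this allocator only fits types that provide one, as blockinfo
 * and checksum_info do. */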
586    
587 gbeauche 1.7 template< class T >
588     LazyBlockAllocator<T>::~LazyBlockAllocator()
589 gbeauche 1.1 {
590 gbeauche 1.7 Pool * currentPool = mPools;
591     while (currentPool) {
592     Pool * deadPool = currentPool;
593     currentPool = currentPool->next;
594     free(deadPool);
595     }
596     }
597    
598     template< class T >
599     T * LazyBlockAllocator<T>::acquire()
600     {
601     if (!mChunks) {
602     // There is no chunk left, allocate a new pool and link the
603     // chunks into the free list
604     Pool * newPool = (Pool *)malloc(sizeof(Pool));
605     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
606     chunk->next = mChunks;
607     mChunks = chunk;
608 gbeauche 1.1 }
609 gbeauche 1.7 newPool->next = mPools;
610     mPools = newPool;
611     }
612     T * chunk = mChunks;
613     mChunks = chunk->next;
614     return chunk;
615     }
616    
617     template< class T >
618     void LazyBlockAllocator<T>::release(T * const chunk)
619     {
620     chunk->next = mChunks;
621     mChunks = chunk;
622     }
623    
624     template< class T >
625     class HardBlockAllocator
626     {
627     public:
628     T * acquire() {
629     T * data = (T *)current_compile_p;
630     current_compile_p += sizeof(T);
631     return data;
632 gbeauche 1.1 }
633 gbeauche 1.7
634     void release(T * const chunk) {
635     // Deallocated on invalidation
636     }
637     };
638    
639     #if USE_SEPARATE_BIA
640     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
641     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
642 gbeauche 1.1 #else
643 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
644     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
645 gbeauche 1.1 #endif
646    
647 gbeauche 1.8 static __inline__ checksum_info *alloc_checksum_info(void)
648     {
649     checksum_info *csi = ChecksumInfoAllocator.acquire();
650     csi->next = NULL;
651     return csi;
652     }
653    
654     static __inline__ void free_checksum_info(checksum_info *csi)
655     {
656     csi->next = NULL;
657     ChecksumInfoAllocator.release(csi);
658     }
659    
660     static __inline__ void free_checksum_info_chain(checksum_info *csi)
661     {
662     while (csi != NULL) {
663     checksum_info *csi2 = csi->next;
664     free_checksum_info(csi);
665     csi = csi2;
666     }
667     }
668 gbeauche 1.7
669     static __inline__ blockinfo *alloc_blockinfo(void)
670 gbeauche 1.1 {
671 gbeauche 1.7 blockinfo *bi = BlockInfoAllocator.acquire();
672     #if USE_CHECKSUM_INFO
673     bi->csi = NULL;
674 gbeauche 1.1 #endif
675 gbeauche 1.7 return bi;
676 gbeauche 1.1 }
677    
678 gbeauche 1.7 static __inline__ void free_blockinfo(blockinfo *bi)
679 gbeauche 1.1 {
680 gbeauche 1.7 #if USE_CHECKSUM_INFO
681 gbeauche 1.8 free_checksum_info_chain(bi->csi);
682     bi->csi = NULL;
683 gbeauche 1.1 #endif
684 gbeauche 1.7 BlockInfoAllocator.release(bi);
685 gbeauche 1.1 }
686    
687     static __inline__ void alloc_blockinfos(void)
688     {
689     int i;
690     blockinfo* bi;
691    
692     for (i=0;i<MAX_HOLD_BI;i++) {
693     if (hold_bi[i])
694     return;
695     bi=hold_bi[i]=alloc_blockinfo();
696     prepare_block(bi);
697     }
698     }
699    
700     /********************************************************************
701     * Functions to emit data into memory, and other general support *
702     ********************************************************************/
703    
704     static uae_u8* target;
705    
706     static void emit_init(void)
707     {
708     }
709    
710     static __inline__ void emit_byte(uae_u8 x)
711     {
712     *target++=x;
713     }
714    
715     static __inline__ void emit_word(uae_u16 x)
716     {
717     *((uae_u16*)target)=x;
718     target+=2;
719     }
720    
721     static __inline__ void emit_long(uae_u32 x)
722     {
723     *((uae_u32*)target)=x;
724     target+=4;
725     }
726    
727 gbeauche 1.24 static __inline__ void emit_quad(uae_u64 x)
728     {
729     *((uae_u64*)target)=x;
730     target+=8;
731     }
732    
733 gbeauche 1.12 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
734     {
735     memcpy((uae_u8 *)target,block,blocklen);
736     target+=blocklen;
737     }
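/* Editor's sketch: the emit_* routines above are the JIT's only output
 * channel into the translation cache; e.g. a handler epilogue could be
 * produced (illustration only; checksum_val is hypothetical) as:
 *
 *   emit_byte(0xc3);           // x86 RET opcode
 *   emit_long(checksum_val);   // trailing inline data a handler can read
 */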
738    
739 gbeauche 1.1 static __inline__ uae_u32 reverse32(uae_u32 v)
740     {
741     #if 1
742     // gb-- We have specialized byteswapping functions, just use them
743     return do_byteswap_32(v);
744     #else
745     return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
746     #endif
747     }
748    
749     /********************************************************************
750     * Getting the information about the target CPU *
751     ********************************************************************/
752    
753     #include "codegen_x86.cpp"
754    
755     void set_target(uae_u8* t)
756     {
757     target=t;
758     }
759    
760     static __inline__ uae_u8* get_target_noopt(void)
761     {
762     return target;
763     }
764    
765     __inline__ uae_u8* get_target(void)
766     {
767     return get_target_noopt();
768     }
769    
770    
771     /********************************************************************
772     * Flags status handling. EMIT TIME! *
773     ********************************************************************/
774    
775     static void bt_l_ri_noclobber(R4 r, IMM i);
776    
777     static void make_flags_live_internal(void)
778     {
779     if (live.flags_in_flags==VALID)
780     return;
781     Dif (live.flags_on_stack==TRASH) {
782     write_log("Want flags, got something on stack, but it is TRASH\n");
783     abort();
784     }
785     if (live.flags_on_stack==VALID) {
786     int tmp;
787     tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
788     raw_reg_to_flags(tmp);
789     unlock2(tmp);
790    
791     live.flags_in_flags=VALID;
792     return;
793     }
794     write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
795     live.flags_in_flags,live.flags_on_stack);
796     abort();
797     }
798    
799     static void flags_to_stack(void)
800     {
801     if (live.flags_on_stack==VALID)
802     return;
803     if (!live.flags_are_important) {
804     live.flags_on_stack=VALID;
805     return;
806     }
807     Dif (live.flags_in_flags!=VALID)
808     abort();
809     else {
810     int tmp;
811     tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
812     raw_flags_to_reg(tmp);
813     unlock2(tmp);
814     }
815     live.flags_on_stack=VALID;
816     }
817    
818     static __inline__ void clobber_flags(void)
819     {
820     if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
821     flags_to_stack();
822     live.flags_in_flags=TRASH;
823     }
824    
825     /* Prepare for leaving the compiled stuff */
826     static __inline__ void flush_flags(void)
827     {
828     flags_to_stack();
829     return;
830     }
831    
832     int touchcnt;
833    
834     /********************************************************************
835 gbeauche 1.18 * Partial register flushing for optimized calls *
836     ********************************************************************/
837    
838     struct regusage {
839     uae_u16 rmask;
840     uae_u16 wmask;
841     };
842    
843     static inline void ru_set(uae_u16 *mask, int reg)
844     {
845     #if USE_OPTIMIZED_CALLS
846     *mask |= 1 << reg;
847     #endif
848     }
849    
850     static inline bool ru_get(const uae_u16 *mask, int reg)
851     {
852     #if USE_OPTIMIZED_CALLS
853     return (*mask & (1 << reg));
854     #else
855     /* Default: instruction both reads and writes the register */
856     return true;
857     #endif
858     }
859    
860     static inline void ru_set_read(regusage *ru, int reg)
861     {
862     ru_set(&ru->rmask, reg);
863     }
864    
865     static inline void ru_set_write(regusage *ru, int reg)
866     {
867     ru_set(&ru->wmask, reg);
868     }
869    
870     static inline bool ru_read_p(const regusage *ru, int reg)
871     {
872     return ru_get(&ru->rmask, reg);
873     }
874    
875     static inline bool ru_write_p(const regusage *ru, int reg)
876     {
877     return ru_get(&ru->wmask, reg);
878     }
879    
880     static void ru_fill_ea(regusage *ru, int reg, amodes mode,
881     wordsizes size, int write_mode)
882     {
883     switch (mode) {
884     case Areg:
885     reg += 8;
886     /* fall through */
887     case Dreg:
888     ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
889     break;
890     case Ad16:
891     /* skip displacement */
892     m68k_pc_offset += 2;
        /* fall through */
893     case Aind:
894     case Aipi:
895     case Apdi:
896     ru_set_read(ru, reg+8);
897     break;
898     case Ad8r:
899     ru_set_read(ru, reg+8);
900     /* fall through */
901     case PC8r: {
902     uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
903     reg = (dp >> 12) & 15;
904     ru_set_read(ru, reg);
905     if (dp & 0x100)
906     m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
907     break;
908     }
909     case PC16:
910     case absw:
911     case imm0:
912     case imm1:
913     m68k_pc_offset += 2;
914     break;
915     case absl:
916     case imm2:
917     m68k_pc_offset += 4;
918     break;
919     case immi:
920     m68k_pc_offset += (size == sz_long) ? 4 : 2;
921     break;
922     }
923     }
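/* Editor's worked example (illustrative): for MOVE.L (A0)+,D3 the source
 * EA is Aipi with reg 0, so ru_fill_ea marks A0 (bit 8 of rmask) as read;
 * the destination is Dreg 3 in write mode, marking D3 (bit 3 of wmask).
 * ru_fill below clears rw_dest for i_MOVE, so D3 is not also counted as
 * read. */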
924    
925     /* TODO: split into a static initialization part and a dynamic one
926     (instructions depending on extension words) */
927     static void ru_fill(regusage *ru, uae_u32 opcode)
928     {
929     m68k_pc_offset += 2;
930    
931     /* Default: no register is used or written to */
932     ru->rmask = 0;
933     ru->wmask = 0;
934    
935     uae_u32 real_opcode = cft_map(opcode);
936     struct instr *dp = &table68k[real_opcode];
937    
938     bool rw_dest = true;
939     bool handled = false;
940    
941     /* Handle some instructions specifically */
942     uae_u16 reg, ext;
943     switch (dp->mnemo) {
944     case i_BFCHG:
945     case i_BFCLR:
946     case i_BFEXTS:
947     case i_BFEXTU:
948     case i_BFFFO:
949     case i_BFINS:
950     case i_BFSET:
951     case i_BFTST:
952     ext = comp_get_iword((m68k_pc_offset+=2)-2);
953     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
954     if (ext & 0x020) ru_set_read(ru, ext & 7);
955     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
956     if (dp->dmode == Dreg)
957     ru_set_read(ru, dp->dreg);
958     switch (dp->mnemo) {
959     case i_BFEXTS:
960     case i_BFEXTU:
961     case i_BFFFO:
962     ru_set_write(ru, (ext >> 12) & 7);
963     break;
964     case i_BFINS:
965     ru_set_read(ru, (ext >> 12) & 7);
966     /* fall through */
967     case i_BFCHG:
968     case i_BFCLR:
969     case i_BFSET: /* was i_BSET, which is unreachable inside this bitfield-only switch */
970     if (dp->dmode == Dreg)
971     ru_set_write(ru, dp->dreg);
972     break;
973     }
974     handled = true;
975     rw_dest = false;
976     break;
977    
978     case i_BTST:
979     rw_dest = false;
980     break;
981    
982     case i_CAS:
983     {
984     ext = comp_get_iword((m68k_pc_offset+=2)-2);
985     int Du = ext & 7;
986     ru_set_read(ru, Du);
987     int Dc = (ext >> 6) & 7;
988     ru_set_read(ru, Dc);
989     ru_set_write(ru, Dc);
990     break;
991     }
992     case i_CAS2:
993     {
994     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
995     ext = comp_get_iword((m68k_pc_offset+=2)-2);
996     Rn1 = (ext >> 12) & 15;
997     Du1 = (ext >> 6) & 7;
998     Dc1 = ext & 7;
999     ru_set_read(ru, Rn1);
1000     ru_set_read(ru, Du1);
1001     ru_set_read(ru, Dc1);
1002     ru_set_write(ru, Dc1);
1003     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1004     Rn2 = (ext >> 12) & 15;
1005     Du2 = (ext >> 6) & 7;
1006     Dc2 = ext & 7;
1007     ru_set_read(ru, Rn2);
1008     ru_set_read(ru, Du2);
1009     ru_set_read(ru, Dc2); /* Dc2 is compared (read) as well as written, like Dc1 above */
         ru_set_write(ru, Dc2);
1010     break;
1011     }
1012     case i_DIVL: case i_MULL:
1013     m68k_pc_offset += 2;
1014     break;
1015     case i_LEA:
1016     case i_MOVE: case i_MOVEA: case i_MOVE16:
1017     rw_dest = false;
1018     break;
1019     case i_PACK: case i_UNPK:
1020     rw_dest = false;
1021     m68k_pc_offset += 2;
1022     break;
1023     case i_TRAPcc:
1024     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1025     break;
1026     case i_RTR:
1027     /* do nothing, just for coverage debugging */
1028     break;
1029     /* TODO: handle EXG instruction */
1030     }
1031    
1032     /* Handle A-Traps better */
1033     if ((real_opcode & 0xf000) == 0xa000) {
1034     handled = true;
1035     }
1036    
1037     /* Handle EmulOps better */
1038     if ((real_opcode & 0xff00) == 0x7100) {
1039     handled = true;
1040     ru->rmask = 0xffff;
1041     ru->wmask = 0;
1042     }
1043    
1044     if (dp->suse && !handled)
1045     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1046    
1047     if (dp->duse && !handled)
1048     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1049    
1050     if (rw_dest)
1051     ru->rmask |= ru->wmask;
1052    
1053     handled = handled || dp->suse || dp->duse;
1054    
1055     /* Mark all registers as used/written if the instruction may trap */
1056     if (may_trap(opcode)) {
1057     handled = true;
1058     ru->rmask = 0xffff;
1059     ru->wmask = 0xffff;
1060     }
1061    
1062     if (!handled) {
1063     write_log("ru_fill: %04x = { %04x, %04x }\n",
1064     real_opcode, ru->rmask, ru->wmask);
1065     abort();
1066     }
1067     }
1068    
1069     /********************************************************************
1070 gbeauche 1.1 * register allocation per block logging *
1071     ********************************************************************/
1072    
1073     static uae_s8 vstate[VREGS];
1074     static uae_s8 vwritten[VREGS];
1075     static uae_s8 nstate[N_REGS];
1076    
1077     #define L_UNKNOWN -127
1078     #define L_UNAVAIL -1
1079     #define L_NEEDED -2
1080     #define L_UNNEEDED -3
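/* Editor's note: vstate[]/nstate[] track, while a block is compiled,
 * which v-reg each native reg holds (nstate[n] >= 0) and whether each
 * v-reg was actually read (L_NEEDED) or overwritten before use
 * (L_UNNEEDED); L_UNKNOWN means no information yet, L_UNAVAIL that the
 * native reg was clobbered, e.g. as a temporary. */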
1081    
1082     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1083     {
1084     int i;
1085    
1086     for (i = 0; i < VREGS; i++)
1087     s->virt[i] = vstate[i];
1088     for (i = 0; i < N_REGS; i++)
1089     s->nat[i] = nstate[i];
1090     }
1091    
1092     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1093     {
1094     int i;
1095     int reverse = 0;
1096    
1097     for (i = 0; i < VREGS; i++) {
1098     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1099     return 1;
1100     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1101     reverse++;
1102     }
1103     for (i = 0; i < N_REGS; i++) {
1104     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1105     return 1;
1106     if (nstate[i] < 0 && s->nat[i] >= 0)
1107     reverse++;
1108     }
1109     if (reverse >= 2 && USE_MATCH)
1110     return 1; /* In this case, it might be worth recompiling the
1111     * callers */
1112     return 0;
1113     }
1114    
1115     static __inline__ void log_startblock(void)
1116     {
1117     int i;
1118    
1119     for (i = 0; i < VREGS; i++) {
1120     vstate[i] = L_UNKNOWN;
1121     vwritten[i] = 0;
1122     }
1123     for (i = 0; i < N_REGS; i++)
1124     nstate[i] = L_UNKNOWN;
1125     }
1126    
1127     /* Using an n-reg for a temp variable */
1128     static __inline__ void log_isused(int n)
1129     {
1130     if (nstate[n] == L_UNKNOWN)
1131     nstate[n] = L_UNAVAIL;
1132     }
1133    
1134     static __inline__ void log_visused(int r)
1135     {
1136     if (vstate[r] == L_UNKNOWN)
1137     vstate[r] = L_NEEDED;
1138     }
1139    
1140     static __inline__ void do_load_reg(int n, int r)
1141     {
1142     if (r == FLAGTMP)
1143     raw_load_flagreg(n, r);
1144     else if (r == FLAGX)
1145     raw_load_flagx(n, r);
1146     else
1147 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1148 gbeauche 1.1 }
1149    
1150     static __inline__ void check_load_reg(int n, int r)
1151     {
1152 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1153 gbeauche 1.1 }
1154    
1155     static __inline__ void log_vwrite(int r)
1156     {
1157     vwritten[r] = 1;
1158     }
1159    
1160     /* Using an n-reg to hold a v-reg */
1161     static __inline__ void log_isreg(int n, int r)
1162     {
1163     static int count = 0;
1164    
1165     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1166     nstate[n] = r;
1167     else {
1168     do_load_reg(n, r);
1169     if (nstate[n] == L_UNKNOWN)
1170     nstate[n] = L_UNAVAIL;
1171     }
1172     if (vstate[r] == L_UNKNOWN)
1173     vstate[r] = L_NEEDED;
1174     }
1175    
1176     static __inline__ void log_clobberreg(int r)
1177     {
1178     if (vstate[r] == L_UNKNOWN)
1179     vstate[r] = L_UNNEEDED;
1180     }
1181    
1182     /* This ends all possibility of clever register allocation */
1183    
1184     static __inline__ void log_flush(void)
1185     {
1186     int i;
1187    
1188     for (i = 0; i < VREGS; i++)
1189     if (vstate[i] == L_UNKNOWN)
1190     vstate[i] = L_NEEDED;
1191     for (i = 0; i < N_REGS; i++)
1192     if (nstate[i] == L_UNKNOWN)
1193     nstate[i] = L_UNAVAIL;
1194     }
1195    
1196     static __inline__ void log_dump(void)
1197     {
1198     int i;
1199    
1200     return; /* logging disabled; remove this early return to enable the dump below */
1201    
1202     write_log("----------------------\n");
1203     for (i = 0; i < N_REGS; i++) {
1204     switch (nstate[i]) {
1205     case L_UNKNOWN:
1206     write_log("Nat %d : UNKNOWN\n", i);
1207     break;
1208     case L_UNAVAIL:
1209     write_log("Nat %d : UNAVAIL\n", i);
1210     break;
1211     default:
1212     write_log("Nat %d : %d\n", i, nstate[i]);
1213     break;
1214     }
1215     }
1216     for (i = 0; i < VREGS; i++) {
1217     if (vstate[i] == L_UNNEEDED)
1218     write_log("Virt %d: UNNEEDED\n", i);
1219     }
1220     }
1221    
1222     /********************************************************************
1223     * register status handling. EMIT TIME! *
1224     ********************************************************************/
1225    
1226     static __inline__ void set_status(int r, int status)
1227     {
1228     if (status == ISCONST)
1229     log_clobberreg(r);
1230     live.state[r].status=status;
1231     }
1232    
1233     static __inline__ int isinreg(int r)
1234     {
1235     return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1236     }
1237    
1238     static __inline__ void adjust_nreg(int r, uae_u32 val)
1239     {
1240     if (!val)
1241     return;
1242     raw_lea_l_brr(r,r,val);
1243     }
1244    
1245     static void tomem(int r)
1246     {
1247     int rr=live.state[r].realreg;
1248    
1249     if (isinreg(r)) {
1250     if (live.state[r].val && live.nat[rr].nholds==1
1251     && !live.nat[rr].locked) {
1252     // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1253     // live.state[r].val,r,rr,target);
1254     adjust_nreg(rr,live.state[r].val);
1255     live.state[r].val=0;
1256     live.state[r].dirtysize=4;
1257     set_status(r,DIRTY);
1258     }
1259     }
1260    
1261     if (live.state[r].status==DIRTY) {
1262     switch (live.state[r].dirtysize) {
1263 gbeauche 1.24 case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
1264     case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
1265     case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
1266 gbeauche 1.1 default: abort();
1267     }
1268     log_vwrite(r);
1269     set_status(r,CLEAN);
1270     live.state[r].dirtysize=0;
1271     }
1272     }
1273    
1274     static __inline__ int isconst(int r)
1275     {
1276     return live.state[r].status==ISCONST;
1277     }
1278    
1279     int is_const(int r)
1280     {
1281     return isconst(r);
1282     }
1283    
1284     static __inline__ void writeback_const(int r)
1285     {
1286     if (!isconst(r))
1287     return;
1288     Dif (live.state[r].needflush==NF_HANDLER) {
1289     write_log("Trying to write back constant NF_HANDLER!\n");
1290     abort();
1291     }
1292    
1293 gbeauche 1.24 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1294 gbeauche 1.1 log_vwrite(r);
1295     live.state[r].val=0;
1296     set_status(r,INMEM);
1297     }
1298    
1299     static __inline__ void tomem_c(int r)
1300     {
1301     if (isconst(r)) {
1302     writeback_const(r);
1303     }
1304     else
1305     tomem(r);
1306     }
1307    
1308     static void evict(int r)
1309     {
1310     int rr;
1311    
1312     if (!isinreg(r))
1313     return;
1314     tomem(r);
1315     rr=live.state[r].realreg;
1316    
1317     Dif (live.nat[rr].locked &&
1318     live.nat[rr].nholds==1) {
1319     write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1320     abort();
1321     }
1322    
1323     live.nat[rr].nholds--;
1324     if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1325     int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1326     int thisind=live.state[r].realind;
1327    
1328     live.nat[rr].holds[thisind]=topreg;
1329     live.state[topreg].realind=thisind;
1330     }
1331     live.state[r].realreg=-1;
1332     set_status(r,INMEM);
1333     }
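/* Editor's note: evict drops r from its native reg's holds[] array by
 * swapping the last entry into r's slot (constant time, order not
 * preserved); the FPU side uses the same trick in f_evict below. */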
1334    
1335     static __inline__ void free_nreg(int r)
1336     {
1337     int i=live.nat[r].nholds;
1338    
1339     while (i) {
1340     int vr;
1341    
1342     --i;
1343     vr=live.nat[r].holds[i];
1344     evict(vr);
1345     }
1346     Dif (live.nat[r].nholds!=0) {
1347     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1348     abort();
1349     }
1350     }
1351    
1352     /* Use with care! */
1353     static __inline__ void isclean(int r)
1354     {
1355     if (!isinreg(r))
1356     return;
1357     live.state[r].validsize=4;
1358     live.state[r].dirtysize=0;
1359     live.state[r].val=0;
1360     set_status(r,CLEAN);
1361     }
1362    
1363     static __inline__ void disassociate(int r)
1364     {
1365     isclean(r);
1366     evict(r);
1367     }
1368    
1369     static __inline__ void set_const(int r, uae_u32 val)
1370     {
1371     disassociate(r);
1372     live.state[r].val=val;
1373     set_status(r,ISCONST);
1374     }
1375    
1376     static __inline__ uae_u32 get_offset(int r)
1377     {
1378     return live.state[r].val;
1379     }
1380    
1381     static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1382     {
1383     int bestreg;
1384     uae_s32 when;
1385     int i;
1386     uae_s32 badness=0; /* to shut up gcc */
1387     bestreg=-1;
1388     when=2000000000;
1389    
1390     for (i=N_REGS;i--;) {
1391     badness=live.nat[i].touched;
1392     if (live.nat[i].nholds==0)
1393     badness=0;
1394     if (i==hint)
1395     badness-=200000000;
1396     if (!live.nat[i].locked && badness<when) {
1397     if ((size==1 && live.nat[i].canbyte) ||
1398     (size==2 && live.nat[i].canword) ||
1399     (size==4)) {
1400     bestreg=i;
1401     when=badness;
1402     if (live.nat[i].nholds==0 && hint<0)
1403     break;
1404     if (i==hint)
1405     break;
1406     }
1407     }
1408     }
1409     Dif (bestreg==-1)
1410     abort();
1411    
1412     if (live.nat[bestreg].nholds>0) {
1413     free_nreg(bestreg);
1414     }
1415     if (isinreg(r)) {
1416     int rr=live.state[r].realreg;
1417     /* This will happen if we read a partially dirty register at a
1418     bigger size */
1419     Dif (willclobber || live.state[r].validsize>=size)
1420     abort();
1421     Dif (live.nat[rr].nholds!=1)
1422     abort();
1423     if (size==4 && live.state[r].validsize==2) {
1424     log_isused(bestreg);
1425     log_visused(r);
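        /* Editor's note on the sequence below: only the low word of r is
         * valid in rr, so load the full value from memory into bestreg,
         * isolate its high word (bswap, zero-extend, bswap back),
         * zero-extend rr down to its valid low word, then recombine the
         * two halves with LEA. */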
1426 gbeauche 1.24 raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
1427 gbeauche 1.1 raw_bswap_32(bestreg);
1428     raw_zero_extend_16_rr(rr,rr);
1429     raw_zero_extend_16_rr(bestreg,bestreg);
1430     raw_bswap_32(bestreg);
1431     raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1432     live.state[r].validsize=4;
1433     live.nat[rr].touched=touchcnt++;
1434     return rr;
1435     }
1436     if (live.state[r].validsize==1) {
1437     /* Nothing yet */
1438     }
1439     evict(r);
1440     }
1441    
1442     if (!willclobber) {
1443     if (live.state[r].status!=UNDEF) {
1444     if (isconst(r)) {
1445     raw_mov_l_ri(bestreg,live.state[r].val);
1446     live.state[r].val=0;
1447     live.state[r].dirtysize=4;
1448     set_status(r,DIRTY);
1449     log_isused(bestreg);
1450     }
1451     else {
1452     log_isreg(bestreg, r); /* This will also load it! */
1453     live.state[r].dirtysize=0;
1454     set_status(r,CLEAN);
1455     }
1456     }
1457     else {
1458     live.state[r].val=0;
1459     live.state[r].dirtysize=0;
1460     set_status(r,CLEAN);
1461     log_isused(bestreg);
1462     }
1463     live.state[r].validsize=4;
1464     }
1465     else { /* this is the easiest way, but not optimal. FIXME! */
1466     /* Now it's trickier, but hopefully still OK */
1467     if (!isconst(r) || size==4) {
1468     live.state[r].validsize=size;
1469     live.state[r].dirtysize=size;
1470     live.state[r].val=0;
1471     set_status(r,DIRTY);
1472     if (size == 4) {
1473     log_clobberreg(r);
1474     log_isused(bestreg);
1475     }
1476     else {
1477     log_visused(r);
1478     log_isused(bestreg);
1479     }
1480     }
1481     else {
1482     if (live.state[r].status!=UNDEF)
1483     raw_mov_l_ri(bestreg,live.state[r].val);
1484     live.state[r].val=0;
1485     live.state[r].validsize=4;
1486     live.state[r].dirtysize=4;
1487     set_status(r,DIRTY);
1488     log_isused(bestreg);
1489     }
1490     }
1491     live.state[r].realreg=bestreg;
1492     live.state[r].realind=live.nat[bestreg].nholds;
1493     live.nat[bestreg].touched=touchcnt++;
1494     live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1495     live.nat[bestreg].nholds++;
1496    
1497     return bestreg;
1498     }
1499    
1500     static int alloc_reg(int r, int size, int willclobber)
1501     {
1502     return alloc_reg_hinted(r,size,willclobber,-1);
1503     }
1504    
1505     static void unlock2(int r)
1506     {
1507     Dif (!live.nat[r].locked)
1508     abort();
1509     live.nat[r].locked--;
1510     }
1511    
1512     static void setlock(int r)
1513     {
1514     live.nat[r].locked++;
1515     }
1516    
1517    
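/* Editor's note: mov_nregs migrates every v-reg currently held by native
 * reg s into native reg d (first evicting whatever d held), then marks s
 * empty; it is how readreg/writereg honor an explicit 'spec' target. */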
1518     static void mov_nregs(int d, int s)
1519     {
1520     int ns=live.nat[s].nholds;
1521     int nd=live.nat[d].nholds;
1522     int i;
1523    
1524     if (s==d)
1525     return;
1526    
1527     if (nd>0)
1528     free_nreg(d);
1529    
1530     log_isused(d);
1531     raw_mov_l_rr(d,s);
1532    
1533     for (i=0;i<live.nat[s].nholds;i++) {
1534     int vs=live.nat[s].holds[i];
1535    
1536     live.state[vs].realreg=d;
1537     live.state[vs].realind=i;
1538     live.nat[d].holds[i]=vs;
1539     }
1540     live.nat[d].nholds=live.nat[s].nholds;
1541    
1542     live.nat[s].nholds=0;
1543     }
1544    
1545    
1546     static __inline__ void make_exclusive(int r, int size, int spec)
1547     {
1548     int clobber;
1549     reg_status oldstate;
1550     int rr=live.state[r].realreg;
1551     int nr;
1552     int nind;
1553     int ndirt=0;
1554     int i;
1555    
1556     if (!isinreg(r))
1557     return;
1558     if (live.nat[rr].nholds==1)
1559     return;
1560     for (i=0;i<live.nat[rr].nholds;i++) {
1561     int vr=live.nat[rr].holds[i];
1562     if (vr!=r &&
1563     (live.state[vr].status==DIRTY || live.state[vr].val))
1564     ndirt++;
1565     }
1566     if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1567     /* Everything else is clean, so let's keep this register */
1568     for (i=0;i<live.nat[rr].nholds;i++) {
1569     int vr=live.nat[rr].holds[i];
1570     if (vr!=r) {
1571     evict(vr);
1572     i--; /* Try that index again! */
1573     }
1574     }
1575     Dif (live.nat[rr].nholds!=1) {
1576     write_log("natreg %d holds %d vregs, %d not exclusive\n",
1577     rr,live.nat[rr].nholds,r);
1578     abort();
1579     }
1580     return;
1581     }
1582    
1583     /* We have to split the register */
1584     oldstate=live.state[r];
1585    
1586     setlock(rr); /* Make sure this doesn't go away */
1587     /* Forget about r being in the register rr */
1588     disassociate(r);
1589     /* Get a new register, that we will clobber completely */
1590     if (oldstate.status==DIRTY) {
1591     /* If dirtysize is <4, we need a register that can handle the
1592     eventual smaller memory store! Thanks to Quake68k for exposing
1593     this detail ;-) */
1594     nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1595     }
1596     else {
1597     nr=alloc_reg_hinted(r,4,1,spec);
1598     }
1599     nind=live.state[r].realind;
1600     live.state[r]=oldstate; /* Keep all the old state info */
1601     live.state[r].realreg=nr;
1602     live.state[r].realind=nind;
1603    
1604     if (size<live.state[r].validsize) {
1605     if (live.state[r].val) {
1606     /* Might as well compensate for the offset now */
1607     raw_lea_l_brr(nr,rr,oldstate.val);
1608     live.state[r].val=0;
1609     live.state[r].dirtysize=4;
1610     set_status(r,DIRTY);
1611     }
1612     else
1613     raw_mov_l_rr(nr,rr); /* Make another copy */
1614     }
1615     unlock2(rr);
1616     }
1617    
1618     static __inline__ void add_offset(int r, uae_u32 off)
1619     {
1620     live.state[r].val+=off;
1621     }
1622    
1623     static __inline__ void remove_offset(int r, int spec)
1624     {
1625     reg_status oldstate;
1626     int rr;
1627    
1628     if (isconst(r))
1629     return;
1630     if (live.state[r].val==0)
1631     return;
1632     if (isinreg(r) && live.state[r].validsize<4)
1633     evict(r);
1634    
1635     if (!isinreg(r))
1636     alloc_reg_hinted(r,4,0,spec);
1637    
1638     Dif (live.state[r].validsize!=4) {
1639     write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1640     abort();
1641     }
1642     make_exclusive(r,0,-1);
1643     /* make_exclusive might have done the job already */
1644     if (live.state[r].val==0)
1645     return;
1646    
1647     rr=live.state[r].realreg;
1648    
1649     if (live.nat[rr].nholds==1) {
1650     //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1651     // live.state[r].val,r,rr,target);
1652     adjust_nreg(rr,live.state[r].val);
1653     live.state[r].dirtysize=4;
1654     live.state[r].val=0;
1655     set_status(r,DIRTY);
1656     return;
1657     }
1658     write_log("Failed in remove_offset\n");
1659     abort();
1660     }
1661    
1662     static __inline__ void remove_all_offsets(void)
1663     {
1664     int i;
1665    
1666     for (i=0;i<VREGS;i++)
1667     remove_offset(i,-1);
1668     }
1669    
1670     static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1671     {
1672     int n;
1673     int answer=-1;
1674    
1675     if (live.state[r].status==UNDEF) {
1676     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1677     }
1678     if (!can_offset)
1679     remove_offset(r,spec);
1680    
1681     if (isinreg(r) && live.state[r].validsize>=size) {
1682     n=live.state[r].realreg;
1683     switch(size) {
1684     case 1:
1685     if (live.nat[n].canbyte || spec>=0) {
1686     answer=n;
1687     }
1688     break;
1689     case 2:
1690     if (live.nat[n].canword || spec>=0) {
1691     answer=n;
1692     }
1693     break;
1694     case 4:
1695     answer=n;
1696     break;
1697     default: abort();
1698     }
1699     if (answer<0)
1700     evict(r);
1701     }
1702     /* either the value was in memory to start with, or it was evicted and
1703     is in memory now */
1704     if (answer<0) {
1705     answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1706     }
1707    
1708     if (spec>=0 && spec!=answer) {
1709     /* Too bad */
1710     mov_nregs(spec,answer);
1711     answer=spec;
1712     }
1713     live.nat[answer].locked++;
1714     live.nat[answer].touched=touchcnt++;
1715     return answer;
1716     }
1717    
1718    
1719    
1720     static int readreg(int r, int size)
1721     {
1722     return readreg_general(r,size,-1,0);
1723     }
1724    
1725     static int readreg_specific(int r, int size, int spec)
1726     {
1727     return readreg_general(r,size,spec,0);
1728     }
1729    
1730     static int readreg_offset(int r, int size)
1731     {
1732     return readreg_general(r,size,-1,1);
1733     }
1734    
1735     /* writereg_general(r, size, spec)
1736     *
1737     * INPUT
1738     * - r : mid-layer register
1739     * - size : requested size (1/2/4)
1740     * - spec : -1 to pick (or free up) any suitable register, otherwise
1741     * specifies the physical register to use in any case
1742     *
1743     * OUTPUT
1744     * - hard (physical, x86 here) register allocated to virtual register r
1745     */
1746     static __inline__ int writereg_general(int r, int size, int spec)
1747     {
1748     int n;
1749     int answer=-1;
1750    
1751     if (size<4) {
1752     remove_offset(r,spec);
1753     }
1754    
1755     make_exclusive(r,size,spec);
1756     if (isinreg(r)) {
1757     int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1758     int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1759     n=live.state[r].realreg;
1760    
1761     Dif (live.nat[n].nholds!=1)
1762     abort();
1763     switch(size) {
1764     case 1:
1765     if (live.nat[n].canbyte || spec>=0) {
1766     live.state[r].dirtysize=ndsize;
1767     live.state[r].validsize=nvsize;
1768     answer=n;
1769     }
1770     break;
1771     case 2:
1772     if (live.nat[n].canword || spec>=0) {
1773     live.state[r].dirtysize=ndsize;
1774     live.state[r].validsize=nvsize;
1775     answer=n;
1776     }
1777     break;
1778     case 4:
1779     live.state[r].dirtysize=ndsize;
1780     live.state[r].validsize=nvsize;
1781     answer=n;
1782     break;
1783     default: abort();
1784     }
1785     if (answer<0)
1786     evict(r);
1787     }
1788     /* either the value was in memory to start with, or it was evicted and
1789     is in memory now */
1790     if (answer<0) {
1791     answer=alloc_reg_hinted(r,size,1,spec);
1792     }
1793     if (spec>=0 && spec!=answer) {
1794     mov_nregs(spec,answer);
1795     answer=spec;
1796     }
1797     if (live.state[r].status==UNDEF)
1798     live.state[r].validsize=4;
1799     live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1800     live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1801    
1802     live.nat[answer].locked++;
1803     live.nat[answer].touched=touchcnt++;
1804     if (size==4) {
1805     live.state[r].val=0;
1806     }
1807     else {
1808     Dif (live.state[r].val) {
1809     write_log("Problem with val\n");
1810     abort();
1811     }
1812     }
1813     set_status(r,DIRTY);
1814     return answer;
1815     }
1816    
1817     static int writereg(int r, int size)
1818     {
1819     return writereg_general(r,size,-1);
1820     }
1821    
1822     static int writereg_specific(int r, int size, int spec)
1823     {
1824     return writereg_general(r,size,spec);
1825     }
1826    
1827     static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1828     {
1829     int n;
1830     int answer=-1;
1831    
1832     if (live.state[r].status==UNDEF) {
1833     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1834     }
1835     remove_offset(r,spec);
1836     make_exclusive(r,0,spec);
1837    
1838     Dif (wsize<rsize) {
1839     write_log("Cannot handle wsize<rsize in rmw_general()\n");
1840     abort();
1841     }
1842     if (isinreg(r) && live.state[r].validsize>=rsize) {
1843     n=live.state[r].realreg;
1844     Dif (live.nat[n].nholds!=1)
1845     abort();
1846    
1847     switch(rsize) {
1848     case 1:
1849     if (live.nat[n].canbyte || spec>=0) {
1850     answer=n;
1851     }
1852     break;
1853     case 2:
1854     if (live.nat[n].canword || spec>=0) {
1855     answer=n;
1856     }
1857     break;
1858     case 4:
1859     answer=n;
1860     break;
1861     default: abort();
1862     }
1863     if (answer<0)
1864     evict(r);
1865     }
1866     /* either the value was in memory to start with, or it was evicted and
1867     is in memory now */
1868     if (answer<0) {
1869     answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1870     }
1871    
1872     if (spec>=0 && spec!=answer) {
1873     /* Too bad */
1874     mov_nregs(spec,answer);
1875     answer=spec;
1876     }
1877     if (wsize>live.state[r].dirtysize)
1878     live.state[r].dirtysize=wsize;
1879     if (wsize>live.state[r].validsize)
1880     live.state[r].validsize=wsize;
1881     set_status(r,DIRTY);
1882    
1883     live.nat[answer].locked++;
1884     live.nat[answer].touched=touchcnt++;
1885    
1886     Dif (live.state[r].val) {
1887     write_log("Problem with val(rmw)\n");
1888     abort();
1889     }
1890     return answer;
1891     }
1892    
1893     static int rmw(int r, int wsize, int rsize)
1894     {
1895     return rmw_general(r,wsize,rsize,-1);
1896     }
1897    
1898     static int rmw_specific(int r, int wsize, int rsize, int spec)
1899     {
1900     return rmw_general(r,wsize,rsize,spec);
1901     }
1902    
1903    
1904     /* needed for restoring the carry flag on non-P6 cores */
1905     static void bt_l_ri_noclobber(R4 r, IMM i)
1906     {
1907     int size=4;
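/* Added note: a bit index below 16 only touches the low word, so only two
   valid bytes of the operand need to be present in the register. */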
1908     if (i<16)
1909     size=2;
1910     r=readreg(r,size);
1911     raw_bt_l_ri(r,i);
1912     unlock2(r);
1913     }
1914    
1915     /********************************************************************
1916     * FPU register status handling. EMIT TIME! *
1917     ********************************************************************/
1918    
1919     static void f_tomem(int r)
1920     {
1921     if (live.fate[r].status==DIRTY) {
1922     #if USE_LONG_DOUBLE
1923 gbeauche 1.24 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1924 gbeauche 1.1 #else
1925 gbeauche 1.24 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1926 gbeauche 1.1 #endif
1927     live.fate[r].status=CLEAN;
1928     }
1929     }
1930    
1931     static void f_tomem_drop(int r)
1932     {
1933     if (live.fate[r].status==DIRTY) {
1934     #if USE_LONG_DOUBLE
1935 gbeauche 1.24 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1936 gbeauche 1.1 #else
1937 gbeauche 1.24 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1938 gbeauche 1.1 #endif
1939     live.fate[r].status=INMEM;
1940     }
1941     }
1942    
1943    
1944     static __inline__ int f_isinreg(int r)
1945     {
1946     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1947     }
1948    
1949     static void f_evict(int r)
1950     {
1951     int rr;
1952    
1953     if (!f_isinreg(r))
1954     return;
1955     rr=live.fate[r].realreg;
1956     if (live.fat[rr].nholds==1)
1957     f_tomem_drop(r);
1958     else
1959     f_tomem(r);
1960    
1961     Dif (live.fat[rr].locked &&
1962     live.fat[rr].nholds==1) {
1963     write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
1964     abort();
1965     }
1966    
1967     live.fat[rr].nholds--;
1968     if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
1969     int topreg=live.fat[rr].holds[live.fat[rr].nholds];
1970     int thisind=live.fate[r].realind;
1971     live.fat[rr].holds[thisind]=topreg;
1972     live.fate[topreg].realind=thisind;
1973     }
1974     live.fate[r].status=INMEM;
1975     live.fate[r].realreg=-1;
1976     }
1977    
1978     static __inline__ void f_free_nreg(int r)
1979     {
1980     int i=live.fat[r].nholds;
1981    
1982     while (i) {
1983     int vr;
1984    
1985     --i;
1986     vr=live.fat[r].holds[i];
1987     f_evict(vr);
1988     }
1989     Dif (live.fat[r].nholds!=0) {
1990     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
1991     abort();
1992     }
1993     }
1994    
1995    
1996     /* Use with care! */
1997     static __inline__ void f_isclean(int r)
1998     {
1999     if (!f_isinreg(r))
2000     return;
2001     live.fate[r].status=CLEAN;
2002     }
2003    
2004     static __inline__ void f_disassociate(int r)
2005     {
2006     f_isclean(r);
2007     f_evict(r);
2008     }
2009    
2010    
2011    
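/* Added note: pick a native FPU register for vreg r, preferring a free one
   and otherwise evicting the least recently touched unlocked register.
   Unless willclobber is set, the old value of r is reloaded from memory
   into the new home. */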
2012     static int f_alloc_reg(int r, int willclobber)
2013     {
2014     int bestreg;
2015     uae_s32 when;
2016     int i;
2017     uae_s32 badness;
2018     bestreg=-1;
2019     when=2000000000;
2020     for (i=N_FREGS;i--;) {
2021     badness=live.fat[i].touched;
2022     if (live.fat[i].nholds==0)
2023     badness=0;
2024    
2025     if (!live.fat[i].locked && badness<when) {
2026     bestreg=i;
2027     when=badness;
2028     if (live.fat[i].nholds==0)
2029     break;
2030     }
2031     }
2032     Dif (bestreg==-1)
2033     abort();
2034    
2035     if (live.fat[bestreg].nholds>0) {
2036     f_free_nreg(bestreg);
2037     }
2038     if (f_isinreg(r)) {
2039     f_evict(r);
2040     }
2041    
2042     if (!willclobber) {
2043     if (live.fate[r].status!=UNDEF) {
2044     #if USE_LONG_DOUBLE
2045 gbeauche 1.24 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2046 gbeauche 1.1 #else
2047 gbeauche 1.24 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2048 gbeauche 1.1 #endif
2049     }
2050     live.fate[r].status=CLEAN;
2051     }
2052     else {
2053     live.fate[r].status=DIRTY;
2054     }
2055     live.fate[r].realreg=bestreg;
2056     live.fate[r].realind=live.fat[bestreg].nholds;
2057     live.fat[bestreg].touched=touchcnt++;
2058     live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2059     live.fat[bestreg].nholds++;
2060    
2061     return bestreg;
2062     }
2063    
2064     static void f_unlock(int r)
2065     {
2066     Dif (!live.fat[r].locked)
2067     abort();
2068     live.fat[r].locked--;
2069     }
2070    
2071     static void f_setlock(int r)
2072     {
2073     live.fat[r].locked++;
2074     }
2075    
2076     static __inline__ int f_readreg(int r)
2077     {
2078     int n;
2079     int answer=-1;
2080    
2081     if (f_isinreg(r)) {
2082     n=live.fate[r].realreg;
2083     answer=n;
2084     }
2085     /* either the value was in memory to start with, or it was evicted and
2086     is in memory now */
2087     if (answer<0)
2088     answer=f_alloc_reg(r,0);
2089    
2090     live.fat[answer].locked++;
2091     live.fat[answer].touched=touchcnt++;
2092     return answer;
2093     }
2094    
2095     static __inline__ void f_make_exclusive(int r, int clobber)
2096     {
2097     freg_status oldstate;
2098     int rr=live.fate[r].realreg;
2099     int nr;
2100     int nind;
2101     int ndirt=0;
2102     int i;
2103    
2104     if (!f_isinreg(r))
2105     return;
2106     if (live.fat[rr].nholds==1)
2107     return;
2108     for (i=0;i<live.fat[rr].nholds;i++) {
2109     int vr=live.fat[rr].holds[i];
2110     if (vr!=r && live.fate[vr].status==DIRTY)
2111     ndirt++;
2112     }
2113     if (!ndirt && !live.fat[rr].locked) {
2114     /* Everything else is clean, so let's keep this register */
2115     for (i=0;i<live.fat[rr].nholds;i++) {
2116     int vr=live.fat[rr].holds[i];
2117     if (vr!=r) {
2118     f_evict(vr);
2119     i--; /* Try that index again! */
2120     }
2121     }
2122     Dif (live.fat[rr].nholds!=1) {
2123     write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2124     for (i=0;i<live.fat[rr].nholds;i++) {
2125     write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2126     live.fate[live.fat[rr].holds[i]].realreg,
2127     live.fate[live.fat[rr].holds[i]].realind);
2128     }
2129     write_log("\n");
2130     abort();
2131     }
2132     return;
2133     }
2134    
2135     /* We have to split the register */
2136     oldstate=live.fate[r];
2137    
2138     f_setlock(rr); /* Make sure this doesn't go away */
2139     /* Forget about r being in the register rr */
2140     f_disassociate(r);
2141     /* Get a new register, that we will clobber completely */
2142     nr=f_alloc_reg(r,1);
2143     nind=live.fate[r].realind;
2144     if (!clobber)
2145     raw_fmov_rr(nr,rr); /* Make another copy */
2146     live.fate[r]=oldstate; /* Keep all the old state info */
2147     live.fate[r].realreg=nr;
2148     live.fate[r].realind=nind;
2149     f_unlock(rr);
2150     }
2151    
2152    
2153     static __inline__ int f_writereg(int r)
2154     {
2155     int n;
2156     int answer=-1;
2157    
2158     f_make_exclusive(r,1);
2159     if (f_isinreg(r)) {
2160     n=live.fate[r].realreg;
2161     answer=n;
2162     }
2163     if (answer<0) {
2164     answer=f_alloc_reg(r,1);
2165     }
2166     live.fate[r].status=DIRTY;
2167     live.fat[answer].locked++;
2168     live.fat[answer].touched=touchcnt++;
2169     return answer;
2170     }
2171    
2172     static int f_rmw(int r)
2173     {
2174     int n;
2175    
2176     f_make_exclusive(r,0);
2177     if (f_isinreg(r)) {
2178     n=live.fate[r].realreg;
2179     }
2180     else
2181     n=f_alloc_reg(r,0);
2182     live.fate[r].status=DIRTY;
2183     live.fat[n].locked++;
2184     live.fat[n].touched=touchcnt++;
2185     return n;
2186     }
2187    
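/* Added note: copy the FPU condition codes held in FP_RESULT into the
   native integer flags; tmp provides scratch space for targets where the
   transfer sequence clobbers a fixed native register (FFLAG_NREG). */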
2188     static void fflags_into_flags_internal(uae_u32 tmp)
2189     {
2190     int r;
2191    
2192     clobber_flags();
2193     r=f_readreg(FP_RESULT);
2194     if (FFLAG_NREG_CLOBBER_CONDITION) {
2195     int tmp2=tmp;
2196     tmp=writereg_specific(tmp,4,FFLAG_NREG);
2197     raw_fflags_into_flags(r);
2198     unlock2(tmp);
2199     forget_about(tmp2);
2200     }
2201     else
2202     raw_fflags_into_flags(r);
2203     f_unlock(r);
2204 gbeauche 1.19 live_flags();
2205 gbeauche 1.1 }
2206    
2207    
2208    
2209    
2210     /********************************************************************
2211     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2212     ********************************************************************/
2213    
2214     /*
2215     * RULES FOR HANDLING REGISTERS:
2216     *
2217     * * In the function headers, order the parameters
2218     * - 1st registers written to
2219     * - 2nd read/modify/write registers
2220     * - 3rd registers read from
2221     * * Before calling raw_*, you must call readreg, writereg or rmw for
2222     * each register
2223     * * The order for this is
2224     * - 1st call remove_offset for all registers written to with size<4
2225     * - 2nd call readreg for all registers read without offset
2226     * - 3rd call rmw for all rmw registers
2227     * - 4th call readreg_offset for all registers that can handle offsets
2228     * - 5th call get_offset for all the registers from the previous step
2229     * - 6th call writereg for all written-to registers
2230     * - 7th call raw_*
2231     * - 8th unlock2 all registers that were locked
2232     */
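
/* As an illustration only (this sketch is not part of the original
   source), the rules above yield emitters of the following shape, shown
   here for a hypothetical read-modify-write primitive raw_foo_l(d,s)
   with a matching hypothetical CLOBBER_FOO macro:

   MIDFUNC(2,foo_l,(RW4 d, R4 s))
   {
       CLOBBER_FOO;        // declare what the opcode clobbers
       s=readreg(s,4);     // 2nd: lock plain source registers for reading
       d=rmw(d,4,4);       // 3rd: lock read/modify/write registers
       raw_foo_l(d,s);     // 7th: emit the actual native opcode
       unlock2(d);         // 8th: unlock everything that was locked
       unlock2(s);
   }
   MENDFUNC(2,foo_l,(RW4 d, R4 s))
*/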
2233    
2234     MIDFUNC(0,live_flags,(void))
2235     {
2236     live.flags_on_stack=TRASH;
2237     live.flags_in_flags=VALID;
2238     live.flags_are_important=1;
2239     }
2240     MENDFUNC(0,live_flags,(void))
2241    
2242     MIDFUNC(0,dont_care_flags,(void))
2243     {
2244     live.flags_are_important=0;
2245     }
2246     MENDFUNC(0,dont_care_flags,(void))
2247    
2248    
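/* Added note: the 68k X flag lives in the virtual register FLAGX, apart
   from the native carry. duplicate_carry stores the native C flag into
   FLAGX's backing slot with a setcc, and restore_carry loads it back into
   the native C flag before an instruction that consumes X. */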
2249     MIDFUNC(0,duplicate_carry,(void))
2250     {
2251     evict(FLAGX);
2252     make_flags_live_internal();
2253 gbeauche 1.24 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2254 gbeauche 1.1 log_vwrite(FLAGX);
2255     }
2256     MENDFUNC(0,duplicate_carry,(void))
2257    
2258     MIDFUNC(0,restore_carry,(void))
2259     {
2260     if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2261     bt_l_ri_noclobber(FLAGX,0);
2262     }
2263     else { /* Avoid the stall the above creates.
2264     This is slow on non-P6, though.
2265     */
2266     COMPCALL(rol_b_ri)(FLAGX,8);
2267     isclean(FLAGX);
2268     }
2269     }
2270     MENDFUNC(0,restore_carry,(void))
2271    
2272     MIDFUNC(0,start_needflags,(void))
2273     {
2274     needflags=1;
2275     }
2276     MENDFUNC(0,start_needflags,(void))
2277    
2278     MIDFUNC(0,end_needflags,(void))
2279     {
2280     needflags=0;
2281     }
2282     MENDFUNC(0,end_needflags,(void))
2283    
2284     MIDFUNC(0,make_flags_live,(void))
2285     {
2286     make_flags_live_internal();
2287     }
2288     MENDFUNC(0,make_flags_live,(void))
2289    
2290     MIDFUNC(1,fflags_into_flags,(W2 tmp))
2291     {
2292     clobber_flags();
2293     fflags_into_flags_internal(tmp);
2294     }
2295     MENDFUNC(1,fflags_into_flags,(W2 tmp))
2296    
2297    
2298     MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2299     {
2300     int size=4;
2301     if (i<16)
2302     size=2;
2303     CLOBBER_BT;
2304     r=readreg(r,size);
2305     raw_bt_l_ri(r,i);
2306     unlock2(r);
2307     }
2308     MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2309    
2310     MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2311     {
2312     CLOBBER_BT;
2313     r=readreg(r,4);
2314     b=readreg(b,4);
2315     raw_bt_l_rr(r,b);
2316     unlock2(r);
2317     unlock2(b);
2318     }
2319     MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2320    
2321     MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2322     {
2323     int size=4;
2324     if (i<16)
2325     size=2;
2326     CLOBBER_BT;
2327     r=rmw(r,size,size);
2328     raw_btc_l_ri(r,i);
2329     unlock2(r);
2330     }
2331     MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2332    
2333     MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2334     {
2335     CLOBBER_BT;
2336     b=readreg(b,4);
2337     r=rmw(r,4,4);
2338     raw_btc_l_rr(r,b);
2339     unlock2(r);
2340     unlock2(b);
2341     }
2342     MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2343    
2344    
2345     MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2346     {
2347     int size=4;
2348     if (i<16)
2349     size=2;
2350     CLOBBER_BT;
2351     r=rmw(r,size,size);
2352     raw_btr_l_ri(r,i);
2353     unlock2(r);
2354     }
2355     MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2356    
2357     MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2358     {
2359     CLOBBER_BT;
2360     b=readreg(b,4);
2361     r=rmw(r,4,4);
2362     raw_btr_l_rr(r,b);
2363     unlock2(r);
2364     unlock2(b);
2365     }
2366     MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2367    
2368    
2369     MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2370     {
2371     int size=4;
2372     if (i<16)
2373     size=2;
2374     CLOBBER_BT;
2375     r=rmw(r,size,size);
2376     raw_bts_l_ri(r,i);
2377     unlock2(r);
2378     }
2379     MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2380    
2381     MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2382     {
2383     CLOBBER_BT;
2384     b=readreg(b,4);
2385     r=rmw(r,4,4);
2386     raw_bts_l_rr(r,b);
2387     unlock2(r);
2388     unlock2(b);
2389     }
2390     MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2391    
2392     MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2393     {
2394     CLOBBER_MOV;
2395     d=writereg(d,4);
2396     raw_mov_l_rm(d,s);
2397     unlock2(d);
2398     }
2399     MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2400    
2401    
2402     MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2403     {
2404     r=readreg(r,4);
2405     raw_call_r(r);
2406     unlock2(r);
2407     }
2408     MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2409    
2410     MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2411     {
2412     CLOBBER_SUB;
2413     raw_sub_l_mi(d,s) ;
2414     }
2415     MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2416    
2417     MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2418     {
2419     CLOBBER_MOV;
2420     raw_mov_l_mi(d,s) ;
2421     }
2422     MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2423    
2424     MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2425     {
2426     CLOBBER_MOV;
2427     raw_mov_w_mi(d,s) ;
2428     }
2429     MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2430    
2431     MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2432     {
2433     CLOBBER_MOV;
2434     raw_mov_b_mi(d,s) ;
2435     }
2436     MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2437    
2438     MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2439     {
2440     if (!i && !needflags)
2441     return;
2442     CLOBBER_ROL;
2443     r=rmw(r,1,1);
2444     raw_rol_b_ri(r,i);
2445     unlock2(r);
2446     }
2447     MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2448    
2449     MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2450     {
2451     if (!i && !needflags)
2452     return;
2453     CLOBBER_ROL;
2454     r=rmw(r,2,2);
2455     raw_rol_w_ri(r,i);
2456     unlock2(r);
2457     }
2458     MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2459    
2460     MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2461     {
2462     if (!i && !needflags)
2463     return;
2464     CLOBBER_ROL;
2465     r=rmw(r,4,4);
2466     raw_rol_l_ri(r,i);
2467     unlock2(r);
2468     }
2469     MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2470    
2471     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2472     {
2473     if (isconst(r)) {
2474     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2475     return;
2476     }
2477     CLOBBER_ROL;
2478     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2479     d=rmw(d,4,4);
2480     Dif (r!=1) {
2481     write_log("Illegal register %d in raw_rol_l\n",r);
2482     abort();
2483     }
2484     raw_rol_l_rr(d,r) ;
2485     unlock2(r);
2486     unlock2(d);
2487     }
2488     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2489    
2490     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2491     { /* Can only do this with r==1, i.e. cl */
2492    
2493     if (isconst(r)) {
2494     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2495     return;
2496     }
2497     CLOBBER_ROL;
2498     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2499     d=rmw(d,2,2);
2500     Dif (r!=1) {
2501     write_log("Illegal register %d in raw_rol_w\n",r);
2502     abort();
2503     }
2504     raw_rol_w_rr(d,r) ;
2505     unlock2(r);
2506     unlock2(d);
2507     }
2508     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2509    
2510     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2511     { /* Can only do this with r==1, i.e. cl */
2512    
2513     if (isconst(r)) {
2514     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2515     return;
2516     }
2517    
2518     CLOBBER_ROL;
2519     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2520     d=rmw(d,1,1);
2521     Dif (r!=1) {
2522     write_log("Illegal register %d in raw_rol_b\n",r);
2523     abort();
2524     }
2525     raw_rol_b_rr(d,r) ;
2526     unlock2(r);
2527     unlock2(d);
2528     }
2529     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2530    
2531    
2532     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2533     {
2534     if (isconst(r)) {
2535     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2536     return;
2537     }
2538     CLOBBER_SHLL;
2539     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2540     d=rmw(d,4,4);
2541     Dif (r!=1) {
2542     write_log("Illegal register %d in raw_shll_l\n",r);
2543     abort();
2544     }
2545     raw_shll_l_rr(d,r) ;
2546     unlock2(r);
2547     unlock2(d);
2548     }
2549     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2550    
2551     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2552     { /* Can only do this with r==1, i.e. cl */
2553    
2554     if (isconst(r)) {
2555     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2556     return;
2557     }
2558     CLOBBER_SHLL;
2559     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2560     d=rmw(d,2,2);
2561     Dif (r!=1) {
2562     write_log("Illegal register %d in raw_shll_w\n",r);
2563     abort();
2564     }
2565     raw_shll_w_rr(d,r) ;
2566     unlock2(r);
2567     unlock2(d);
2568     }
2569     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2570    
2571     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2572     { /* Can only do this with r==1, i.e. cl */
2573    
2574     if (isconst(r)) {
2575     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2576     return;
2577     }
2578    
2579     CLOBBER_SHLL;
2580     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2581     d=rmw(d,1,1);
2582     Dif (r!=1) {
2583     write_log("Illegal register %d in raw_shll_b\n",r);
2584     abort();
2585     }
2586     raw_shll_b_rr(d,r) ;
2587     unlock2(r);
2588     unlock2(d);
2589     }
2590     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2591    
2592    
2593     MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2594     {
2595     if (!i && !needflags)
2596     return;
2597     CLOBBER_ROR;
2598     r=rmw(r,1,1);
2599     raw_ror_b_ri(r,i);
2600     unlock2(r);
2601     }
2602     MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2603    
2604     MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2605     {
2606     if (!i && !needflags)
2607     return;
2608     CLOBBER_ROR;
2609     r=rmw(r,2,2);
2610     raw_ror_w_ri(r,i);
2611     unlock2(r);
2612     }
2613     MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2614    
2615     MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2616     {
2617     if (!i && !needflags)
2618     return;
2619     CLOBBER_ROR;
2620     r=rmw(r,4,4);
2621     raw_ror_l_ri(r,i);
2622     unlock2(r);
2623     }
2624     MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2625    
2626     MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2627     {
2628     if (isconst(r)) {
2629     COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2630     return;
2631     }
2632     CLOBBER_ROR;
2633     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2634     d=rmw(d,4,4);
2635     raw_ror_l_rr(d,r) ;
2636     unlock2(r);
2637     unlock2(d);
2638     }
2639     MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2640    
2641     MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2642     {
2643     if (isconst(r)) {
2644     COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2645     return;
2646     }
2647     CLOBBER_ROR;
2648     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2649     d=rmw(d,2,2);
2650     raw_ror_w_rr(d,r) ;
2651     unlock2(r);
2652     unlock2(d);
2653     }
2654     MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2655    
2656     MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2657     {
2658     if (isconst(r)) {
2659     COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2660     return;
2661     }
2662    
2663     CLOBBER_ROR;
2664     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2665     d=rmw(d,1,1);
2666     raw_ror_b_rr(d,r) ;
2667     unlock2(r);
2668     unlock2(d);
2669     }
2670     MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2671    
2672     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2673     {
2674     if (isconst(r)) {
2675     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2676     return;
2677     }
2678     CLOBBER_SHRL;
2679     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2680     d=rmw(d,4,4);
2681     Dif (r!=1) {
2682     write_log("Illegal register %d in raw_shrl_l\n",r);
2683     abort();
2684     }
2685     raw_shrl_l_rr(d,r) ;
2686     unlock2(r);
2687     unlock2(d);
2688     }
2689     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2690    
2691     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2692     { /* Can only do this with r==1, i.e. cl */
2693    
2694     if (isconst(r)) {
2695     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2696     return;
2697     }
2698     CLOBBER_SHRL;
2699     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2700     d=rmw(d,2,2);
2701     Dif (r!=1) {
2702     write_log("Illegal register %d in raw_shrl_w\n",r);
2703     abort();
2704     }
2705     raw_shrl_w_rr(d,r) ;
2706     unlock2(r);
2707     unlock2(d);
2708     }
2709     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2710    
2711     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2712     { /* Can only do this with r==1, i.e. cl */
2713    
2714     if (isconst(r)) {
2715     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2716     return;
2717     }
2718    
2719     CLOBBER_SHRL;
2720     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2721     d=rmw(d,1,1);
2722     Dif (r!=1) {
2723     write_log("Illegal register %d in raw_shrl_b\n",r);
2724     abort();
2725     }
2726     raw_shrl_b_rr(d,r) ;
2727     unlock2(r);
2728     unlock2(d);
2729     }
2730     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2731    
2732    
2733    
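/* Added note: immediate long shifts of a vreg whose value is a known
   constant are folded at translation time when the flags are not needed,
   so no native code is emitted. */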
2734     MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2735     {
2736     if (!i && !needflags)
2737     return;
2738     if (isconst(r) && !needflags) {
2739     live.state[r].val<<=i;
2740     return;
2741     }
2742     CLOBBER_SHLL;
2743     r=rmw(r,4,4);
2744     raw_shll_l_ri(r,i);
2745     unlock2(r);
2746     }
2747     MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2748    
2749     MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2750     {
2751     if (!i && !needflags)
2752     return;
2753     CLOBBER_SHLL;
2754     r=rmw(r,2,2);
2755     raw_shll_w_ri(r,i);
2756     unlock2(r);
2757     }
2758     MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2759    
2760     MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2761     {
2762     if (!i && !needflags)
2763     return;
2764     CLOBBER_SHLL;
2765     r=rmw(r,1,1);
2766     raw_shll_b_ri(r,i);
2767     unlock2(r);
2768     }
2769     MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2770    
2771     MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2772     {
2773     if (!i && !needflags)
2774     return;
2775     if (isconst(r) && !needflags) {
2776     live.state[r].val>>=i;
2777     return;
2778     }
2779     CLOBBER_SHRL;
2780     r=rmw(r,4,4);
2781     raw_shrl_l_ri(r,i);
2782     unlock2(r);
2783     }
2784     MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2785    
2786     MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2787     {
2788     if (!i && !needflags)
2789     return;
2790     CLOBBER_SHRL;
2791     r=rmw(r,2,2);
2792     raw_shrl_w_ri(r,i);
2793     unlock2(r);
2794     }
2795     MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2796    
2797     MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2798     {
2799     if (!i && !needflags)
2800     return;
2801     CLOBBER_SHRL;
2802     r=rmw(r,1,1);
2803     raw_shrl_b_ri(r,i);
2804     unlock2(r);
2805     }
2806     MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2807    
2808     MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2809     {
2810     if (!i && !needflags)
2811     return;
2812     CLOBBER_SHRA;
2813     r=rmw(r,4,4);
2814     raw_shra_l_ri(r,i);
2815     unlock2(r);
2816     }
2817     MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2818    
2819     MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2820     {
2821     if (!i && !needflags)
2822     return;
2823     CLOBBER_SHRA;
2824     r=rmw(r,2,2);
2825     raw_shra_w_ri(r,i);
2826     unlock2(r);
2827     }
2828     MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2829    
2830     MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2831     {
2832     if (!i && !needflags)
2833     return;
2834     CLOBBER_SHRA;
2835     r=rmw(r,1,1);
2836     raw_shra_b_ri(r,i);
2837     unlock2(r);
2838     }
2839     MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2840    
2841     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2842     {
2843     if (isconst(r)) {
2844     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2845     return;
2846     }
2847     CLOBBER_SHRA;
2848     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2849     d=rmw(d,4,4);
2850     Dif (r!=1) {
2851     write_log("Illegal register %d in raw_shra_l\n",r);
2852     abort();
2853     }
2854     raw_shra_l_rr(d,r) ;
2855     unlock2(r);
2856     unlock2(d);
2857     }
2858     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2859    
2860     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2861     { /* Can only do this with r==1, i.e. cl */
2862    
2863     if (isconst(r)) {
2864     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2865     return;
2866     }
2867     CLOBBER_SHRA;
2868     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2869     d=rmw(d,2,2);
2870     Dif (r!=1) {
2871     write_log("Illegal register %d in raw_shra_w\n",r);
2872     abort();
2873     }
2874     raw_shra_w_rr(d,r) ;
2875     unlock2(r);
2876     unlock2(d);
2877     }
2878     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2879    
2880     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2881     { /* Can only do this with r==1, i.e. cl */
2882    
2883     if (isconst(r)) {
2884     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2885     return;
2886     }
2887    
2888     CLOBBER_SHRA;
2889     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2890     d=rmw(d,1,1);
2891     Dif (r!=1) {
2892     write_log("Illegal register %d in raw_shra_b\n",r);
2893     abort();
2894     }
2895     raw_shra_b_rr(d,r) ;
2896     unlock2(r);
2897     unlock2(d);
2898     }
2899     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2900    
2901    
2902     MIDFUNC(2,setcc,(W1 d, IMM cc))
2903     {
2904     CLOBBER_SETCC;
2905     d=writereg(d,1);
2906     raw_setcc(d,cc);
2907     unlock2(d);
2908     }
2909     MENDFUNC(2,setcc,(W1 d, IMM cc))
2910    
2911     MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2912     {
2913     CLOBBER_SETCC;
2914     raw_setcc_m(d,cc);
2915     }
2916     MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2917    
2918     MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2919     {
2920     if (d==s)
2921     return;
2922     CLOBBER_CMOV;
2923     s=readreg(s,4);
2924     d=rmw(d,4,4);
2925     raw_cmov_l_rr(d,s,cc);
2926     unlock2(s);
2927     unlock2(d);
2928     }
2929     MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2930    
2931     MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2932     {
2933     CLOBBER_CMOV;
2934     d=rmw(d,4,4);
2935     raw_cmov_l_rm(d,s,cc);
2936     unlock2(d);
2937     }
2938     MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2939    
2940 gbeauche 1.26 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2941     {
2942     CLOBBER_BSF;
2943     s = readreg(s, 4);
2944     d = writereg(d, 4);
2945     raw_bsf_l_rr(d, s);
2946     unlock2(s);
2947     unlock2(d);
2948     }
2949     MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2950    
2951     /* Set the Z flag depending on the value in s. Note that the
2952     value has to be 0 or -1 (or, more precisely, for non-zero
2953     values, bit 14 must be set)! */
2954     MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
2955 gbeauche 1.1 {
2956 gbeauche 1.26 CLOBBER_BSF;
2957     s=rmw_specific(s,4,4,FLAG_NREG3);
2958     tmp=writereg(tmp,4);
2959     raw_flags_set_zero(s, tmp);
2960     unlock2(tmp);
2961     unlock2(s);
2962 gbeauche 1.1 }
2963 gbeauche 1.26 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
2964 gbeauche 1.1
2965     MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
2966     {
2967     CLOBBER_MUL;
2968     s=readreg(s,4);
2969     d=rmw(d,4,4);
2970     raw_imul_32_32(d,s);
2971     unlock2(s);
2972     unlock2(d);
2973     }
2974     MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
2975    
2976     MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2977     {
2978     CLOBBER_MUL;
2979     s=rmw_specific(s,4,4,MUL_NREG2);
2980     d=rmw_specific(d,4,4,MUL_NREG1);
2981     raw_imul_64_32(d,s);
2982     unlock2(s);
2983     unlock2(d);
2984     }
2985     MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2986    
2987     MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2988     {
2989     CLOBBER_MUL;
2990     s=rmw_specific(s,4,4,MUL_NREG2);
2991     d=rmw_specific(d,4,4,MUL_NREG1);
2992     raw_mul_64_32(d,s);
2993     unlock2(s);
2994     unlock2(d);
2995     }
2996     MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2997    
2998     MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
2999     {
3000     CLOBBER_MUL;
3001     s=readreg(s,4);
3002     d=rmw(d,4,4);
3003     raw_mul_32_32(d,s);
3004     unlock2(s);
3005     unlock2(d);
3006     }
3007     MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3008    
3009 gbeauche 1.24 #if SIZEOF_VOID_P == 8
3010     MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3011     {
3012     int isrmw;
3013    
3014     if (isconst(s)) {
3015     set_const(d,(uae_s32)live.state[s].val);
3016     return;
3017     }
3018    
3019     CLOBBER_SE32;
3020     isrmw=(s==d);
3021     if (!isrmw) {
3022     s=readreg(s,4);
3023     d=writereg(d,4);
3024     }
3025     else { /* If we try to lock this twice, with different sizes, we
3026     are in trouble! */
3027     s=d=rmw(s,4,4);
3028     }
3029     raw_sign_extend_32_rr(d,s);
3030     if (!isrmw) {
3031     unlock2(d);
3032     unlock2(s);
3033     }
3034     else {
3035     unlock2(s);
3036     }
3037     }
3038     MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3039     #endif
3040    
3041 gbeauche 1.1 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3042     {
3043     int isrmw;
3044    
3045     if (isconst(s)) {
3046     set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3047     return;
3048     }
3049    
3050     CLOBBER_SE16;
3051     isrmw=(s==d);
3052     if (!isrmw) {
3053     s=readreg(s,2);
3054     d=writereg(d,4);
3055     }
3056     else { /* If we try to lock this twice, with different sizes, we
3057     are in trouble! */
3058     s=d=rmw(s,4,2);
3059     }
3060     raw_sign_extend_16_rr(d,s);
3061     if (!isrmw) {
3062     unlock2(d);
3063     unlock2(s);
3064     }
3065     else {
3066     unlock2(s);
3067     }
3068     }
3069     MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3070    
3071     MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3072     {
3073     int isrmw;
3074    
3075     if (isconst(s)) {
3076     set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3077     return;
3078     }
3079    
3080     isrmw=(s==d);
3081     CLOBBER_SE8;
3082     if (!isrmw) {
3083     s=readreg(s,1);
3084     d=writereg(d,4);
3085     }
3086     else { /* If we try to lock this twice, with different sizes, we
3087     are in trouble! */
3088     s=d=rmw(s,4,1);
3089     }
3090    
3091     raw_sign_extend_8_rr(d,s);
3092    
3093     if (!isrmw) {
3094     unlock2(d);
3095     unlock2(s);
3096     }
3097     else {
3098     unlock2(s);
3099     }
3100     }
3101     MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3102    
3103    
3104     MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3105     {
3106     int isrmw;
3107    
3108     if (isconst(s)) {
3109     set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3110     return;
3111     }
3112    
3113     isrmw=(s==d);
3114     CLOBBER_ZE16;
3115     if (!isrmw) {
3116     s=readreg(s,2);
3117     d=writereg(d,4);
3118     }
3119     else { /* If we try to lock this twice, with different sizes, we
3120     are in trouble! */
3121     s=d=rmw(s,4,2);
3122     }
3123     raw_zero_extend_16_rr(d,s);
3124     if (!isrmw) {
3125     unlock2(d);
3126     unlock2(s);
3127     }
3128     else {
3129     unlock2(s);
3130     }
3131     }
3132     MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3133    
3134     MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3135     {
3136     int isrmw;
3137     if (isconst(s)) {
3138     set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3139     return;
3140     }
3141    
3142     isrmw=(s==d);
3143     CLOBBER_ZE8;
3144     if (!isrmw) {
3145     s=readreg(s,1);
3146     d=writereg(d,4);
3147     }
3148     else { /* If we try to lock this twice, with different sizes, we
3149     are in trouble! */
3150     s=d=rmw(s,4,1);
3151     }
3152    
3153     raw_zero_extend_8_rr(d,s);
3154    
3155     if (!isrmw) {
3156     unlock2(d);
3157     unlock2(s);
3158     }
3159     else {
3160     unlock2(s);
3161     }
3162     }
3163     MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3164    
3165     MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3166     {
3167     if (d==s)
3168     return;
3169     if (isconst(s)) {
3170     COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3171     return;
3172     }
3173    
3174     CLOBBER_MOV;
3175     s=readreg(s,1);
3176     d=writereg(d,1);
3177     raw_mov_b_rr(d,s);
3178     unlock2(d);
3179     unlock2(s);
3180     }
3181     MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3182    
3183     MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3184     {
3185     if (d==s)
3186     return;
3187     if (isconst(s)) {
3188     COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3189     return;
3190     }
3191    
3192     CLOBBER_MOV;
3193     s=readreg(s,2);
3194     d=writereg(d,2);
3195     raw_mov_w_rr(d,s);
3196     unlock2(d);
3197     unlock2(s);
3198     }
3199     MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3200    
3201    
3202     MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3203     {
3204     CLOBBER_MOV;
3205     baser=readreg(baser,4);
3206     index=readreg(index,4);
3207     d=writereg(d,4);
3208    
3209     raw_mov_l_rrm_indexed(d,baser,index,factor);
3210     unlock2(d);
3211     unlock2(baser);
3212     unlock2(index);
3213     }
3214     MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3215    
3216     MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3217     {
3218     CLOBBER_MOV;
3219     baser=readreg(baser,4);
3220     index=readreg(index,4);
3221     d=writereg(d,2);
3222    
3223     raw_mov_w_rrm_indexed(d,baser,index,factor);
3224     unlock2(d);
3225     unlock2(baser);
3226     unlock2(index);
3227     }
3228     MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3229    
3230     MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3231     {
3232     CLOBBER_MOV;
3233     baser=readreg(baser,4);
3234     index=readreg(index,4);
3235     d=writereg(d,1);
3236    
3237     raw_mov_b_rrm_indexed(d,baser,index,factor);
3238    
3239     unlock2(d);
3240     unlock2(baser);
3241     unlock2(index);
3242     }
3243     MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3244    
3245    
3246     MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3247     {
3248     CLOBBER_MOV;
3249     baser=readreg(baser,4);
3250     index=readreg(index,4);
3251     s=readreg(s,4);
3252    
3253     Dif (baser==s || index==s)
3254     abort();
3255    
3256    
3257     raw_mov_l_mrr_indexed(baser,index,factor,s);
3258     unlock2(s);
3259     unlock2(baser);
3260     unlock2(index);
3261     }
3262     MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3263    
3264     MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3265     {
3266     CLOBBER_MOV;
3267     baser=readreg(baser,4);
3268     index=readreg(index,4);
3269     s=readreg(s,2);
3270    
3271     raw_mov_w_mrr_indexed(baser,index,factor,s);
3272     unlock2(s);
3273     unlock2(baser);
3274     unlock2(index);
3275     }
3276     MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3277    
3278     MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3279     {
3280     CLOBBER_MOV;
3281     s=readreg(s,1);
3282     baser=readreg(baser,4);
3283     index=readreg(index,4);
3284    
3285     raw_mov_b_mrr_indexed(baser,index,factor,s);
3286     unlock2(s);
3287     unlock2(baser);
3288     unlock2(index);
3289     }
3290     MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3291    
3292    
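/* Added note: the base+index accessors below fold any pending constant
   offsets of the base and index vregs into the displacement via
   readreg_offset/get_offset, so deferred add_offset state never leaks
   into the generated addressing mode. */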
3293     MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3294     {
3295     int basereg=baser;
3296     int indexreg=index;
3297    
3298     CLOBBER_MOV;
3299     s=readreg(s,4);
3300     baser=readreg_offset(baser,4);
3301     index=readreg_offset(index,4);
3302    
3303     base+=get_offset(basereg);
3304     base+=factor*get_offset(indexreg);
3305    
3306     raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3307     unlock2(s);
3308     unlock2(baser);
3309     unlock2(index);
3310     }
3311     MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3312    
3313     MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3314     {
3315     int basereg=baser;
3316     int indexreg=index;
3317    
3318     CLOBBER_MOV;
3319     s=readreg(s,2);
3320     baser=readreg_offset(baser,4);
3321     index=readreg_offset(index,4);
3322    
3323     base+=get_offset(basereg);
3324     base+=factor*get_offset(indexreg);
3325    
3326     raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3327     unlock2(s);
3328     unlock2(baser);
3329     unlock2(index);
3330     }
3331     MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3332    
3333     MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3334     {
3335     int basereg=baser;
3336     int indexreg=index;
3337    
3338     CLOBBER_MOV;
3339     s=readreg(s,1);
3340     baser=readreg_offset(baser,4);
3341     index=readreg_offset(index,4);
3342    
3343     base+=get_offset(basereg);
3344     base+=factor*get_offset(indexreg);
3345    
3346     raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3347     unlock2(s);
3348     unlock2(baser);
3349     unlock2(index);
3350     }
3351     MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3352    
3353    
3354    
3355     /* Read a long from base+baser+factor*index */
3356     MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3357     {
3358     int basereg=baser;
3359     int indexreg=index;
3360    
3361     CLOBBER_MOV;
3362     baser=readreg_offset(baser,4);
3363     index=readreg_offset(index,4);
3364     base+=get_offset(basereg);
3365     base+=factor*get_offset(indexreg);
3366     d=writereg(d,4);
3367     raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3368     unlock2(d);
3369     unlock2(baser);
3370     unlock2(index);
3371     }
3372     MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3373    
3374    
3375     MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3376     {
3377     int basereg=baser;
3378     int indexreg=index;
3379    
3380     CLOBBER_MOV;
3381     remove_offset(d,-1);
3382     baser=readreg_offset(baser,4);
3383     index=readreg_offset(index,4);
3384     base+=get_offset(basereg);
3385     base+=factor*get_offset(indexreg);
3386     d=writereg(d,2);
3387     raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3388     unlock2(d);
3389     unlock2(baser);
3390     unlock2(index);
3391     }
3392     MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3393    
3394    
3395     MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3396     {
3397     int basereg=baser;
3398     int indexreg=index;
3399    
3400     CLOBBER_MOV;
3401     remove_offset(d,-1);
3402     baser=readreg_offset(baser,4);
3403     index=readreg_offset(index,4);
3404     base+=get_offset(basereg);
3405     base+=factor*get_offset(indexreg);
3406     d=writereg(d,1);
3407     raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3408     unlock2(d);
3409     unlock2(baser);
3410     unlock2(index);
3411     }
3412     MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3413    
3414     /* Read a long from base+factor*index */
3415     MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3416     {
3417     int indexreg=index;
3418    
3419     if (isconst(index)) {
3420     COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3421     return;
3422     }
3423    
3424     CLOBBER_MOV;
3425     index=readreg_offset(index,4);
3426     base+=get_offset(indexreg)*factor;
3427     d=writereg(d,4);
3428    
3429     raw_mov_l_rm_indexed(d,base,index,factor);
3430     unlock2(index);
3431     unlock2(d);
3432     }
3433     MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3434    
3435    
3436     /* read the long at the address contained in s+offset and store in d */
3437     MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3438     {
3439     if (isconst(s)) {
3440     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3441     return;
3442     }
3443     CLOBBER_MOV;
3444     s=readreg(s,4);
3445     d=writereg(d,4);
3446    
3447     raw_mov_l_rR(d,s,offset);
3448     unlock2(d);
3449     unlock2(s);
3450     }
3451     MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3452    
3453     /* read the word at the address contained in s+offset and store in d */
3454     MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3455     {
3456     if (isconst(s)) {
3457     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3458     return;
3459     }
3460     CLOBBER_MOV;
3461     s=readreg(s,4);
3462     d=writereg(d,2);
3463    
3464     raw_mov_w_rR(d,s,offset);
3465     unlock2(d);
3466     unlock2(s);
3467     }
3468     MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3469    
3470     /* read the byte at the address contained in s+offset and store in d */
3471     MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3472     {
3473     if (isconst(s)) {
3474     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3475     return;
3476     }
3477     CLOBBER_MOV;
3478     s=readreg(s,4);
3479     d=writereg(d,1);
3480    
3481     raw_mov_b_rR(d,s,offset);
3482     unlock2(d);
3483     unlock2(s);
3484     }
3485     MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3486    
3487     /* read the long at the address contained in s+offset and store in d */
3488     MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3489     {
3490     int sreg=s;
3491     if (isconst(s)) {
3492     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3493     return;
3494     }
3495     CLOBBER_MOV;
3496     s=readreg_offset(s,4);
3497     offset+=get_offset(sreg);
3498     d=writereg(d,4);
3499    
3500     raw_mov_l_brR(d,s,offset);
3501     unlock2(d);
3502     unlock2(s);
3503     }
3504     MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3505    
3506     /* read the word at the address contained in s+offset and store in d */
3507     MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3508     {
3509     int sreg=s;
3510     if (isconst(s)) {
3511     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3512     return;
3513     }
3514     CLOBBER_MOV;
3515     remove_offset(d,-1);
3516     s=readreg_offset(s,4);
3517     offset+=get_offset(sreg);
3518     d=writereg(d,2);
3519    
3520     raw_mov_w_brR(d,s,offset);
3521     unlock2(d);
3522     unlock2(s);
3523     }
3524     MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3525    
3526     /* read the byte at the address contained in s+offset and store in d */
3527     MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3528     {
3529     int sreg=s;
3530     if (isconst(s)) {
3531     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3532     return;
3533     }
3534     CLOBBER_MOV;
3535     remove_offset(d,-1);
3536     s=readreg_offset(s,4);
3537     offset+=get_offset(sreg);
3538     d=writereg(d,1);
3539    
3540     raw_mov_b_brR(d,s,offset);
3541     unlock2(d);
3542     unlock2(s);
3543     }
3544     MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3545    
3546     MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3547     {
3548     int dreg=d;
3549     if (isconst(d)) {
3550     COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3551     return;
3552     }
3553    
3554     CLOBBER_MOV;
3555     d=readreg_offset(d,4);
3556     offset+=get_offset(dreg);
3557     raw_mov_l_Ri(d,i,offset);
3558     unlock2(d);
3559     }
3560     MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3561    
3562     MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3563     {
3564     int dreg=d;
3565     if (isconst(d)) {
3566     COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3567     return;
3568     }
3569    
3570     CLOBBER_MOV;
3571     d=readreg_offset(d,4);
3572     offset+=get_offset(dreg);
3573     raw_mov_w_Ri(d,i,offset);
3574     unlock2(d);
3575     }
3576     MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3577    
3578     MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3579     {
3580     int dreg=d;
3581     if (isconst(d)) {
3582     COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3583     return;
3584     }
3585    
3586     CLOBBER_MOV;
3587     d=readreg_offset(d,4);
3588     offset+=get_offset(dreg);
3589     raw_mov_b_Ri(d,i,offset);
3590     unlock2(d);
3591     }
3592     MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3593    
3594     /* Warning! OFFSET is byte sized only! */
3595     MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3596     {
3597     if (isconst(d)) {
3598     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3599     return;
3600     }
3601     if (isconst(s)) {
3602     COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3603     return;
3604     }
3605    
3606     CLOBBER_MOV;
3607     s=readreg(s,4);
3608     d=readreg(d,4);
3609    
3610     raw_mov_l_Rr(d,s,offset);
3611     unlock2(d);
3612     unlock2(s);
3613     }
3614     MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3615    
3616     MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3617     {
3618     if (isconst(d)) {
3619     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3620     return;
3621     }
3622     if (isconst(s)) {
3623     COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3624     return;
3625     }
3626    
3627     CLOBBER_MOV;
3628     s=readreg(s,2);
3629     d=readreg(d,4);
3630     raw_mov_w_Rr(d,s,offset);
3631     unlock2(d);
3632     unlock2(s);
3633     }
3634     MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3635    
3636     MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3637     {
3638     if (isconst(d)) {
3639     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3640     return;
3641     }
3642     if (isconst(s)) {
3643     COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3644     return;
3645     }
3646    
3647     CLOBBER_MOV;
3648     s=readreg(s,1);
3649     d=readreg(d,4);
3650     raw_mov_b_Rr(d,s,offset);
3651     unlock2(d);
3652     unlock2(s);
3653     }
3654     MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3655    
3656     MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3657     {
3658     if (isconst(s)) {
3659     COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3660     return;
3661     }
3662     #if USE_OFFSET
3663     if (d==s) {
3664     add_offset(d,offset);
3665     return;
3666     }
3667     #endif
3668     CLOBBER_LEA;
3669     s=readreg(s,4);
3670     d=writereg(d,4);
3671     raw_lea_l_brr(d,s,offset);
3672     unlock2(d);
3673     unlock2(s);
3674     }
3675     MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3676    
3677     MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3678     {
3679     if (!offset) {
3680     COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3681     return;
3682     }
3683     CLOBBER_LEA;
3684     s=readreg(s,4);
3685     index=readreg(index,4);
3686     d=writereg(d,4);
3687    
3688     raw_lea_l_brr_indexed(d,s,index,factor,offset);
3689     unlock2(d);
3690     unlock2(index);
3691     unlock2(s);
3692     }
3693     MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3694    
3695     MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3696     {
3697     CLOBBER_LEA;
3698     s=readreg(s,4);
3699     index=readreg(index,4);
3700     d=writereg(d,4);
3701    
3702     raw_lea_l_rr_indexed(d,s,index,factor);
3703     unlock2(d);
3704     unlock2(index);
3705     unlock2(s);
3706     }
3707     MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3708    
3709     /* write the long in s to the address contained in d+offset */
3710     MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3711     {
3712     int dreg=d;
3713     if (isconst(d)) {
3714     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3715     return;
3716     }
3717    
3718     CLOBBER_MOV;
3719     s=readreg(s,4);
3720     d=readreg_offset(d,4);
3721     offset+=get_offset(dreg);
3722    
3723     raw_mov_l_bRr(d,s,offset);
3724     unlock2(d);
3725     unlock2(s);
3726     }
3727     MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3728    
3729     /* write the word in s to the address contained in d+offset */
3730     MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3731     {
3732     int dreg=d;
3733    
3734     if (isconst(d)) {
3735     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3736     return;
3737     }
3738    
3739     CLOBBER_MOV;
3740     s=readreg(s,2);
3741     d=readreg_offset(d,4);
3742     offset+=get_offset(dreg);
3743     raw_mov_w_bRr(d,s,offset);
3744     unlock2(d);
3745     unlock2(s);
3746     }
3747     MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3748    
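/* write the byte in s to the address contained in d+offset */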
3749     MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3750     {
3751     int dreg=d;
3752     if (isconst(d)) {
3753     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3754     return;
3755     }
3756    
3757     CLOBBER_MOV;
3758     s=readreg(s,1);
3759     d=readreg_offset(d,4);
3760     offset+=get_offset(dreg);
3761     raw_mov_b_bRr(d,s,offset);
3762     unlock2(d);
3763     unlock2(s);
3764     }
3765     MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3766    
3767     MIDFUNC(1,bswap_32,(RW4 r))
3768     {
3769     int reg=r;
3770    
3771     if (isconst(r)) {
3772     uae_u32 oldv=live.state[r].val;
3773     live.state[r].val=reverse32(oldv);
3774     return;
3775     }
3776    
3777     CLOBBER_SW32;
3778     r=rmw(r,4,4);
3779     raw_bswap_32(r);
3780     unlock2(r);
3781     }
3782     MENDFUNC(1,bswap_32,(RW4 r))
3783    
3784     MIDFUNC(1,bswap_16,(RW2 r))
3785     {
3786     if (isconst(r)) {
3787     uae_u32 oldv=live.state[r].val;
3788     live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3789     (oldv&0xffff0000);
3790     return;
3791     }
3792    
3793     CLOBBER_SW16;
3794     r=rmw(r,2,2);
3795    
3796     raw_bswap_16(r);
3797     unlock2(r);
3798     }
3799     MENDFUNC(1,bswap_16,(RW2 r))
3800    
3801    
3802    
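/* Added note: a long register-to-register move is free. Instead of
   emitting a move instruction, d is simply recorded as one more holder
   of s's native register. */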
3803     MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3804     {
3805     int olds;
3806    
3807     if (d==s) { /* How pointless! */
3808     return;
3809     }
3810     if (isconst(s)) {
3811     COMPCALL(mov_l_ri)(d,live.state[s].val);
3812     return;
3813     }
3814     olds=s;
3815     disassociate(d);
3816     s=readreg_offset(s,4);
3817     live.state[d].realreg=s;
3818     live.state[d].realind=live.nat[s].nholds;
3819     live.state[d].val=live.state[olds].val;
3820     live.state[d].validsize=4;
3821     live.state[d].dirtysize=4;
3822     set_status(d,DIRTY);
3823    
3824     live.nat[s].holds[live.nat[s].nholds]=d;
3825     live.nat[s].nholds++;
3826     log_clobberreg(d);
3827     /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3828     d,s,live.state[d].realind,live.nat[s].nholds); */
3829     unlock2(s);
3830     }
3831     MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3832    
3833     MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3834     {
3835     if (isconst(s)) {
3836     COMPCALL(mov_l_mi)(d,live.state[s].val);
3837     return;
3838     }
3839     CLOBBER_MOV;
3840     s=readreg(s,4);
3841    
3842     raw_mov_l_mr(d,s);
3843     unlock2(s);
3844     }
3845     MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3846    
3847    
3848     MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3849     {
3850     if (isconst(s)) {
3851     COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3852     return;
3853     }
3854     CLOBBER_MOV;
3855     s=readreg(s,2);
3856    
3857     raw_mov_w_mr(d,s);
3858     unlock2(s);
3859     }
3860     MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3861    
3862     MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3863     {
3864     CLOBBER_MOV;
3865     d=writereg(d,2);
3866    
3867     raw_mov_w_rm(d,s);
3868     unlock2(d);
3869     }
3870     MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3871    
3872     MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3873     {
3874     if (isconst(s)) {
3875     COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3876     return;
3877     }
3878    
3879     CLOBBER_MOV;
3880     s=readreg(s,1);
3881    
3882     raw_mov_b_mr(d,s);
3883     unlock2(s);
3884     }
3885     MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3886    
3887     MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3888     {
3889     CLOBBER_MOV;
3890     d=writereg(d,1);
3891    
3892     raw_mov_b_rm(d,s);
3893     unlock2(d);
3894     }
3895     MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3896    
3897     MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3898     {
3899     set_const(d,s);
3900     return;
3901     }
3902     MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3903    
3904     MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3905     {
3906     CLOBBER_MOV;
3907     d=writereg(d,2);
3908    
3909     raw_mov_w_ri(d,s);
3910     unlock2(d);
3911     }
3912     MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3913    
3914     MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3915     {
3916     CLOBBER_MOV;
3917     d=writereg(d,1);
3918    
3919     raw_mov_b_ri(d,s);
3920     unlock2(d);
3921     }
3922     MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3923    
3924    
3925     MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3926     {
3927     CLOBBER_ADD;
3928     raw_add_l_mi(d,s) ;
3929     }
3930     MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3931    
3932     MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3933     {
3934     CLOBBER_ADD;
3935     raw_add_w_mi(d,s) ;
3936     }
3937     MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3938    
3939     MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3940     {
3941     CLOBBER_ADD;
3942     raw_add_b_mi(d,s) ;
3943     }
3944     MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3945    
3946    
3947     MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3948     {
3949     CLOBBER_TEST;
3950     d=readreg(d,4);
3951    
3952     raw_test_l_ri(d,i);
3953     unlock2(d);
3954     }
3955     MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3956    
3957     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3958     {
3959     CLOBBER_TEST;
3960     d=readreg(d,4);
3961     s=readreg(s,4);
3962    
3963     raw_test_l_rr(d,s);
3964     unlock2(d);
3965     unlock2(s);
3966     }
3967     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3968    
3969     MIDFUNC(2,test_w_rr,(R2 d, R2 s))
3970     {
3971     CLOBBER_TEST;
3972     d=readreg(d,2);
3973     s=readreg(s,2);
3974    
3975     raw_test_w_rr(d,s);
3976     unlock2(d);
3977     unlock2(s);
3978     }
3979     MENDFUNC(2,test_w_rr,(R2 d, R2 s))
3980    
3981     MIDFUNC(2,test_b_rr,(R1 d, R1 s))
3982     {
3983     CLOBBER_TEST;
3984     d=readreg(d,1);
3985     s=readreg(s,1);
3986    
3987     raw_test_b_rr(d,s);
3988     unlock2(d);
3989     unlock2(s);
3990     }
3991     MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3992    
3993    
3994     MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
3995     {
3996     if (isconst(d) && !needflags) {
3997     live.state[d].val &= i;
3998     return;
3999     }
4000    
4001     CLOBBER_AND;
4002     d=rmw(d,4,4);
4003    
4004     raw_and_l_ri(d,i);
4005     unlock2(d);
4006     }
4007     MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4008    
4009     MIDFUNC(2,and_l,(RW4 d, R4 s))
4010     {
4011     CLOBBER_AND;
4012     s=readreg(s,4);
4013     d=rmw(d,4,4);
4014    
4015     raw_and_l(d,s);
4016     unlock2(d);
4017     unlock2(s);
4018     }
4019     MENDFUNC(2,and_l,(RW4 d, R4 s))
4020    
4021     MIDFUNC(2,and_w,(RW2 d, R2 s))
4022     {
4023     CLOBBER_AND;
4024     s=readreg(s,2);
4025     d=rmw(d,2,2);
4026    
4027     raw_and_w(d,s);
4028     unlock2(d);
4029     unlock2(s);
4030     }
4031     MENDFUNC(2,and_w,(RW2 d, R2 s))
4032    
4033     MIDFUNC(2,and_b,(RW1 d, R1 s))
4034     {
4035     CLOBBER_AND;
4036     s=readreg(s,1);
4037     d=rmw(d,1,1);
4038    
4039     raw_and_b(d,s);
4040     unlock2(d);
4041     unlock2(s);
4042     }
4043     MENDFUNC(2,and_b,(RW1 d, R1 s))
4044    
4045     // gb-- used for making an fpcr value in compemu_fpp.cpp
4046     MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4047     {
4048     CLOBBER_OR;
4049     d=rmw(d,4,4);
4050    
4051     raw_or_l_rm(d,s);
4052     unlock2(d);
4053     }
4054     MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4055    
4056     MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4057     {
4058     if (isconst(d) && !needflags) {
4059     live.state[d].val|=i;
4060     return;
4061     }
4062     CLOBBER_OR;
4063     d=rmw(d,4,4);
4064    
4065     raw_or_l_ri(d,i);
4066     unlock2(d);
4067     }
4068     MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4069    
4070     MIDFUNC(2,or_l,(RW4 d, R4 s))
4071     {
4072     if (isconst(d) && isconst(s) && !needflags) {
4073     live.state[d].val|=live.state[s].val;
4074     return;
4075     }
4076     CLOBBER_OR;
4077     s=readreg(s,4);
4078     d=rmw(d,4,4);
4079    
4080     raw_or_l(d,s);
4081     unlock2(d);
4082     unlock2(s);
4083     }
4084     MENDFUNC(2,or_l,(RW4 d, R4 s))
4085    
4086     MIDFUNC(2,or_w,(RW2 d, R2 s))
4087     {
4088     CLOBBER_OR;
4089     s=readreg(s,2);
4090     d=rmw(d,2,2);
4091    
4092     raw_or_w(d,s);
4093     unlock2(d);
4094     unlock2(s);
4095     }
4096     MENDFUNC(2,or_w,(RW2 d, R2 s))
4097    
4098     MIDFUNC(2,or_b,(RW1 d, R1 s))
4099     {
4100     CLOBBER_OR;
4101     s=readreg(s,1);
4102     d=rmw(d,1,1);
4103    
4104     raw_or_b(d,s);
4105     unlock2(d);
4106     unlock2(s);
4107     }
4108     MENDFUNC(2,or_b,(RW1 d, R1 s))
4109    
4110     MIDFUNC(2,adc_l,(RW4 d, R4 s))
4111     {
4112     CLOBBER_ADC;
4113     s=readreg(s,4);
4114     d=rmw(d,4,4);
4115    
4116     raw_adc_l(d,s);
4117    
4118     unlock2(d);
4119     unlock2(s);
4120     }
4121     MENDFUNC(2,adc_l,(RW4 d, R4 s))
4122    
4123     MIDFUNC(2,adc_w,(RW2 d, R2 s))
4124     {
4125     CLOBBER_ADC;
4126     s=readreg(s,2);
4127     d=rmw(d,2,2);
4128    
4129     raw_adc_w(d,s);
4130     unlock2(d);
4131     unlock2(s);
4132     }
4133     MENDFUNC(2,adc_w,(RW2 d, R2 s))
4134    
4135     MIDFUNC(2,adc_b,(RW1 d, R1 s))
4136     {
4137     CLOBBER_ADC;
4138     s=readreg(s,1);
4139     d=rmw(d,1,1);
4140    
4141     raw_adc_b(d,s);
4142     unlock2(d);
4143     unlock2(s);
4144     }
4145     MENDFUNC(2,adc_b,(RW1 d, R1 s))
4146    
4147     MIDFUNC(2,add_l,(RW4 d, R4 s))
4148     {
4149     if (isconst(s)) {
4150     COMPCALL(add_l_ri)(d,live.state[s].val);
4151     return;
4152     }
4153    
4154     CLOBBER_ADD;
4155     s=readreg(s,4);
4156     d=rmw(d,4,4);
4157    
4158     raw_add_l(d,s);
4159    
4160     unlock2(d);
4161     unlock2(s);
4162     }
4163     MENDFUNC(2,add_l,(RW4 d, R4 s))
4164    
4165     MIDFUNC(2,add_w,(RW2 d, R2 s))
4166     {
4167     if (isconst(s)) {
4168     COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4169     return;
4170     }
4171    
4172     CLOBBER_ADD;
4173     s=readreg(s,2);
4174     d=rmw(d,2,2);
4175    
4176     raw_add_w(d,s);
4177     unlock2(d);
4178     unlock2(s);
4179     }
4180     MENDFUNC(2,add_w,(RW2 d, R2 s))
4181    
4182     MIDFUNC(2,add_b,(RW1 d, R1 s))
4183     {
4184     if (isconst(s)) {
4185     COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4186     return;
4187     }
4188    
4189     CLOBBER_ADD;
4190     s=readreg(s,1);
4191     d=rmw(d,1,1);
4192    
4193     raw_add_b(d,s);
4194     unlock2(d);
4195     unlock2(s);
4196     }
4197     MENDFUNC(2,add_b,(RW1 d, R1 s))
4198    
4199     MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4200     {
4201     if (!i && !needflags)
4202     return;
4203     if (isconst(d) && !needflags) {
4204     live.state[d].val-=i;
4205     return;
4206     }
4207     #if USE_OFFSET
4208     if (!needflags) {
4209     add_offset(d,-i);
4210     return;
4211     }
4212     #endif
4213    
4214     CLOBBER_SUB;
4215     d=rmw(d,4,4);
4216    
4217     raw_sub_l_ri(d,i);
4218     unlock2(d);
4219     }
4220     MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4221    
4222     MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4223     {
4224     if (!i && !needflags)
4225     return;
4226    
4227     CLOBBER_SUB;
4228     d=rmw(d,2,2);
4229    
4230     raw_sub_w_ri(d,i);
4231     unlock2(d);
4232     }
4233     MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4234    
4235     MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4236     {
4237     if (!i && !needflags)
4238     return;
4239    
4240     CLOBBER_SUB;
4241     d=rmw(d,1,1);
4242    
4243     raw_sub_b_ri(d,i);
4244    
4245     unlock2(d);
4246     }
4247     MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4248    
4249     MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4250     {
4251     if (!i && !needflags)
4252     return;
4253     if (isconst(d) && !needflags) {
4254     live.state[d].val+=i;
4255     return;
4256     }
4257     #if USE_OFFSET
4258     if (!needflags) {
4259     add_offset(d,i);
4260     return;
4261     }
4262     #endif
4263     CLOBBER_ADD;
4264     d=rmw(d,4,4);
4265     raw_add_l_ri(d,i);
4266     unlock2(d);
4267     }
4268     MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
4269    
4270     MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4271     {
4272     if (!i && !needflags)
4273     return;
4274    
4275     CLOBBER_ADD;
4276     d=rmw(d,2,2);
4277    
4278     raw_add_w_ri(d,i);
4279     unlock2(d);
4280     }
4281     MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4282    
4283     MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4284     {
4285     if (!i && !needflags)
4286     return;
4287    
4288     CLOBBER_ADD;
4289     d=rmw(d,1,1);
4290    
4291     raw_add_b_ri(d,i);
4292    
4293     unlock2(d);
4294     }
4295     MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4296    
4297     MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4298     {
4299     CLOBBER_SBB;
4300     s=readreg(s,4);
4301     d=rmw(d,4,4);
4302    
4303     raw_sbb_l(d,s);
4304     unlock2(d);
4305     unlock2(s);
4306     }
4307     MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4308    
4309     MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4310     {
4311     CLOBBER_SBB;
4312     s=readreg(s,2);
4313     d=rmw(d,2,2);
4314    
4315     raw_sbb_w(d,s);
4316     unlock2(d);
4317     unlock2(s);
4318     }
4319     MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4320    
4321     MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4322     {
4323     CLOBBER_SBB;
4324     s=readreg(s,1);
4325     d=rmw(d,1,1);
4326    
4327     raw_sbb_b(d,s);
4328     unlock2(d);
4329     unlock2(s);
4330     }
4331     MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4332    
4333     MIDFUNC(2,sub_l,(RW4 d, R4 s))
4334     {
4335     if (isconst(s)) {
4336     COMPCALL(sub_l_ri)(d,live.state[s].val);
4337     return;
4338     }
4339    
4340     CLOBBER_SUB;
4341     s=readreg(s,4);
4342     d=rmw(d,4,4);
4343    
4344     raw_sub_l(d,s);
4345     unlock2(d);
4346     unlock2(s);
4347     }
4348     MENDFUNC(2,sub_l,(RW4 d, R4 s))
4349    
4350     MIDFUNC(2,sub_w,(RW2 d, R2 s))
4351     {
4352     if (isconst(s)) {
4353     COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4354     return;
4355     }
4356    
4357     CLOBBER_SUB;
4358     s=readreg(s,2);
4359     d=rmw(d,2,2);
4360    
4361     raw_sub_w(d,s);
4362     unlock2(d);
4363     unlock2(s);
4364     }
4365     MENDFUNC(2,sub_w,(RW2 d, R2 s))
4366    
4367     MIDFUNC(2,sub_b,(RW1 d, R1 s))
4368     {
4369     if (isconst(s)) {
4370     COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4371     return;
4372     }
4373    
4374     CLOBBER_SUB;
4375     s=readreg(s,1);
4376     d=rmw(d,1,1);
4377    
4378     raw_sub_b(d,s);
4379     unlock2(d);
4380     unlock2(s);
4381     }
4382     MENDFUNC(2,sub_b,(RW1 d, R1 s))
4383    
4384     MIDFUNC(2,cmp_l,(R4 d, R4 s))
4385     {
4386     CLOBBER_CMP;
4387     s=readreg(s,4);
4388     d=readreg(d,4);
4389    
4390     raw_cmp_l(d,s);
4391     unlock2(d);
4392     unlock2(s);
4393     }
4394     MENDFUNC(2,cmp_l,(R4 d, R4 s))
4395    
4396     MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4397     {
4398     CLOBBER_CMP;
4399     r=readreg(r,4);
4400    
4401     raw_cmp_l_ri(r,i);
4402     unlock2(r);
4403     }
4404     MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4405    
4406     MIDFUNC(2,cmp_w,(R2 d, R2 s))
4407     {
4408     CLOBBER_CMP;
4409     s=readreg(s,2);
4410     d=readreg(d,2);
4411    
4412     raw_cmp_w(d,s);
4413     unlock2(d);
4414     unlock2(s);
4415     }
4416     MENDFUNC(2,cmp_w,(R2 d, R2 s))
4417    
4418     MIDFUNC(2,cmp_b,(R1 d, R1 s))
4419     {
4420     CLOBBER_CMP;
4421     s=readreg(s,1);
4422     d=readreg(d,1);
4423    
4424     raw_cmp_b(d,s);
4425     unlock2(d);
4426     unlock2(s);
4427     }
4428     MENDFUNC(2,cmp_b,(R1 d, R1 s))
4429    
4430    
4431     MIDFUNC(2,xor_l,(RW4 d, R4 s))
4432     {
4433     CLOBBER_XOR;
4434     s=readreg(s,4);
4435     d=rmw(d,4,4);
4436    
4437     raw_xor_l(d,s);
4438     unlock2(d);
4439     unlock2(s);
4440     }
4441     MENDFUNC(2,xor_l,(RW4 d, R4 s))
4442    
4443     MIDFUNC(2,xor_w,(RW2 d, R2 s))
4444     {
4445     CLOBBER_XOR;
4446     s=readreg(s,2);
4447     d=rmw(d,2,2);
4448    
4449     raw_xor_w(d,s);
4450     unlock2(d);
4451     unlock2(s);
4452     }
4453     MENDFUNC(2,xor_w,(RW2 d, R2 s))
4454    
4455     MIDFUNC(2,xor_b,(RW1 d, R1 s))
4456     {
4457     CLOBBER_XOR;
4458     s=readreg(s,1);
4459     d=rmw(d,1,1);
4460    
4461     raw_xor_b(d,s);
4462     unlock2(d);
4463     unlock2(s);
4464     }
4465     MENDFUNC(2,xor_b,(RW1 d, R1 s))
4466    
4467     MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4468     {
4469     clobber_flags();
4470     remove_all_offsets();
4471     if (osize==4) {
4472     if (out1!=in1 && out1!=r) {
4473     COMPCALL(forget_about)(out1);
4474     }
4475     }
4476     else {
4477     tomem_c(out1);
4478     }
4479    
4480     in1=readreg_specific(in1,isize,REG_PAR1);
4481     r=readreg(r,4);
4482     prepare_for_call_1(); /* This should ensure that there won't be
4483     any need for swapping nregs in prepare_for_call_2
4484     */
4485     #if USE_NORMAL_CALLING_CONVENTION
4486     raw_push_l_r(in1);
4487     #endif
4488     unlock2(in1);
4489     unlock2(r);
4490    
4491     prepare_for_call_2();
4492     raw_call_r(r);
4493    
4494     #if USE_NORMAL_CALLING_CONVENTION
4495     raw_inc_sp(4);
4496     #endif
4497    
4498    
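    /* The callee returns its result in REG_RESULT; record that out1 now
       lives there and is dirty, so it will be written back on flush. */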
4499     live.nat[REG_RESULT].holds[0]=out1;
4500     live.nat[REG_RESULT].nholds=1;
4501     live.nat[REG_RESULT].touched=touchcnt++;
4502    
4503     live.state[out1].realreg=REG_RESULT;
4504     live.state[out1].realind=0;
4505     live.state[out1].val=0;
4506     live.state[out1].validsize=osize;
4507     live.state[out1].dirtysize=osize;
4508     set_status(out1,DIRTY);
4509     }
4510     MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4511    
4512     MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4513     {
4514     clobber_flags();
4515     remove_all_offsets();
4516     in1=readreg_specific(in1,isize1,REG_PAR1);
4517     in2=readreg_specific(in2,isize2,REG_PAR2);
4518     r=readreg(r,4);
4519     prepare_for_call_1(); /* This should ensure that there won't be
4520     any need for swapping nregs in prepare_for_call_2
4521     */
4522     #if USE_NORMAL_CALLING_CONVENTION
4523     raw_push_l_r(in2);
4524     raw_push_l_r(in1);
4525     #endif
4526     unlock2(r);
4527     unlock2(in1);
4528     unlock2(in2);
4529     prepare_for_call_2();
4530     raw_call_r(r);
4531     #if USE_NORMAL_CALLING_CONVENTION
4532     raw_inc_sp(8);
4533     #endif
4534     }
4535     MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4536    
4537     /* forget_about() takes a mid-layer register */
4538     MIDFUNC(1,forget_about,(W4 r))
4539     {
4540     if (isinreg(r))
4541     disassociate(r);
4542     live.state[r].val=0;
4543     set_status(r,UNDEF);
4544     }
4545     MENDFUNC(1,forget_about,(W4 r))
4546    
4547     MIDFUNC(0,nop,(void))
4548     {
4549     raw_nop();
4550     }
4551     MENDFUNC(0,nop,(void))
4552    
4553    
4554     MIDFUNC(1,f_forget_about,(FW r))
4555     {
4556     if (f_isinreg(r))
4557     f_disassociate(r);
4558     live.fate[r].status=UNDEF;
4559     }
4560     MENDFUNC(1,f_forget_about,(FW r))
4561    
4562     MIDFUNC(1,fmov_pi,(FW r))
4563     {
4564     r=f_writereg(r);
4565     raw_fmov_pi(r);
4566     f_unlock(r);
4567     }
4568     MENDFUNC(1,fmov_pi,(FW r))
4569    
4570     MIDFUNC(1,fmov_log10_2,(FW r))
4571     {
4572     r=f_writereg(r);
4573     raw_fmov_log10_2(r);
4574     f_unlock(r);
4575     }
4576     MENDFUNC(1,fmov_log10_2,(FW r))
4577    
4578     MIDFUNC(1,fmov_log2_e,(FW r))
4579     {
4580     r=f_writereg(r);
4581     raw_fmov_log2_e(r);
4582     f_unlock(r);
4583     }
4584     MENDFUNC(1,fmov_log2_e,(FW r))
4585    
4586     MIDFUNC(1,fmov_loge_2,(FW r))
4587     {
4588     r=f_writereg(r);
4589     raw_fmov_loge_2(r);
4590     f_unlock(r);
4591     }
4592     MENDFUNC(1,fmov_loge_2,(FW r))
4593    
4594     MIDFUNC(1,fmov_1,(FW r))
4595     {
4596     r=f_writereg(r);
4597     raw_fmov_1(r);
4598     f_unlock(r);
4599     }
4600     MENDFUNC(1,fmov_1,(FW r))
4601    
4602     MIDFUNC(1,fmov_0,(FW r))
4603     {
4604     r=f_writereg(r);
4605     raw_fmov_0(r);
4606     f_unlock(r);
4607     }
4608     MENDFUNC(1,fmov_0,(FW r))
4609    
4610     MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4611     {
4612     r=f_writereg(r);
4613     raw_fmov_rm(r,m);
4614     f_unlock(r);
4615     }
4616     MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4617    
4618     MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4619     {
4620     r=f_writereg(r);
4621     raw_fmovi_rm(r,m);
4622     f_unlock(r);
4623     }
4624     MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4625    
4626     MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4627     {
4628     r=f_readreg(r);
4629     raw_fmovi_mr(m,r);
4630     f_unlock(r);
4631     }
4632     MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4633    
4634     MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4635     {
4636     r=f_writereg(r);
4637     raw_fmovs_rm(r,m);
4638     f_unlock(r);
4639     }
4640     MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4641    
4642     MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4643     {
4644     r=f_readreg(r);
4645     raw_fmovs_mr(m,r);
4646     f_unlock(r);
4647     }
4648     MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4649    
4650     MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4651     {
4652     r=f_readreg(r);
4653     raw_fmov_ext_mr(m,r);
4654     f_unlock(r);
4655     }
4656     MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4657    
4658     MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4659     {
4660     r=f_readreg(r);
4661     raw_fmov_mr(m,r);
4662     f_unlock(r);
4663     }
4664     MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4665    
4666     MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4667     {
4668     r=f_writereg(r);
4669     raw_fmov_ext_rm(r,m);
4670     f_unlock(r);
4671     }
4672     MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4673    
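/* With USE_F_ALIAS, fmov_rr emits no host instruction at all: d is simply
   recorded as an additional holder of the FPU register backing s and marked
   DIRTY, deferring the actual copy until eviction time. */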
4674     MIDFUNC(2,fmov_rr,(FW d, FR s))
4675     {
4676     if (d==s) { /* How pointless! */
4677     return;
4678     }
4679     #if USE_F_ALIAS
4680     f_disassociate(d);
4681     s=f_readreg(s);
4682     live.fate[d].realreg=s;
4683     live.fate[d].realind=live.fat[s].nholds;
4684     live.fate[d].status=DIRTY;
4685     live.fat[s].holds[live.fat[s].nholds]=d;
4686     live.fat[s].nholds++;
4687     f_unlock(s);
4688     #else
4689     s=f_readreg(s);
4690     d=f_writereg(d);
4691     raw_fmov_rr(d,s);
4692     f_unlock(s);
4693     f_unlock(d);
4694     #endif
4695     }
4696     MENDFUNC(2,fmov_rr,(FW d, FR s))
4697    
4698     MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4699     {
4700     index=readreg(index,4);
4701    
4702     raw_fldcw_m_indexed(index,base);
4703     unlock2(index);
4704     }
4705     MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4706    
4707     MIDFUNC(1,ftst_r,(FR r))
4708     {
4709     r=f_readreg(r);
4710     raw_ftst_r(r);
4711     f_unlock(r);
4712     }
4713     MENDFUNC(1,ftst_r,(FR r))
4714    
4715     MIDFUNC(0,dont_care_fflags,(void))
4716     {
4717     f_disassociate(FP_RESULT);
4718     }
4719     MENDFUNC(0,dont_care_fflags,(void))
4720    
4721     MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4722     {
4723     s=f_readreg(s);
4724     d=f_writereg(d);
4725     raw_fsqrt_rr(d,s);
4726     f_unlock(s);
4727     f_unlock(d);
4728     }
4729     MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4730    
4731     MIDFUNC(2,fabs_rr,(FW d, FR s))
4732     {
4733     s=f_readreg(s);
4734     d=f_writereg(d);
4735     raw_fabs_rr(d,s);
4736     f_unlock(s);
4737     f_unlock(d);
4738     }
4739     MENDFUNC(2,fabs_rr,(FW d, FR s))
4740    
4741     MIDFUNC(2,fsin_rr,(FW d, FR s))
4742     {
4743     s=f_readreg(s);
4744     d=f_writereg(d);
4745     raw_fsin_rr(d,s);
4746     f_unlock(s);
4747     f_unlock(d);
4748     }
4749     MENDFUNC(2,fsin_rr,(FW d, FR s))
4750    
4751     MIDFUNC(2,fcos_rr,(FW d, FR s))
4752     {
4753     s=f_readreg(s);
4754     d=f_writereg(d);
4755     raw_fcos_rr(d,s);
4756     f_unlock(s);
4757     f_unlock(d);
4758     }
4759     MENDFUNC(2,fcos_rr,(FW d, FR s))
4760    
4761     MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4762     {
4763     s=f_readreg(s);
4764     d=f_writereg(d);
4765     raw_ftwotox_rr(d,s);
4766     f_unlock(s);
4767     f_unlock(d);
4768     }
4769     MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4770    
4771     MIDFUNC(2,fetox_rr,(FW d, FR s))
4772     {
4773     s=f_readreg(s);
4774     d=f_writereg(d);
4775     raw_fetox_rr(d,s);
4776     f_unlock(s);
4777     f_unlock(d);
4778     }
4779     MENDFUNC(2,fetox_rr,(FW d, FR s))
4780    
4781     MIDFUNC(2,frndint_rr,(FW d, FR s))
4782     {
4783     s=f_readreg(s);
4784     d=f_writereg(d);
4785     raw_frndint_rr(d,s);
4786     f_unlock(s);
4787     f_unlock(d);
4788     }
4789     MENDFUNC(2,frndint_rr,(FW d, FR s))
4790    
4791     MIDFUNC(2,flog2_rr,(FW d, FR s))
4792     {
4793     s=f_readreg(s);
4794     d=f_writereg(d);
4795     raw_flog2_rr(d,s);
4796     f_unlock(s);
4797     f_unlock(d);
4798     }
4799     MENDFUNC(2,flog2_rr,(FW d, FR s))
4800    
4801     MIDFUNC(2,fneg_rr,(FW d, FR s))
4802     {
4803     s=f_readreg(s);
4804     d=f_writereg(d);
4805     raw_fneg_rr(d,s);
4806     f_unlock(s);
4807     f_unlock(d);
4808     }
4809     MENDFUNC(2,fneg_rr,(FW d, FR s))
4810    
4811     MIDFUNC(2,fadd_rr,(FRW d, FR s))
4812     {
4813     s=f_readreg(s);
4814     d=f_rmw(d);
4815     raw_fadd_rr(d,s);
4816     f_unlock(s);
4817     f_unlock(d);
4818     }
4819     MENDFUNC(2,fadd_rr,(FRW d, FR s))
4820    
4821     MIDFUNC(2,fsub_rr,(FRW d, FR s))
4822     {
4823     s=f_readreg(s);
4824     d=f_rmw(d);
4825     raw_fsub_rr(d,s);
4826     f_unlock(s);
4827     f_unlock(d);
4828     }
4829     MENDFUNC(2,fsub_rr,(FRW d, FR s))
4830    
4831     MIDFUNC(2,fcmp_rr,(FR d, FR s))
4832     {
4833     d=f_readreg(d);
4834     s=f_readreg(s);
4835     raw_fcmp_rr(d,s);
4836     f_unlock(s);
4837     f_unlock(d);
4838     }
4839     MENDFUNC(2,fcmp_rr,(FR d, FR s))
4840    
4841     MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4842     {
4843     s=f_readreg(s);
4844     d=f_rmw(d);
4845     raw_fdiv_rr(d,s);
4846     f_unlock(s);
4847     f_unlock(d);
4848     }
4849     MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4850    
4851     MIDFUNC(2,frem_rr,(FRW d, FR s))
4852     {
4853     s=f_readreg(s);
4854     d=f_rmw(d);
4855     raw_frem_rr(d,s);
4856     f_unlock(s);
4857     f_unlock(d);
4858     }
4859     MENDFUNC(2,frem_rr,(FRW d, FR s))
4860    
4861     MIDFUNC(2,frem1_rr,(FRW d, FR s))
4862     {
4863     s=f_readreg(s);
4864     d=f_rmw(d);
4865     raw_frem1_rr(d,s);
4866     f_unlock(s);
4867     f_unlock(d);
4868     }
4869     MENDFUNC(2,frem1_rr,(FRW d, FR s))
4870    
4871     MIDFUNC(2,fmul_rr,(FRW d, FR s))
4872     {
4873     s=f_readreg(s);
4874     d=f_rmw(d);
4875     raw_fmul_rr(d,s);
4876     f_unlock(s);
4877     f_unlock(d);
4878     }
4879     MENDFUNC(2,fmul_rr,(FRW d, FR s))
4880    
4881     /********************************************************************
4882     * Support functions exposed to gencomp. CREATE time *
4883     ********************************************************************/
4884    
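/* Sets the host Z flag from r. BSF has the side effect of setting ZF when
   its source operand is zero; target_check_bsf() decides at startup whether
   the host CPU's BSF is suitable for this, and we fall back to
   simulate_bsf() otherwise. */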
4885 gbeauche 1.26 void set_zero(int r, int tmp)
4886     {
4887     if (setzflg_uses_bsf)
4888     bsf_l_rr(r,r);
4889     else
4890     simulate_bsf(tmp,r);
4891     }
4892    
4893 gbeauche 1.1 int kill_rodent(int r)
4894     {
4895     return KILLTHERAT &&
4896     have_rat_stall &&
4897     (live.state[r].status==INMEM ||
4898     live.state[r].status==CLEAN ||
4899     live.state[r].status==ISCONST ||
4900     live.state[r].dirtysize==4);
4901     }
4902    
4903     uae_u32 get_const(int r)
4904     {
4905     Dif (!isconst(r)) {
4906     write_log("Register %d should be constant, but isn't\n",r);
4907     abort();
4908     }
4909     return live.state[r].val;
4910     }
4911    
4912     void sync_m68k_pc(void)
4913     {
4914     if (m68k_pc_offset) {
4915     add_l_ri(PC_P,m68k_pc_offset);
4916     comp_pc_p+=m68k_pc_offset;
4917     m68k_pc_offset=0;
4918     }
4919     }
4920    
4921     /********************************************************************
4922     * Scratch registers management *
4923     ********************************************************************/
4924    
4925     struct scratch_t {
4926     uae_u32 regs[VREGS];
4927     fpu_register fregs[VFREGS];
4928     };
4929    
4930     static scratch_t scratch;
4931    
4932     /********************************************************************
4933     * Support functions exposed to newcpu *
4934     ********************************************************************/
4935    
4936     static inline const char *str_on_off(bool b)
4937     {
4938     return b ? "on" : "off";
4939     }
4940    
4941     void compiler_init(void)
4942     {
4943     static bool initialized = false;
4944     if (initialized)
4945     return;
4946 gbeauche 1.24
4947 gbeauche 1.1 #if JIT_DEBUG
4948     // JIT debug mode ?
4949     JITDebug = PrefsFindBool("jitdebug");
4950     #endif
4951     write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4952    
4953     #ifdef USE_JIT_FPU
4954     // Use JIT compiler for FPU instructions ?
4955     avoid_fpu = !PrefsFindBool("jitfpu");
4956     #else
4957     // JIT FPU is always disabled
4958     avoid_fpu = true;
4959     #endif
4960     write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4961    
4962     // Get size of the translation cache (in KB)
4963     cache_size = PrefsFindInt32("jitcachesize");
4964     write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
4965    
4966     // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4967     raw_init_cpu();
4968 gbeauche 1.15 setzflg_uses_bsf = target_check_bsf();
4969 gbeauche 1.1 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4970     write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4971 gbeauche 1.5 write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4972 gbeauche 1.1
4973     // Translation cache flush mechanism
4974     lazy_flush = PrefsFindBool("jitlazyflush");
4975     write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
4976     flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
4977    
4978     // Compiler features
4979     write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4980     write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4981     write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4982 gbeauche 1.8 write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
4983 gbeauche 1.1 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4984    
4985     // Build compiler tables
4986     build_comp();
4987    
4988     initialized = true;
4989    
4990 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
4991     write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
4992     #endif
4993    
4994 gbeauche 1.1 #if PROFILE_COMPILE_TIME
4995     write_log("<JIT compiler> : gather statistics on translation time\n");
4996     emul_start_time = clock();
4997     #endif
4998     }
4999    
5000     void compiler_exit(void)
5001     {
5002     #if PROFILE_COMPILE_TIME
5003     emul_end_time = clock();
5004     #endif
5005    
5006     // Deallocate translation cache
5007     if (compiled_code) {
5008 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5009 gbeauche 1.1 compiled_code = 0;
5010     }
5011 gbeauche 1.24
5012     // Deallocate popallspace
5013     if (popallspace) {
5014     vm_release(popallspace, POPALLSPACE_SIZE);
5015     popallspace = 0;
5016     }
5017 gbeauche 1.1
5018     #if PROFILE_COMPILE_TIME
5019     write_log("### Compile Block statistics\n");
5020     write_log("Number of calls to compile_block : %d\n", compile_count);
5021     uae_u32 emul_time = emul_end_time - emul_start_time;
5022     write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5023     write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5024     100.0*double(compile_time)/double(emul_time));
5025     write_log("\n");
5026     #endif
5027 gbeauche 1.9
5028     #if PROFILE_UNTRANSLATED_INSNS
5029     uae_u64 untranslated_count = 0;
5030     for (int i = 0; i < 65536; i++) {
5031     opcode_nums[i] = i;
5032     untranslated_count += raw_cputbl_count[i];
5033     }
5034     write_log("Sorting out untranslated instructions count...\n");
5035     qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5036     write_log("\nRank Opc Count Name\n");
5037     for (int i = 0; i < untranslated_top_ten; i++) {
5038     uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5039     struct instr *dp;
5040     struct mnemolookup *lookup;
5041     if (!count)
5042     break;
5043     dp = table68k + opcode_nums[i];
5044     for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5045     ;
5046     write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
5047     }
5048     #endif
5049 gbeauche 1.1 }
5050    
5051     bool compiler_use_jit(void)
5052     {
5053     // Check for the "jit" prefs item
5054     if (!PrefsFindBool("jit"))
5055     return false;
5056    
5057     // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5058     if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5059     write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5060     return false;
5061     }
5062    
5063     // FIXME: there are currently problems with JIT compilation and anything below a 68040
5064     if (CPUType < 4) {
5065     write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
5066     return false;
5067     }
5068    
5069     return true;
5070     }
5071    
5072     void init_comp(void)
5073     {
5074     int i;
5075     uae_s8* cb=can_byte;
5076     uae_s8* cw=can_word;
5077     uae_s8* au=always_used;
5078    
5079     for (i=0;i<VREGS;i++) {
5080     live.state[i].realreg=-1;
5081     live.state[i].needflush=NF_SCRATCH;
5082     live.state[i].val=0;
5083     set_status(i,UNDEF);
5084     }
5085    
5086     for (i=0;i<VFREGS;i++) {
5087     live.fate[i].status=UNDEF;
5088     live.fate[i].realreg=-1;
5089     live.fate[i].needflush=NF_SCRATCH;
5090     }
5091    
5092     for (i=0;i<VREGS;i++) {
5093     if (i<16) { /* First 16 registers map to 68k registers */
5094     live.state[i].mem=((uae_u32*)&regs)+i;
5095     live.state[i].needflush=NF_TOMEM;
5096     set_status(i,INMEM);
5097     }
5098     else
5099     live.state[i].mem=scratch.regs+i;
5100     }
5101     live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5102     live.state[PC_P].needflush=NF_TOMEM;
5103 gbeauche 1.24 set_const(PC_P,(uintptr)comp_pc_p);
5104 gbeauche 1.1
5105 gbeauche 1.24 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5106 gbeauche 1.1 live.state[FLAGX].needflush=NF_TOMEM;
5107     set_status(FLAGX,INMEM);
5108    
5109 gbeauche 1.24 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5110 gbeauche 1.1 live.state[FLAGTMP].needflush=NF_TOMEM;
5111     set_status(FLAGTMP,INMEM);
5112    
5113     live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5114     set_status(NEXT_HANDLER,UNDEF);
5115    
5116     for (i=0;i<VFREGS;i++) {
5117     if (i<8) { /* First 8 registers map to 68k FPU registers */
5118     live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5119     live.fate[i].needflush=NF_TOMEM;
5120     live.fate[i].status=INMEM;
5121     }
5122     else if (i==FP_RESULT) {
5123     live.fate[i].mem=(uae_u32*)(&fpu.result);
5124     live.fate[i].needflush=NF_TOMEM;
5125     live.fate[i].status=INMEM;
5126     }
5127     else
5128 gbeauche 1.25 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5129 gbeauche 1.1 }
5130    
5131    
5132     for (i=0;i<N_REGS;i++) {
5133     live.nat[i].touched=0;
5134     live.nat[i].nholds=0;
5135     live.nat[i].locked=0;
5136     if (*cb==i) {
5137     live.nat[i].canbyte=1; cb++;
5138     } else live.nat[i].canbyte=0;
5139     if (*cw==i) {
5140     live.nat[i].canword=1; cw++;
5141     } else live.nat[i].canword=0;
5142     if (*au==i) {
5143     live.nat[i].locked=1; au++;
5144     }
5145     }
5146    
5147     for (i=0;i<N_FREGS;i++) {
5148     live.fat[i].touched=0;
5149     live.fat[i].nholds=0;
5150     live.fat[i].locked=0;
5151     }
5152    
5153     touchcnt=1;
5154     m68k_pc_offset=0;
5155     live.flags_in_flags=TRASH;
5156     live.flags_on_stack=VALID;
5157     live.flags_are_important=1;
5158    
5159     raw_fp_init();
5160     }
5161    
5162     /* Only do this if you really mean it! The next call should be to init!*/
5163     void flush(int save_regs)
5164     {
5165     int i;
5166    
5167     log_flush();
5168     flush_flags(); /* low level */
5169     sync_m68k_pc(); /* mid level */
5170    
5171     if (save_regs) {
5172     for (i=0;i<VFREGS;i++) {
5173     if (live.fate[i].needflush==NF_SCRATCH ||
5174     live.fate[i].status==CLEAN) {
5175     f_disassociate(i);
5176     }
5177     }
5178     for (i=0;i<VREGS;i++) {
5179     if (live.state[i].needflush==NF_TOMEM) {
5180     switch(live.state[i].status) {
5181     case INMEM:
5182     if (live.state[i].val) {
5183 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5184 gbeauche 1.1 log_vwrite(i);
5185     live.state[i].val=0;
5186     }
5187     break;
5188     case CLEAN:
5189     case DIRTY:
5190     remove_offset(i,-1); tomem(i); break;
5191     case ISCONST:
5192     if (i!=PC_P)
5193     writeback_const(i);
5194     break;
5195     default: break;
5196     }
5197     Dif (live.state[i].val && i!=PC_P) {
5198     write_log("Register %d still has val %x\n",
5199     i,live.state[i].val);
5200     }
5201     }
5202     }
5203     for (i=0;i<VFREGS;i++) {
5204     if (live.fate[i].needflush==NF_TOMEM &&
5205     live.fate[i].status==DIRTY) {
5206     f_evict(i);
5207     }
5208     }
5209     raw_fp_cleanup_drop();
5210     }
5211     if (needflags) {
5212     write_log("Warning! flush with needflags=1!\n");
5213     }
5214     }
5215    
5216     static void flush_keepflags(void)
5217     {
5218     int i;
5219    
5220     for (i=0;i<VFREGS;i++) {
5221     if (live.fate[i].needflush==NF_SCRATCH ||
5222     live.fate[i].status==CLEAN) {
5223     f_disassociate(i);
5224     }
5225     }
5226     for (i=0;i<VREGS;i++) {
5227     if (live.state[i].needflush==NF_TOMEM) {
5228     switch(live.state[i].status) {
5229     case INMEM:
5230     /* Can't adjust the offset here --- that needs "add" */
5231     break;
5232     case CLEAN:
5233     case DIRTY:
5234     remove_offset(i,-1); tomem(i); break;
5235     case ISCONST:
5236     if (i!=PC_P)
5237     writeback_const(i);
5238     break;
5239     default: break;
5240     }
5241     }
5242     }
5243     for (i=0;i<VFREGS;i++) {
5244     if (live.fate[i].needflush==NF_TOMEM &&
5245     live.fate[i].status==DIRTY) {
5246     f_evict(i);
5247     }
5248     }
5249     raw_fp_cleanup_drop();
5250     }
5251    
5252     void freescratch(void)
5253     {
5254     int i;
5255     for (i=0;i<N_REGS;i++)
5256     if (live.nat[i].locked && i!=4)
5257     write_log("Warning! %d is locked\n",i);
5258    
5259     for (i=0;i<VREGS;i++)
5260     if (live.state[i].needflush==NF_SCRATCH) {
5261     forget_about(i);
5262     }
5263    
5264     for (i=0;i<VFREGS;i++)
5265     if (live.fate[i].needflush==NF_SCRATCH) {
5266     f_forget_about(i);
5267     }
5268     }
5269    
5270     /********************************************************************
5271     * Support functions, internal *
5272     ********************************************************************/
5273    
5274    
5275     static void align_target(uae_u32 a)
5276     {
5277 gbeauche 1.14 if (!a)
5278     return;
5279    
5280 gbeauche 1.12 if (tune_nop_fillers)
5281 gbeauche 1.24 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5282 gbeauche 1.12 else {
5283     /* Fill with NOPs --- makes debugging with gdb easier */
5284 gbeauche 1.24 while ((uintptr)target&(a-1))
5285 gbeauche 1.12 *target++=0x90;
5286     }
5287 gbeauche 1.1 }
5288    
5289     static __inline__ int isinrom(uintptr addr)
5290     {
5291     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5292     }
5293    
5294     static void flush_all(void)
5295     {
5296     int i;
5297    
5298     log_flush();
5299     for (i=0;i<VREGS;i++)
5300     if (live.state[i].status==DIRTY) {
5301     if (!call_saved[live.state[i].realreg]) {
5302     tomem(i);
5303     }
5304     }
5305     for (i=0;i<VFREGS;i++)
5306     if (f_isinreg(i))
5307     f_evict(i);
5308     raw_fp_cleanup_drop();
5309     }
5310    
5311     /* Make sure all registers that will get clobbered by a call are
safe and sound in memory */
5313     static void prepare_for_call_1(void)
5314     {
5315     flush_all(); /* If there are registers that don't get clobbered,
5316     * we should be a bit more selective here */
5317     }
5318    
5319     /* We will call a C routine in a moment. That will clobber all registers,
5320     so we need to disassociate everything */
5321     static void prepare_for_call_2(void)
5322     {
5323     int i;
5324     for (i=0;i<N_REGS;i++)
5325     if (!call_saved[i] && live.nat[i].nholds>0)
5326     free_nreg(i);
5327    
5328     for (i=0;i<N_FREGS;i++)
5329     if (live.fat[i].nholds>0)
5330     f_free_nreg(i);
5331    
5332     live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5333     flags at the very start of the call_r
5334     functions! */
5335     }
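/* Typical calling sequence, as used by the call_r_* midfuncs above:
   prepare_for_call_1(); read/lock the argument registers;
   prepare_for_call_2(); raw_call_r(...). */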
5336    
5337     /********************************************************************
5338     * Memory access and related functions, CREATE time *
5339     ********************************************************************/
5340    
5341     void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5342     {
5343     next_pc_p=not_taken;
5344     taken_pc_p=taken;
5345     branch_cc=cond;
5346     }
5347    
5348    
5349     static uae_u32 get_handler_address(uae_u32 addr)
5350     {
5351     uae_u32 cl=cacheline(addr);
5352 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5353     return (uintptr)&(bi->direct_handler_to_use);
5354 gbeauche 1.1 }
5355    
5356     static uae_u32 get_handler(uae_u32 addr)
5357     {
5358     uae_u32 cl=cacheline(addr);
5359 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5360     return (uintptr)bi->direct_handler_to_use;
5361 gbeauche 1.1 }
5362    
5363     static void load_handler(int reg, uae_u32 addr)
5364     {
5365     mov_l_rm(reg,get_handler_address(addr));
5366     }
5367    
5368     /* This version assumes that it is writing *real* memory, and *will* fail
5369     * if that assumption is wrong! No branches, no second chances, just
5370     * straight go-for-it attitude */
5371    
5372 gbeauche 1.24 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5373 gbeauche 1.1 {
5374     int f=tmp;
5375    
5376     if (clobber)
5377     f=source;
5378 gbeauche 1.24
5379     #if SIZEOF_VOID_P == 8
5380 gbeauche 1.26 if (!ThirtyThreeBitAddressing)
5381     sign_extend_32_rr(address, address);
5382 gbeauche 1.24 #endif
5383    
5384 gbeauche 1.1 switch(size) {
5385     case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5386     case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5387     case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5388     }
5389     forget_about(tmp);
5390     forget_about(f);
5391     }
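/* Illustrative sketch only (not emitted by the compiler): what the generated
 * host code amounts to for a 4-byte write under DIRECT_ADDRESSING. The
 * `membase` parameter stands in for the usual MEMBaseDiff guest-to-host
 * offset; the name is hypothetical. The byte swap is needed because the 68k
 * is big-endian while x86 is little-endian. */
static inline void writemem_real_sketch(uae_u8 *membase, uae_u32 address, uae_u32 value)
{
    /* bswap_32 equivalent */
    uae_u32 swapped = (value >> 24) | ((value >> 8) & 0x0000ff00)
                    | ((value << 8) & 0x00ff0000) | (value << 24);
    *(uae_u32 *)(membase + address) = swapped;  /* mov_l_bRr */
}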
5392    
5393     void writebyte(int address, int source, int tmp)
5394     {
5395 gbeauche 1.24 writemem_real(address,source,1,tmp,0);
5396 gbeauche 1.1 }
5397    
5398     static __inline__ void writeword_general(int address, int source, int tmp,
5399     int clobber)
5400     {
5401 gbeauche 1.24 writemem_real(address,source,2,tmp,clobber);
5402 gbeauche 1.1 }
5403    
5404     void writeword_clobber(int address, int source, int tmp)
5405     {
5406     writeword_general(address,source,tmp,1);
5407     }
5408    
5409     void writeword(int address, int source, int tmp)
5410     {
5411     writeword_general(address,source,tmp,0);
5412     }
5413    
5414     static __inline__ void writelong_general(int address, int source, int tmp,
5415     int clobber)
5416     {
5417 gbeauche 1.24 writemem_real(address,source,4,tmp,clobber);
5418 gbeauche 1.1 }
5419    
5420     void writelong_clobber(int address, int source, int tmp)
5421     {
5422     writelong_general(address,source,tmp,1);
5423     }
5424    
5425     void writelong(int address, int source, int tmp)
5426     {
5427     writelong_general(address,source,tmp,0);
5428     }
5429    
5430    
5431    
5432     /* This version assumes that it is reading *real* memory, and *will* fail
5433     * if that assumption is wrong! No branches, no second chances, just
5434     * straight go-for-it attitude */
5435    
5436 gbeauche 1.24 static void readmem_real(int address, int dest, int size, int tmp)
5437 gbeauche 1.1 {
5438     int f=tmp;
5439    
5440     if (size==4 && address!=dest)
5441     f=dest;
5442    
5443 gbeauche 1.24 #if SIZEOF_VOID_P == 8
5444 gbeauche 1.26 if (!ThirtyThreeBitAddressing)
5445     sign_extend_32_rr(address, address);
5446 gbeauche 1.24 #endif
5447    
5448 gbeauche 1.1 switch(size) {
5449     case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5450     case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5451     case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5452     }
5453     forget_about(tmp);
5454     }
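/* The read path is symmetric (sketch, same hypothetical membase as above):
 * for a long, dest = bswap_32(*(uae_u32 *)(membase + address)); word reads
 * swap 16 bits, and byte reads need no swap at all. */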
5455    
5456     void readbyte(int address, int dest, int tmp)
5457     {
5458 gbeauche 1.24 readmem_real(address,dest,1,tmp);
5459 gbeauche 1.1 }
5460    
5461     void readword(int address, int dest, int tmp)
5462     {
5463 gbeauche 1.24 readmem_real(address,dest,2,tmp);
5464 gbeauche 1.1 }
5465    
5466     void readlong(int address, int dest, int tmp)
5467     {
5468 gbeauche 1.24 readmem_real(address,dest,4,tmp);
5469 gbeauche 1.1 }
5470    
5471     void get_n_addr(int address, int dest, int tmp)
5472     {
5473     // a is the register containing the virtual address
5474     // after the offset had been fetched
5475     int a=tmp;
5476    
5477     // f is the register that will contain the offset
5478     int f=tmp;
5479    
5480     // a == f == tmp if (address == dest)
5481     if (address!=dest) {
5482     a=address;
5483     f=dest;
5484     }
5485    
5486     #if REAL_ADDRESSING
5487     mov_l_rr(dest, address);
5488     #elif DIRECT_ADDRESSING
5489     lea_l_brr(dest,address,MEMBaseDiff);
5490     #endif
5491     forget_about(tmp);
5492     }
5493    
5494     void get_n_addr_jmp(int address, int dest, int tmp)
5495     {
5496     /* For this, we need to get the same address as the rest of UAE
5497     would --- otherwise we end up translating everything twice */
5498     get_n_addr(address,dest,tmp);
5499     }
5500    
5501    
5502     /* base is a register, but dp is an actual value.
5503     target is a register, as is tmp */
5504     void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5505     {
5506     int reg = (dp >> 12) & 15;
5507     int regd_shift=(dp >> 9) & 3;
5508    
5509     if (dp & 0x100) {
5510     int ignorebase=(dp&0x80);
5511     int ignorereg=(dp&0x40);
5512     int addbase=0;
5513     int outer=0;
5514    
5515     if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5516     if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5517    
5518     if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5519     if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5520    
5521     if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5522     if (!ignorereg) {
5523     if ((dp & 0x800) == 0)
5524     sign_extend_16_rr(target,reg);
5525     else
5526     mov_l_rr(target,reg);
5527     shll_l_ri(target,regd_shift);
5528     }
5529     else
5530     mov_l_ri(target,0);
5531    
5532     /* target is now regd */
5533     if (!ignorebase)
5534     add_l(target,base);
5535     add_l_ri(target,addbase);
5536     if (dp&0x03) readlong(target,target,tmp);
5537     } else { /* do the getlong first, then add regd */
5538     if (!ignorebase) {
5539     mov_l_rr(target,base);
5540     add_l_ri(target,addbase);
5541     }
5542     else
5543     mov_l_ri(target,addbase);
5544     if (dp&0x03) readlong(target,target,tmp);
5545    
5546     if (!ignorereg) {
5547     if ((dp & 0x800) == 0)
5548     sign_extend_16_rr(tmp,reg);
5549     else
5550     mov_l_rr(tmp,reg);
5551     shll_l_ri(tmp,regd_shift);
5552     /* tmp is now regd */
5553     add_l(target,tmp);
5554     }
5555     }
5556     add_l_ri(target,outer);
5557     }
5558     else { /* 68000 version */
5559     if ((dp & 0x800) == 0) { /* Sign extend */
5560     sign_extend_16_rr(target,reg);
5561     lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5562     }
5563     else {
5564     lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5565     }
5566     }
5567     forget_about(tmp);
5568     }
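/* For reference, the 68020 full extension word fields decoded above
   (hedged summary; see the M68000PRM for the authoritative layout):
     bits 15-12  index register (D/A bit + register number -> reg)
     bit  11     index size: 0 = sign-extended word, 1 = long
     bits 10-9   index scale 1/2/4/8 (regd_shift)
     bit  8      1 = full format, 0 = 68000-style brief format
     bit  7      base suppress (ignorebase)
     bit  6      index suppress (ignorereg)
     bits 5-4    base displacement size (0x20 = word, 0x30 = long)
     bits 2-0    index/indirection selector (pre/post-indexing and
                 outer displacement size) */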
5569    
5570    
5571    
5572    
5573    
5574     void set_cache_state(int enabled)
5575     {
5576     if (enabled!=letit)
5577     flush_icache_hard(77);
5578     letit=enabled;
5579     }
5580    
5581     int get_cache_state(void)
5582     {
5583     return letit;
5584     }
5585    
5586     uae_u32 get_jitted_size(void)
5587     {
5588     if (compiled_code)
5589     return current_compile_p-compiled_code;
5590     return 0;
5591     }
5592    
5593 gbeauche 1.20 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5594     const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5595    
5596     static uint8 *do_alloc_code(uint32 size, int depth)
5597     {
5598     #if defined(__linux__) && 0
5599     /*
5600     This is a really awful hack that is known to work on Linux at
5601     least.
5602    
5603     The trick here is to make sure the allocated cache is near the
5604     code segment, and more precisely in the positive half of a
5605     32-bit address space. i.e. addr < 0x80000000. Actually, it
5606     turned out that a 32-bit binary run on AMD64 yields a cache
5607     allocated around 0xa0000000, thus causing some troubles when
5608     translating addresses from m68k to x86.
5609     */
5610     static uint8 * code_base = NULL;
5611     if (code_base == NULL) {
5612     uintptr page_size = getpagesize();
5613     uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5614     if (boundaries < page_size)
5615     boundaries = page_size;
5616     code_base = (uint8 *)sbrk(0);
5617     for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5618     if (vm_acquire_fixed(code_base, size) == 0) {
5619     uint8 *code = code_base;
5620     code_base += size;
5621     return code;
5622     }
5623     code_base += boundaries;
5624     }
5625     return NULL;
5626     }
5627    
5628     if (vm_acquire_fixed(code_base, size) == 0) {
5629     uint8 *code = code_base;
5630     code_base += size;
5631     return code;
5632     }
5633    
5634     if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5635     return NULL;
5636    
5637     return do_alloc_code(size, depth + 1);
5638     #else
5639     uint8 *code = (uint8 *)vm_acquire(size);
5640     return code == VM_MAP_FAILED ? NULL : code;
5641     #endif
5642     }
5643    
5644     static inline uint8 *alloc_code(uint32 size)
5645     {
5646     return do_alloc_code(size, 0);
5647     }
5648    
5649 gbeauche 1.1 void alloc_cache(void)
5650     {
5651     if (compiled_code) {
5652     flush_icache_hard(6);
5653 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5654 gbeauche 1.1 compiled_code = 0;
5655     }
5656    
5657     if (cache_size == 0)
5658     return;
5659    
5660     while (!compiled_code && cache_size) {
5661 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5662 gbeauche 1.1 compiled_code = 0;
5663     cache_size /= 2;
5664     }
5665     }
5666 gbeauche 1.25 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5667 gbeauche 1.1
5668     if (compiled_code) {
5669     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5670     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5671     current_compile_p = compiled_code;
5672     current_cache_size = 0;
5673     }
5674     }
5675    
5676    
5677    
5678 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5679 gbeauche 1.1
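/* Compute two checksums over a block's 68k source: k1 is a 32-bit-wise
   additive sum, k2 an XOR sum. Both must still match for a cached block
   to be considered unchanged (see block_check_checksum below). */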
5680 gbeauche 1.8 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5681 gbeauche 1.1 {
5682 gbeauche 1.8 uae_u32 k1 = 0;
5683     uae_u32 k2 = 0;
5684    
5685     #if USE_CHECKSUM_INFO
5686     checksum_info *csi = bi->csi;
5687     Dif(!csi) abort();
5688     while (csi) {
5689     uae_s32 len = csi->length;
5690 gbeauche 1.24 uintptr tmp = (uintptr)csi->start_p;
5691 gbeauche 1.8 #else
5692     uae_s32 len = bi->len;
5693 gbeauche 1.24 uintptr tmp = (uintptr)bi->min_pcp;
5694 gbeauche 1.8 #endif
5695     uae_u32*pos;
5696 gbeauche 1.1
5697 gbeauche 1.8 len += (tmp & 3);
5698 gbeauche 1.24 tmp &= ~((uintptr)3);
5699 gbeauche 1.8 pos = (uae_u32 *)tmp;
5700    
5701     if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5702     while (len > 0) {
5703     k1 += *pos;
5704     k2 ^= *pos;
5705     pos++;
5706     len -= 4;
5707     }
5708     }
5709 gbeauche 1.1
5710 gbeauche 1.8 #if USE_CHECKSUM_INFO
5711     csi = csi->next;
5712 gbeauche 1.1 }
5713 gbeauche 1.8 #endif
5714    
5715     *c1 = k1;
5716     *c2 = k2;
5717 gbeauche 1.1 }
5718    
5719 gbeauche 1.8 #if 0
5720 gbeauche 1.7 static void show_checksum(CSI_TYPE* csi)
5721 gbeauche 1.1 {
5722     uae_u32 k1=0;
5723     uae_u32 k2=0;
5724 gbeauche 1.7 uae_s32 len=CSI_LENGTH(csi);
5725 gbeauche 1.24 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5726 gbeauche 1.1 uae_u32* pos;
5727    
5728     len+=(tmp&3);
5729     tmp&=(~3);
5730     pos=(uae_u32*)tmp;
5731    
5732     if (len<0 || len>MAX_CHECKSUM_LEN) {
5733     return;
5734     }
5735     else {
5736     while (len>0) {
5737     write_log("%08x ",*pos);
5738     pos++;
5739     len-=4;
5740     }
5741     write_log(" bla\n");
5742     }
5743     }
5744 gbeauche 1.8 #endif
5745 gbeauche 1.1
5746    
5747     int check_for_cache_miss(void)
5748     {
5749     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5750    
5751     if (bi) {
5752     int cl=cacheline(regs.pc_p);
5753     if (bi!=cache_tags[cl+1].bi) {
5754     raise_in_cl_list(bi);
5755     return 1;
5756     }
5757     }
5758     return 0;
5759     }
5760    
5761    
5762     static void recompile_block(void)
5763     {
5764     /* An existing block's countdown code has expired. We need to make
5765     sure that execute_normal doesn't refuse to recompile due to a
5766     perceived cache miss... */
5767     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5768    
5769     Dif (!bi)
5770     abort();
5771     raise_in_cl_list(bi);
5772     execute_normal();
5773     return;
5774     }
5775     static void cache_miss(void)
5776     {
5777     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5778     uae_u32 cl=cacheline(regs.pc_p);
5779     blockinfo* bi2=get_blockinfo(cl);
5780    
5781     if (!bi) {
5782     execute_normal(); /* Compile this block now */
5783     return;
5784     }
5785     Dif (!bi2 || bi==bi2) {
5786     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5787     abort();
5788     }
5789     raise_in_cl_list(bi);
5790     return;
5791     }
5792    
5793     static int called_check_checksum(blockinfo* bi);
5794    
5795     static inline int block_check_checksum(blockinfo* bi)
5796     {
5797     uae_u32 c1,c2;
5798 gbeauche 1.7 bool isgood;
5799 gbeauche 1.1
5800     if (bi->status!=BI_NEED_CHECK)
5801     return 1; /* This block is in a checked state */
5802    
5803     checksum_count++;
5804 gbeauche 1.7
5805 gbeauche 1.1 if (bi->c1 || bi->c2)
5806     calc_checksum(bi,&c1,&c2);
5807     else {
5808     c1=c2=1; /* Make sure it doesn't match */
5809 gbeauche 1.7 }
5810 gbeauche 1.1
5811     isgood=(c1==bi->c1 && c2==bi->c2);
5812 gbeauche 1.7
5813 gbeauche 1.1 if (isgood) {
5814     /* This block is still OK. So we reactivate. Of course, that
5815     means we have to move it into the needs-to-be-flushed list */
5816     bi->handler_to_use=bi->handler;
5817     set_dhtu(bi,bi->direct_handler);
5818     bi->status=BI_CHECKING;
5819     isgood=called_check_checksum(bi);
5820     }
5821     if (isgood) {
5822     /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5823     c1,c2,bi->c1,bi->c2);*/
5824     remove_from_list(bi);
5825     add_to_active(bi);
5826     raise_in_cl_list(bi);
5827     bi->status=BI_ACTIVE;
5828     }
5829     else {
5830     /* This block actually changed. We need to invalidate it,
5831     and set it up to be recompiled */
5832     /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5833     c1,c2,bi->c1,bi->c2); */
5834     invalidate_block(bi);
5835     raise_in_cl_list(bi);
5836     }
5837     return isgood;
5838     }
5839    
5840     static int called_check_checksum(blockinfo* bi)
5841     {
5842     dependency* x=bi->deplist;
5843     int isgood=1;
5844     int i;
5845    
5846     for (i=0;i<2 && isgood;i++) {
5847     if (bi->dep[i].jmp_off) {
5848     isgood=block_check_checksum(bi->dep[i].target);
5849     }
5850     }
5851     return isgood;
5852     }
5853    
5854     static void check_checksum(void)
5855     {
5856     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5857     uae_u32 cl=cacheline(regs.pc_p);
5858     blockinfo* bi2=get_blockinfo(cl);
5859    
5860     /* These are not the droids you are looking for... */
5861     if (!bi) {
5862     /* Whoever is the primary target is in a dormant state, but
5863     calling it was accidental, and we should just compile this
5864     new block */
5865     execute_normal();
5866     return;
5867     }
5868     if (bi!=bi2) {
5869     /* The block was hit accidentally, but it does exist. Cache miss */
5870     cache_miss();
5871     return;
5872     }
5873    
5874     if (!block_check_checksum(bi))
5875     execute_normal();
5876     }
5877    
5878     static __inline__ void match_states(blockinfo* bi)
5879     {
5880     int i;
5881     smallstate* s=&(bi->env);
5882    
5883     if (bi->status==BI_NEED_CHECK) {
5884     block_check_checksum(bi);
5885     }
5886     if (bi->status==BI_ACTIVE ||
5887     bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5888     block makes (about not using
5889     certain vregs) */
5890     for (i=0;i<16;i++) {
5891     if (s->virt[i]==L_UNNEEDED) {
5892     // write_log("unneeded reg %d at %p\n",i,target);
5893     COMPCALL(forget_about)(i); // FIXME
5894     }
5895     }
5896     }
5897     flush(1);
5898    
5899     /* And now deal with the *demands* the block makes */
5900     for (i=0;i<N_REGS;i++) {
5901     int v=s->nat[i];
5902     if (v>=0) {
5903     // printf("Loading reg %d into %d at %p\n",v,i,target);
5904     readreg_specific(v,4,i);
5905     // do_load_reg(i,v);
5906     // setlock(i);
5907     }
5908     }
5909     for (i=0;i<N_REGS;i++) {
5910     int v=s->nat[i];
5911     if (v>=0) {
5912     unlock2(i);
5913     }
5914     }
5915     }
5916    
5917     static __inline__ void create_popalls(void)
5918     {
5919     int i,r;
5920    
5921 gbeauche 1.24 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
5922     write_log("FATAL: Could not allocate popallspace!\n");
5923     abort();
5924     }
5925     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
5926    
5927 gbeauche 1.1 current_compile_p=popallspace;
5928     set_target(current_compile_p);
5929     #if USE_PUSH_POP
5930     /* If we can't use gcc inline assembly, we need to pop some
5931     registers before jumping back to the various get-out routines.
5932     This generates the code for it.
5933     */
5934 gbeauche 1.5 align_target(align_jumps);
5935     popall_do_nothing=get_target();
5936 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
5937     if (need_to_preserve[i])
5938     raw_pop_l_r(i);
5939     }
5940 gbeauche 1.24 raw_jmp((uintptr)do_nothing);
5941 gbeauche 1.1
5942 gbeauche 1.5 align_target(align_jumps);
5943 gbeauche 1.1 popall_execute_normal=get_target();
5944     for (i=0;i<N_REGS;i++) {
5945     if (need_to_preserve[i])
5946     raw_pop_l_r(i);
5947     }
5948 gbeauche 1.24 raw_jmp((uintptr)execute_normal);
5949 gbeauche 1.1
5950 gbeauche 1.5 align_target(align_jumps);
5951 gbeauche 1.1 popall_cache_miss=get_target();
5952     for (i=0;i<N_REGS;i++) {
5953     if (need_to_preserve[i])
5954     raw_pop_l_r(i);
5955     }
5956 gbeauche 1.24 raw_jmp((uintptr)cache_miss);
5957 gbeauche 1.1
5958 gbeauche 1.5 align_target(align_jumps);
5959 gbeauche 1.1 popall_recompile_block=get_target();
5960     for (i=0;i<N_REGS;i++) {
5961     if (need_to_preserve[i])
5962     raw_pop_l_r(i);
5963     }
5964 gbeauche 1.24 raw_jmp((uintptr)recompile_block);
5965 gbeauche 1.5
5966     align_target(align_jumps);
5967 gbeauche 1.1 popall_exec_nostats=get_target();
5968     for (i=0;i<N_REGS;i++) {
5969     if (need_to_preserve[i])
5970     raw_pop_l_r(i);
5971     }
5972 gbeauche 1.24 raw_jmp((uintptr)exec_nostats);
5973 gbeauche 1.5
5974     align_target(align_jumps);
5975 gbeauche 1.1 popall_check_checksum=get_target();
5976     for (i=0;i<N_REGS;i++) {
5977     if (need_to_preserve[i])
5978     raw_pop_l_r(i);
5979     }
5980 gbeauche 1.24 raw_jmp((uintptr)check_checksum);
5981 gbeauche 1.5
5982     align_target(align_jumps);
5983 gbeauche 1.1 current_compile_p=get_target();
5984     #else
5985     popall_exec_nostats=(void *)exec_nostats;
5986     popall_execute_normal=(void *)execute_normal;
5987     popall_cache_miss=(void *)cache_miss;
5988     popall_recompile_block=(void *)recompile_block;
5989     popall_do_nothing=(void *)do_nothing;
5990     popall_check_checksum=(void *)check_checksum;
5991     #endif
5992    
5993     /* And now, the code to do the matching pushes and then jump
5994     into a handler routine */
5995     pushall_call_handler=get_target();
5996     #if USE_PUSH_POP
5997     for (i=N_REGS;i--;) {
5998     if (need_to_preserve[i])
5999     raw_push_l_r(i);
6000     }
6001     #endif
6002     r=REG_PC_TMP;
6003 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6004 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6005 gbeauche 1.24 raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6006 gbeauche 1.6
6007 gbeauche 1.24 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
6008 gbeauche 1.6 align_target(align_jumps);
6009     m68k_compile_execute = (void (*)(void))get_target();
6010     for (i=N_REGS;i--;) {
6011     if (need_to_preserve[i])
6012     raw_push_l_r(i);
6013     }
6014     align_target(align_loops);
6015 gbeauche 1.24 uintptr dispatch_loop = (uintptr)get_target();
6016 gbeauche 1.6 r=REG_PC_TMP;
6017 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6018 gbeauche 1.6 raw_and_l_ri(r,TAGMASK);
6019 gbeauche 1.24 raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6020     raw_cmp_l_mi((uintptr)&regs.spcflags,0);
6021 gbeauche 1.6 raw_jcc_b_oponly(NATIVE_CC_EQ);
6022 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6023     raw_call((uintptr)m68k_do_specialties);
6024 gbeauche 1.6 raw_test_l_rr(REG_RESULT,REG_RESULT);
6025     raw_jcc_b_oponly(NATIVE_CC_EQ);
6026 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6027     raw_cmp_b_mi((uintptr)&quit_program,0);
6028 gbeauche 1.6 raw_jcc_b_oponly(NATIVE_CC_EQ);
6029 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6030 gbeauche 1.6 for (i=0;i<N_REGS;i++) {
6031     if (need_to_preserve[i])
6032     raw_pop_l_r(i);
6033     }
6034     raw_ret();
6035     #endif
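    /* Schematically, the inline dispatcher emitted above behaves like this
       sketch (the short-circuit evaluation mirrors the conditional
       branches back to dispatch_loop):

         do {
             cache_tags[(uintptr)regs.pc_p & TAGMASK].handler();
         } while (regs.spcflags == 0
                  || m68k_do_specialties() == 0
                  || quit_program == 0);

       i.e. it only falls through to the register pops and the ret once
       special flags are pending, m68k_do_specialties() returned non-zero,
       and quit_program is set. */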
6036 gbeauche 1.24
6037     // No further writes into popallspace are needed; make it read-only and executable
6038     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6039 gbeauche 1.1 }
6040    
6041     static __inline__ void reset_lists(void)
6042     {
6043     int i;
6044    
6045     for (i=0;i<MAX_HOLD_BI;i++)
6046     hold_bi[i]=NULL;
6047     active=NULL;
6048     dormant=NULL;
6049     }
6050    
6051     static void prepare_block(blockinfo* bi)
6052     {
6053     int i;
6054    
6055     set_target(current_compile_p);
6056 gbeauche 1.5 align_target(align_jumps);
6057 gbeauche 1.1 bi->direct_pen=(cpuop_func *)get_target();
6058 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6059     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6060     raw_jmp((uintptr)popall_execute_normal);
6061 gbeauche 1.1
6062 gbeauche 1.5 align_target(align_jumps);
6063 gbeauche 1.1 bi->direct_pcc=(cpuop_func *)get_target();
6064 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6065     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6066     raw_jmp((uintptr)popall_check_checksum);
6067 gbeauche 1.1 current_compile_p=get_target();
6068    
6069     bi->deplist=NULL;
6070     for (i=0;i<2;i++) {
6071     bi->dep[i].prev_p=NULL;
6072     bi->dep[i].next=NULL;
6073     }
6074     bi->env=default_ss;
6075     bi->status=BI_INVALID;
6076     bi->havestate=0;
6077     //bi->env=empty_ss;
6078     }
6079    
6080 gbeauche 1.21 // OPCODE is in big-endian format; use cft_map() beforehand, if needed.
6081     static inline void reset_compop(int opcode)
6082 gbeauche 1.17 {
6083 gbeauche 1.21 compfunctbl[opcode] = NULL;
6084     nfcompfunctbl[opcode] = NULL;
6085     }
6086    
6087     static int read_opcode(const char *p)
6088     {
6089     int opcode = 0;
6090     for (int i = 0; i < 4; i++) {
6091     int op = p[i];
6092     switch (op) {
6093     case '0': case '1': case '2': case '3': case '4':
6094     case '5': case '6': case '7': case '8': case '9':
6095     opcode = (opcode << 4) | (op - '0');
6096     break;
6097     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6098     opcode = (opcode << 4) | ((op - 'a') + 10);
6099     break;
6100     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6101     opcode = (opcode << 4) | ((op - 'A') + 10);
6102     break;
6103     default:
6104     return -1;
6105     }
6106     }
6107     return opcode;
6108     }
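    /* E.g. read_opcode("4e73") == 0x4e73; anything with fewer than four
       hex digits at p yields -1. Only the first four characters are
       examined, so the caller checks what follows them. */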
6109    
6110     static bool merge_blacklist()
6111     {
6112     const char *blacklist = PrefsFindString("jitblacklist");
6113     if (blacklist) {
6114     const char *p = blacklist;
6115     for (;;) {
6116     if (*p == 0)
6117     return true;
6118    
6119     int opcode1 = read_opcode(p);
6120     if (opcode1 < 0)
6121     return false;
6122     p += 4;
6123    
6124     int opcode2 = opcode1;
6125     if (*p == '-') {
6126     p++;
6127     opcode2 = read_opcode(p);
6128     if (opcode2 < 0)
6129     return false;
6130     p += 4;
6131     }
6132    
6133     if (*p == 0 || *p == ';') {
6134     write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6135     for (int opcode = opcode1; opcode <= opcode2; opcode++)
6136     reset_compop(cft_map(opcode));
6137    
6138     if (*p++ == ';')
6139     continue;
6140    
6141     return true;
6142     }
6143    
6144     return false;
6145     }
6146     }
6147     return true;
6148 gbeauche 1.17 }
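    /* The "jitblacklist" prefs string is thus a ';'-separated list of
       4-hex-digit opcodes or opcode ranges. A hypothetical example:

         jitblacklist "f200-f3ff;4e73"

       would send that F-line range and opcode 0x4e73 back to the
       non-compiling handlers. */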
6149    
6150 gbeauche 1.1 void build_comp(void)
6151     {
6152     int i;
6153     int jumpcount=0;
6154     unsigned long opcode;
6155     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6156     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6157     int count;
6158     int cpu_level = 0; // 68000 (default)
6159     if (CPUType == 4)
6160     cpu_level = 4; // 68040 with FPU
6161     else {
6162     if (FPUType)
6163     cpu_level = 3; // 68020 with FPU
6164     else if (CPUType >= 2)
6165     cpu_level = 2; // 68020
6166     else if (CPUType == 1)
6167     cpu_level = 1;
6168     }
6169     struct cputbl *nfctbl = (
6170     cpu_level == 4 ? op_smalltbl_0_nf
6171     : cpu_level == 3 ? op_smalltbl_1_nf
6172     : cpu_level == 2 ? op_smalltbl_2_nf
6173     : cpu_level == 1 ? op_smalltbl_3_nf
6174     : op_smalltbl_4_nf);
6175    
6176     write_log ("<JIT compiler> : building compiler function tables\n");
6177    
6178     for (opcode = 0; opcode < 65536; opcode++) {
6179 gbeauche 1.21 reset_compop(opcode);
6180 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6181     prop[opcode].use_flags = 0x1f;
6182     prop[opcode].set_flags = 0x1f;
6183     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6184     }
6185    
6186     for (i = 0; tbl[i].opcode < 65536; i++) {
6187     int cflow = table68k[tbl[i].opcode].cflow;
6188 gbeauche 1.10 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6189     cflow = fl_const_jump;
6190 gbeauche 1.8 else
6191 gbeauche 1.10 cflow &= ~fl_const_jump;
6192     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6193 gbeauche 1.1
6194     int uses_fpu = tbl[i].specific & 32;
6195 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6196 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6197     else
6198     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6199     }
6200 gbeauche 1.8
6201 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6202     int uses_fpu = nftbl[i].specific & 32;
6203 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6204 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6205     else
6206     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6207    
6208     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6209     }
6210    
6211     for (i = 0; nfctbl[i].handler; i++) {
6212     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6213     }
6214    
6215     for (opcode = 0; opcode < 65536; opcode++) {
6216     compop_func *f;
6217     compop_func *nff;
6218     cpuop_func *nfcf;
6219     int isaddx,cflow;
6220    
6221     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6222     continue;
6223    
6224     if (table68k[opcode].handler != -1) {
6225     f = compfunctbl[cft_map(table68k[opcode].handler)];
6226     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6227     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6228     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6229     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6230     prop[cft_map(opcode)].cflow = cflow;
6231     prop[cft_map(opcode)].is_addx = isaddx;
6232     compfunctbl[cft_map(opcode)] = f;
6233     nfcompfunctbl[cft_map(opcode)] = nff;
6234     Dif (nfcf == op_illg_1)
6235     abort();
6236     nfcpufunctbl[cft_map(opcode)] = nfcf;
6237     }
6238     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6239     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6240     }
6241     for (i = 0; nfctbl[i].handler != NULL; i++) {
6242     if (nfctbl[i].specific)
6243     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6244     }
6245 gbeauche 1.21
6246     /* Merge in blacklist */
6247     if (!merge_blacklist())
6248     write_log("<JIT compiler> : blacklist merge failure!\n");
6249 gbeauche 1.1
6250     count=0;
6251     for (opcode = 0; opcode < 65536; opcode++) {
6252     if (compfunctbl[cft_map(opcode)])
6253     count++;
6254     }
6255     write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6256    
6257     /* Initialise state */
6258     create_popalls();
6259     alloc_cache();
6260     reset_lists();
6261    
6262     for (i=0;i<TAGSIZE;i+=2) {
6263     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6264     cache_tags[i+1].bi=NULL;
6265     }
6266    
6267     #if 0
6268     for (i=0;i<N_REGS;i++) {
6269     empty_ss.nat[i].holds=-1;
6270     empty_ss.nat[i].validsize=0;
6271     empty_ss.nat[i].dirtysize=0;
6272     }
6273     #endif
6274     for (i=0;i<VREGS;i++) {
6275     empty_ss.virt[i]=L_NEEDED;
6276     }
6277     for (i=0;i<N_REGS;i++) {
6278     empty_ss.nat[i]=L_UNKNOWN;
6279     }
6280     default_ss=empty_ss;
6281     }
6282    
6283    
6284     static void flush_icache_none(int n)
6285     {
6286     /* Nothing to do. */
6287     }
6288    
6289     static void flush_icache_hard(int n)
6290     {
6292     blockinfo* bi, *dbi;
6293    
6294     hard_flush_count++;
6295     #if 0
6296     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6297     n,regs.pc,regs.pc_p,current_cache_size/1024);
6298     current_cache_size = 0;
6299     #endif
6300     bi=active;
6301     while(bi) {
6302     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6303     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6304     dbi=bi; bi=bi->next;
6305     free_blockinfo(dbi);
6306     }
6307     bi=dormant;
6308     while(bi) {
6309     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6310     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6311     dbi=bi; bi=bi->next;
6312     free_blockinfo(dbi);
6313     }
6314    
6315     reset_lists();
6316     if (!compiled_code)
6317     return;
6318     current_compile_p=compiled_code;
6319     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6320     }
6321    
6322    
6323     /* "Soft flushing" --- instead of actually throwing everything away,
6324     we simply mark everything as "needs to be checked".
6325     */
6326    
6327     static inline void flush_icache_lazy(int n)
6328     {
6330     blockinfo* bi;
6331     blockinfo* bi2;
6332    
6333     soft_flush_count++;
6334     if (!active)
6335     return;
6336    
6337     bi=active;
6338     while (bi) {
6339     uae_u32 cl=cacheline(bi->pc_p);
6340     if (bi->status==BI_INVALID ||
6341     bi->status==BI_NEED_RECOMP) {
6342     if (bi==cache_tags[cl+1].bi)
6343     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6344     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6345     set_dhtu(bi,bi->direct_pen);
6346     bi->status=BI_INVALID;
6347     }
6348     else {
6349     if (bi==cache_tags[cl+1].bi)
6350     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6351     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6352     set_dhtu(bi,bi->direct_pcc);
6353     bi->status=BI_NEED_CHECK;
6354     }
6355     bi2=bi;
6356     bi=bi->next;
6357     }
6358     /* bi2 is now the last entry in the active list */
6359     bi2->next=dormant;
6360     if (dormant)
6361     dormant->prev_p=&(bi2->next);
6362    
6363     dormant=active;
6364     active->prev_p=&dormant;
6365     active=NULL;
6366 gbeauche 1.22 }
6367    
6368     void flush_icache_range(uae_u32 start, uae_u32 length)
6369     {
6370     if (!active)
6371     return;
6372    
6373     #if LAZY_FLUSH_ICACHE_RANGE
6374     uae_u8 *start_p = get_real_address(start);
6375     blockinfo *bi = active;
6376     while (bi) {
6377     #if USE_CHECKSUM_INFO
6378     bool invalidate = false;
6379     for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
6380     invalidate = (((start_p - csi->start_p) < csi->length) ||
6381     ((csi->start_p - start_p) < length));
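    /* Each pointer difference is an (effectively unsigned) "addr - base
       < len" range check, so the pair tests overlap of the two half-open
       ranges: they intersect exactly when one range's start lies inside
       the other. */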
6382     #else
6383     // Assume system is consistent and would invalidate the right range
6384     const bool invalidate = (bi->pc_p - start_p) < length;
6385     #endif
6386     if (invalidate) {
6387     uae_u32 cl = cacheline(bi->pc_p);
6388     if (bi == cache_tags[cl + 1].bi)
6389     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6390     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
6391     set_dhtu(bi, bi->direct_pen);
6392     bi->status = BI_NEED_RECOMP;
6393     }
6394     bi = bi->next;
6395     }
6396     return;
6397     #endif
6398     flush_icache(-1);
6399 gbeauche 1.1 }
6400    
6401     static void catastrophe(void)
6402     {
6403     abort();
6404     }
6405    
6406     int failure;
6407    
6408     #define TARGET_M68K 0
6409     #define TARGET_POWERPC 1
6410     #define TARGET_X86 2
6411 gbeauche 1.24 #define TARGET_X86_64 3
6412 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6413     #define TARGET_NATIVE TARGET_X86
6414     #endif
6415     #if defined(powerpc) || defined(__powerpc__)
6416     #define TARGET_NATIVE TARGET_POWERPC
6417     #endif
6418 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6419     #define TARGET_NATIVE TARGET_X86_64
6420     #endif
6421 gbeauche 1.1
6422     #ifdef ENABLE_MON
6423 gbeauche 1.24 static uae_u32 mon_read_byte_jit(uintptr addr)
6424 gbeauche 1.1 {
6425     uae_u8 *m = (uae_u8 *)addr;
6426 gbeauche 1.24 return (uintptr)(*m);
6427 gbeauche 1.1 }
6428    
6429 gbeauche 1.24 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6430 gbeauche 1.1 {
6431     uae_u8 *m = (uae_u8 *)addr;
6432     *m = b;
6433     }
6434     #endif
6435    
6436     void disasm_block(int target, uint8 * start, size_t length)
6437     {
6438     if (!JITDebug)
6439     return;
6440    
6441     #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6442     char disasm_str[200];
6443     sprintf(disasm_str, "%s $%lx $%lx",
6444     target == TARGET_M68K ? "d68" :
6445     target == TARGET_X86 ? "d86" :
6446 gbeauche 1.24 target == TARGET_X86_64 ? "d8664" :
6447 gbeauche 1.1 target == TARGET_POWERPC ? "d" : "x",
6448     (unsigned long)(uintptr)start, (unsigned long)(uintptr)(start + length - 1));
6449    
6450 gbeauche 1.24 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6451     void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6452 gbeauche 1.1
6453     mon_read_byte = mon_read_byte_jit;
6454     mon_write_byte = mon_write_byte_jit;
6455    
6456     char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6457     mon(4, arg);
6458    
6459     mon_read_byte = old_mon_read_byte;
6460     mon_write_byte = old_mon_write_byte;
6461     #endif
6462     }
6463    
6464 gbeauche 1.24 static void disasm_native_block(uint8 *start, size_t length)
6465 gbeauche 1.1 {
6466     disasm_block(TARGET_NATIVE, start, length);
6467     }
6468    
6469 gbeauche 1.24 static void disasm_m68k_block(uint8 *start, size_t length)
6470 gbeauche 1.1 {
6471     disasm_block(TARGET_M68K, start, length);
6472     }
6473    
6474     #ifdef HAVE_GET_WORD_UNSWAPPED
6475     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6476     #else
6477     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6478     #endif
6479    
6480     #if JIT_DEBUG
6481     static uae_u8 *last_regs_pc_p = 0;
6482     static uae_u8 *last_compiled_block_addr = 0;
6483    
6484     void compiler_dumpstate(void)
6485     {
6486     if (!JITDebug)
6487     return;
6488    
6489     write_log("### Host addresses\n");
6490     write_log("MEM_BASE : %x\n", MEMBaseDiff);
6491     write_log("PC_P : %p\n", &regs.pc_p);
6492     write_log("SPCFLAGS : %p\n", &regs.spcflags);
6493     write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6494     write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6495     write_log("\n");
6496    
6497     write_log("### M68k processor state\n");
6498     m68k_dumpstate(0);
6499     write_log("\n");
6500    
6501     write_log("### Block in Mac address space\n");
6502     write_log("M68K block : %p\n",
6503 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6504 gbeauche 1.1 write_log("Native block : %p (%d bytes)\n",
6505 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6506 gbeauche 1.1 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6507     write_log("\n");
6508     }
6509     #endif
6510    
6511     static void compile_block(cpu_history* pc_hist, int blocklen)
6512     {
6513     if (letit && compiled_code) {
6514     #if PROFILE_COMPILE_TIME
6515     compile_count++;
6516     clock_t start_time = clock();
6517     #endif
6518     #if JIT_DEBUG
6519     bool disasm_block = false;
6520     #endif
6521    
6522     /* OK, here we need to 'compile' a block */
6523     int i;
6524     int r;
6525     int was_comp=0;
6526     uae_u8 liveflags[MAXRUN+1];
6527 gbeauche 1.8 #if USE_CHECKSUM_INFO
6528     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6529 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6530     uintptr min_pcp=max_pcp;
6531 gbeauche 1.8 #else
6532 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[0].location;
6533     uintptr min_pcp=max_pcp;
6534 gbeauche 1.8 #endif
6535 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6536     void* specflags=(void*)&regs.spcflags;
6537     blockinfo* bi=NULL;
6538     blockinfo* bi2;
6539     int extra_len=0;
6540    
6541     redo_current_block=0;
6542     if (current_compile_p>=max_compile_start)
6543     flush_icache_hard(7);
6544    
6545     alloc_blockinfos();
6546    
6547     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6548     bi2=get_blockinfo(cl);
6549    
6550     optlev=bi->optlevel;
6551     if (bi->status!=BI_INVALID) {
6552     Dif (bi!=bi2) {
6553     /* I don't think it can happen anymore. Shouldn't, in
6554     any case. So let's make sure... */
6555     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6556     bi->count,bi->optlevel,bi->handler_to_use,
6557     cache_tags[cl].handler);
6558     abort();
6559     }
6560    
6561     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6562     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6563     /* What the heck? We are not supposed to be here! */
6564     abort();
6565     }
6566     }
6567     if (bi->count==-1) {
6568     optlev++;
6569     while (!optcount[optlev])
6570     optlev++;
6571     bi->count=optcount[optlev]-1;
6572     }
6573 gbeauche 1.24 current_block_pc_p=(uintptr)pc_hist[0].location;
6574 gbeauche 1.1
6575     remove_deps(bi); /* We are about to create new code */
6576     bi->optlevel=optlev;
6577     bi->pc_p=(uae_u8*)pc_hist[0].location;
6578 gbeauche 1.8 #if USE_CHECKSUM_INFO
6579     free_checksum_info_chain(bi->csi);
6580     bi->csi = NULL;
6581     #endif
6582 gbeauche 1.1
6583     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6584     i=blocklen;
6585     while (i--) {
6586     uae_u16* currpcp=pc_hist[i].location;
6587     uae_u32 op=DO_GET_OPCODE(currpcp);
6588    
6589 gbeauche 1.8 #if USE_CHECKSUM_INFO
6590     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6591     #if USE_INLINING
6592     if (is_const_jump(op)) {
6593     checksum_info *csi = alloc_checksum_info();
6594     csi->start_p = (uae_u8 *)min_pcp;
6595     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6596     csi->next = bi->csi;
6597     bi->csi = csi;
6598 gbeauche 1.24 max_pcp = (uintptr)currpcp;
6599 gbeauche 1.8 }
6600     #endif
6601 gbeauche 1.24 min_pcp = (uintptr)currpcp;
6602 gbeauche 1.8 #else
6603 gbeauche 1.24 if ((uintptr)currpcp<min_pcp)
6604     min_pcp=(uintptr)currpcp;
6605     if ((uintptr)currpcp>max_pcp)
6606     max_pcp=(uintptr)currpcp;
6607 gbeauche 1.8 #endif
6608 gbeauche 1.1
6609     liveflags[i]=((liveflags[i+1]&
6610     (~prop[op].set_flags))|
6611     prop[op].use_flags);
6612     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6613     liveflags[i]&= ~FLAG_Z;
6614     }
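    /* Classic backward liveness over the CCR bits:

         live_in(i) = (live_out(i) & ~set_flags(op)) | use_flags(op)

       ADDX-style instructions only read Z in order to preserve it, so
       when Z is dead after such an instruction it is dead before it as
       well. */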
6615    
6616 gbeauche 1.8 #if USE_CHECKSUM_INFO
6617     checksum_info *csi = alloc_checksum_info();
6618     csi->start_p = (uae_u8 *)min_pcp;
6619     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6620     csi->next = bi->csi;
6621     bi->csi = csi;
6622     #endif
6623    
6624 gbeauche 1.1 bi->needed_flags=liveflags[0];
6625    
6626 gbeauche 1.5 align_target(align_loops);
6627 gbeauche 1.1 was_comp=0;
6628    
6629     bi->direct_handler=(cpuop_func *)get_target();
6630     set_dhtu(bi,bi->direct_handler);
6631     bi->status=BI_COMPILING;
6632 gbeauche 1.24 current_block_start_target=(uintptr)get_target();
6633 gbeauche 1.1
6634     log_startblock();
6635    
6636     if (bi->count>=0) { /* Need to generate countdown code */
6637 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6638     raw_sub_l_mi((uintptr)&(bi->count),1);
6639     raw_jl((uintptr)popall_recompile_block);
6640 gbeauche 1.1 }
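    /* I.e. the block counts its own executions down; once bi->count goes
       negative, it re-enters the compiler through popall_recompile_block
       to be rebuilt at the next optimization level. */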
6641     if (optlev==0) { /* No need to actually translate */
6642     /* Execute normally without keeping stats */
6643 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6644     raw_jmp((uintptr)popall_exec_nostats);
6645 gbeauche 1.1 }
6646     else {
6647     reg_alloc_run=0;
6648     next_pc_p=0;
6649     taken_pc_p=0;
6650     branch_cc=0;
6651    
6652     comp_pc_p=(uae_u8*)pc_hist[0].location;
6653     init_comp();
6654     was_comp=1;
6655    
6656     #if JIT_DEBUG
6657     if (JITDebug) {
6658 gbeauche 1.24 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6659     raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6660 gbeauche 1.1 }
6661     #endif
6662    
6663     for (i=0;i<blocklen &&
6664     get_target_noopt()<max_compile_start;i++) {
6665     cpuop_func **cputbl;
6666     compop_func **comptbl;
6667     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6668     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6669     if (!needed_flags) {
6670     cputbl=nfcpufunctbl;
6671     comptbl=nfcompfunctbl;
6672     }
6673     else {
6674     cputbl=cpufunctbl;
6675     comptbl=compfunctbl;
6676     }
6677    
6678     failure = 1; // gb-- defaults to failure state
6679     if (comptbl[opcode] && optlev>1) {
6680     failure=0;
6681     if (!was_comp) {
6682     comp_pc_p=(uae_u8*)pc_hist[i].location;
6683     init_comp();
6684     }
6685 gbeauche 1.18 was_comp=1;
6686 gbeauche 1.1
6687     comptbl[opcode](opcode);
6688     freescratch();
6689     if (!(liveflags[i+1] & FLAG_CZNV)) {
6690     /* We can forget about flags */
6691     dont_care_flags();
6692     }
6693     #if INDIVIDUAL_INST
6694     flush(1);
6695     nop();
6696     flush(1);
6697     was_comp=0;
6698     #endif
6699     }
6700    
6701     if (failure) {
6702     if (was_comp) {
6703     flush(1);
6704     was_comp=0;
6705     }
6706     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6707     #if USE_NORMAL_CALLING_CONVENTION
6708     raw_push_l_r(REG_PAR1);
6709     #endif
6710 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,
6711     (uintptr)pc_hist[i].location);
6712     raw_call((uintptr)cputbl[opcode]);
6713 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6714     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6715 gbeauche 1.24 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6716 gbeauche 1.9 #endif
6717 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6718     raw_inc_sp(4);
6719     #endif
6720    
6721     if (i < blocklen - 1) {
6722     uae_s8* branchadd;
6723    
6724 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)specflags);
6725 gbeauche 1.1 raw_test_l_rr(0,0);
6726     raw_jz_b_oponly();
6727     branchadd=(uae_s8 *)get_target();
6728     emit_byte(0);
6729 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6730     *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6731 gbeauche 1.1 }
6732     }
6733     }
6734     #if 1 /* This isn't completely kosher yet; it really needs to be
6735     integrated into a general inter-block-dependency scheme */
6736     if (next_pc_p && taken_pc_p &&
6737     was_comp && taken_pc_p==current_block_pc_p) {
6738     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6739     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6740     uae_u8 x=bi1->needed_flags;
6741    
6742     if (x==0xff || 1) { /* To be on the safe side */
6743     uae_u16* next=(uae_u16*)next_pc_p;
6744     uae_u32 op=DO_GET_OPCODE(next);
6745    
6746     x=0x1f;
6747     x&=(~prop[op].set_flags);
6748     x|=prop[op].use_flags;
6749     }
6750    
6751     x|=bi2->needed_flags;
6752     if (!(x & FLAG_CZNV)) {
6753     /* We can forget about flags */
6754     dont_care_flags();
6755     extra_len+=2; /* The next instruction now is part of this
6756     block */
6757     }
6758    
6759     }
6760     #endif
6761     log_flush();
6762    
6763     if (next_pc_p) { /* A branch was registered */
6764 gbeauche 1.24 uintptr t1=next_pc_p;
6765     uintptr t2=taken_pc_p;
6766 gbeauche 1.1 int cc=branch_cc;
6767    
6768     uae_u32* branchadd;
6769     uae_u32* tba;
6770     bigstate tmp;
6771     blockinfo* tbi;
6772    
6773     if (taken_pc_p<next_pc_p) {
6774     /* backward branch. Optimize for the "taken" case ---
6775     which means the raw_jcc should fall through when
6776     the 68k branch is taken. */
6777     t1=taken_pc_p;
6778     t2=next_pc_p;
6779     cc=branch_cc^1;
6780     }
6781    
6782     tmp=live; /* ouch! This is big... */
6783     raw_jcc_l_oponly(cc);
6784     branchadd=(uae_u32*)get_target();
6785     emit_long(0);
6786    
6787     /* predicted outcome */
6788     tbi=get_blockinfo_addr_new((void*)t1,1);
6789     match_states(tbi);
6790 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6791 gbeauche 1.1 raw_jcc_l_oponly(4);
6792     tba=(uae_u32*)get_target();
6793 gbeauche 1.24 emit_long(get_handler(t1)-((uintptr)tba+4));
6794     raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6795     raw_jmp((uintptr)popall_do_nothing);
6796 gbeauche 1.1 create_jmpdep(bi,0,tba,t1);
6797    
6798 gbeauche 1.5 align_target(align_jumps);
6799 gbeauche 1.1 /* not-predicted outcome */
6800 gbeauche 1.24 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6801 gbeauche 1.1 live=tmp; /* Ouch again */
6802     tbi=get_blockinfo_addr_new((void*)t2,1);
6803     match_states(tbi);
6804    
6805     //flush(1); /* Can only get here if was_comp==1 */
6806 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6807 gbeauche 1.1 raw_jcc_l_oponly(4);
6808     tba=(uae_u32*)get_target();
6809 gbeauche 1.24 emit_long(get_handler(t2)-((uintptr)tba+4));
6810     raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6811     raw_jmp((uintptr)popall_do_nothing);
6812 gbeauche 1.1 create_jmpdep(bi,1,tba,t2);
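    /* Both the predicted and the not-predicted leg compile to the same
       pattern: with no special flags pending, jump straight to the
       target block's handler (the displacement emitted at tba is patched
       later through the jmpdep list); otherwise store the new pc_p and
       leave through popall_do_nothing. */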
6813     }
6814     else
6815     {
6816     if (was_comp) {
6817     flush(1);
6818     }
6819    
6820     /* Let's find out where next_handler is... */
6821     if (was_comp && isinreg(PC_P)) {
6822     r=live.state[PC_P].realreg;
6823     raw_and_l_ri(r,TAGMASK);
6824     int r2 = (r==0) ? 1 : 0;
6825 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6826     raw_cmp_l_mi((uintptr)specflags,0);
6827 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6828 gbeauche 1.1 raw_jmp_r(r2);
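    /* In effect (a sketch of the cmov-based dispatch; r holds
       pc_p & TAGMASK at this point):

         r2 = (regs.spcflags == 0) ? cache_tags[r].handler
                                   : popall_do_nothing;
         goto *r2;
    */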
6829     }
6830     else if (was_comp && isconst(PC_P)) {
6831     uae_u32 v=live.state[PC_P].val;
6832     uae_u32* tba;
6833     blockinfo* tbi;
6834    
6835 gbeauche 1.24 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6836 gbeauche 1.1 match_states(tbi);
6837    
6838 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6839 gbeauche 1.1 raw_jcc_l_oponly(4);
6840     tba=(uae_u32*)get_target();
6841 gbeauche 1.24 emit_long(get_handler(v)-((uintptr)tba+4));
6842     raw_mov_l_mi((uintptr)&regs.pc_p,v);
6843     raw_jmp((uintptr)popall_do_nothing);
6844 gbeauche 1.1 create_jmpdep(bi,0,tba,v);
6845     }
6846     else {
6847     r=REG_PC_TMP;
6848 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6849 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6850     int r2 = (r==0) ? 1 : 0;
6851 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6852     raw_cmp_l_mi((uintptr)specflags,0);
6853 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6854 gbeauche 1.1 raw_jmp_r(r2);
6855     }
6856     }
6857     }
6858    
6859     #if USE_MATCH
6860     if (callers_need_recompile(&live,&(bi->env))) {
6861     mark_callers_recompile(bi);
6862     }
6863    
6864     big_to_small_state(&live,&(bi->env));
6865     #endif
6866    
6867 gbeauche 1.8 #if USE_CHECKSUM_INFO
6868     remove_from_list(bi);
6869     if (trace_in_rom) {
6870     // No need to checksum that block trace on cache invalidation
6871     free_checksum_info_chain(bi->csi);
6872     bi->csi = NULL;
6873     add_to_dormant(bi);
6874     }
6875     else {
6876     calc_checksum(bi,&(bi->c1),&(bi->c2));
6877     add_to_active(bi);
6878     }
6879     #else
6880 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
6881     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6882     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6883     else
6884     max_pcp+=LONGEST_68K_INST;
6885 gbeauche 1.7
6886 gbeauche 1.1 bi->len=max_pcp-min_pcp;
6887     bi->min_pcp=min_pcp;
6888 gbeauche 1.7
6889 gbeauche 1.1 remove_from_list(bi);
6890     if (isinrom(min_pcp) && isinrom(max_pcp)) {
6891     add_to_dormant(bi); /* No need to checksum it on cache flush.
6892     Please don't start changing ROMs in
6893     flight! */
6894     }
6895     else {
6896     calc_checksum(bi,&(bi->c1),&(bi->c2));
6897     add_to_active(bi);
6898     }
6899 gbeauche 1.8 #endif
6900 gbeauche 1.1
6901     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6902    
6903     #if JIT_DEBUG
6904     if (JITDebug)
6905     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6906    
6907     if (JITDebug && disasm_block) {
6908     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6909     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6910     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6911     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6912     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6913     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6914     getchar();
6915     }
6916     #endif
6917    
6918     log_dump();
6919 gbeauche 1.5 align_target(align_jumps);
6920 gbeauche 1.1
6921     /* This is the non-direct handler */
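    /* (It guards against cacheline collisions: several blocks can hash
       to the same cache tag, so verify that regs.pc_p really is this
       block's address and bail out through popall_cache_miss on a
       mismatch.) */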
6922     bi->handler=
6923     bi->handler_to_use=(cpuop_func *)get_target();
6924 gbeauche 1.24 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6925     raw_jnz((uintptr)popall_cache_miss);
6926 gbeauche 1.1 comp_pc_p=(uae_u8*)pc_hist[0].location;
6927    
6928     bi->status=BI_FINALIZING;
6929     init_comp();
6930     match_states(bi);
6931     flush(1);
6932    
6933 gbeauche 1.24 raw_jmp((uintptr)bi->direct_handler);
6934 gbeauche 1.1
6935     current_compile_p=get_target();
6936     raise_in_cl_list(bi);
6937    
6938     /* We will flush soon, anyway, so let's do it now */
6939     if (current_compile_p>=max_compile_start)
6940     flush_icache_hard(7);
6941    
6942     bi->status=BI_ACTIVE;
6943     if (redo_current_block)
6944     block_need_recompile(bi);
6945    
6946     #if PROFILE_COMPILE_TIME
6947     compile_time += (clock() - start_time);
6948     #endif
6949     }
6950     }
6951    
6952     void do_nothing(void)
6953     {
6954     /* What did you expect this to do? */
6955     }
6956    
6957     void exec_nostats(void)
6958     {
6959     for (;;) {
6960     uae_u32 opcode = GET_OPCODE;
6961     (*cpufunctbl[opcode])(opcode);
6962     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6963     return; /* We will deal with the spcflags in the caller */
6964     }
6965     }
6966     }
6967    
6968     void execute_normal(void)
6969     {
6970     if (!check_for_cache_miss()) {
6971     cpu_history pc_hist[MAXRUN];
6972     int blocklen = 0;
6973     #if REAL_ADDRESSING || DIRECT_ADDRESSING
6974     start_pc_p = regs.pc_p;
6975     start_pc = get_virtual_address(regs.pc_p);
6976     #else
6977     start_pc_p = regs.pc_oldp;
6978     start_pc = regs.pc;
6979     #endif
6980     for (;;) { /* Take note: This is the do-it-normal loop */
6981     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6982     uae_u32 opcode = GET_OPCODE;
6983     #if FLIGHT_RECORDER
6984     m68k_record_step(m68k_getpc());
6985     #endif
6986     (*cpufunctbl[opcode])(opcode);
6987     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6988     compile_block(pc_hist, blocklen);
6989     return; /* We will deal with the spcflags in the caller */
6990     }
6991     /* No need to check regs.spcflags, because if they were set,
6992     we'd have ended up inside that "if" */
6993     }
6994     }
6995     }
6996    
6997     typedef void (*compiled_handler)(void);
6998    
6999 gbeauche 1.24 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
7000 gbeauche 1.6 void (*m68k_compile_execute)(void) = NULL;
7001     #else
7002 gbeauche 1.1 void m68k_do_compile_execute(void)
7003     {
7004     for (;;) {
7005     ((compiled_handler)(pushall_call_handler))();
7006     /* Whenever we return from that, we should check spcflags */
7007     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7008     if (m68k_do_specialties ())
7009     return;
7010     }
7011     }
7012     }
7013 gbeauche 1.6 #endif