ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.24
Committed: 2004-11-01T16:01:51Z (19 years, 11 months ago) by gbeauche
Branch: MAIN
Changes since 1.23: +210 -150 lines
Log Message:
revive and fix almost two-year old port to x86_64

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 cebix 1.23 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 gbeauche 1.11 * Gwenole Beauchesne
8     *
9 cebix 1.23 * Basilisk II (C) 1997-2004 Christian Bauer
10 gbeauche 1.11 *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.24 /* NOTE: support for AMD64 assumes translation cache and other code
35     * buffers are allocated into a 32-bit address space because (i) B2/JIT
36     * code is not 64-bit clean and (ii) it's faster to resolve branches
37     * that way.
38     */
39     #if !defined(__i386__) && !defined(__x86_64__)
40     #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41     #endif
42    
43 gbeauche 1.1 #define USE_MATCH 0
44    
45     /* kludge for Brian, so he can compile under MSVC++ */
46     #define USE_NORMAL_CALLING_CONVENTION 0
47    
48     #ifndef WIN32
49 gbeauche 1.20 #include <unistd.h>
50 gbeauche 1.1 #include <sys/types.h>
51     #include <sys/mman.h>
52     #endif
53    
54     #include <stdlib.h>
55     #include <fcntl.h>
56     #include <errno.h>
57    
58     #include "sysdeps.h"
59     #include "cpu_emulation.h"
60     #include "main.h"
61     #include "prefs.h"
62     #include "user_strings.h"
63 gbeauche 1.2 #include "vm_alloc.h"
64 gbeauche 1.1
65     #include "m68k.h"
66     #include "memory.h"
67     #include "readcpu.h"
68     #include "newcpu.h"
69     #include "comptbl.h"
70     #include "compiler/compemu.h"
71     #include "fpu/fpu.h"
72     #include "fpu/flags.h"
73    
74     #define DEBUG 1
75     #include "debug.h"
76    
77     #ifdef ENABLE_MON
78     #include "mon.h"
79     #endif
80    
81     #ifndef WIN32
82 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
83     #define PROFILE_UNTRANSLATED_INSNS 1
84 gbeauche 1.1 #endif
85    
86     #ifdef WIN32
87     #undef write_log
88     #define write_log dummy_write_log
89     static void dummy_write_log(const char *, ...) { }
90     #endif
91    
92     #if JIT_DEBUG
93     #undef abort
94     #define abort() do { \
95     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
96     exit(EXIT_FAILURE); \
97     } while (0)
98     #endif
99    
100     #if PROFILE_COMPILE_TIME
101     #include <time.h>
102     static uae_u32 compile_count = 0;
103     static clock_t compile_time = 0;
104     static clock_t emul_start_time = 0;
105     static clock_t emul_end_time = 0;
106     #endif
107    
108 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
109     const int untranslated_top_ten = 20;
110     static uae_u32 raw_cputbl_count[65536] = { 0, };
111     static uae_u16 opcode_nums[65536];
112    
113     static int untranslated_compfn(const void *e1, const void *e2)
114     {
115     return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
116     }
117     #endif
118    
119 gbeauche 1.24 static compop_func *compfunctbl[65536];
120     static compop_func *nfcompfunctbl[65536];
121     static cpuop_func *nfcpufunctbl[65536];
122 gbeauche 1.1 uae_u8* comp_pc_p;
123    
124 gbeauche 1.6 // From newcpu.cpp
125     extern bool quit_program;
126    
127 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
128     #if JIT_DEBUG
129     static bool JITDebug = false; // Enable runtime disassemblers through mon?
130     #else
131     const bool JITDebug = false; // Don't use JIT debug mode at all
132     #endif
133    
134 gbeauche 1.22 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
135 gbeauche 1.1 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
136 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
137 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
138     static bool avoid_fpu = true; // Flag: compile FPU instructions ?
139     static bool have_cmov = false; // target has CMOV instructions ?
140     static bool have_rat_stall = true; // target has partial register stalls ?
141 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
142     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
143 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
144 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
145     static int align_jumps = 32; // Align the start of jumps
146 gbeauche 1.1 static int optcount[10] = {
147     10, // How often a block has to be executed before it is translated
148     0, // How often to use naive translation
149     0, 0, 0, 0,
150     -1, -1, -1, -1
151     };
152    
/* Static per-opcode properties, one entry per 16-bit 68k opcode.
 * Filled in elsewhere from the instruction tables. */
struct op_properties {
	uae_u8 use_flags;  // mask of 68k flags the instruction reads
	uae_u8 set_flags;  // mask of 68k flags the instruction writes
	uae_u8 is_addx;    // non-zero for ADDX/SUBX-style instructions
	uae_u8 cflow;      // control-flow classification (fl_* bits)
};
static op_properties prop[65536];
160    
/* Non-zero if OPCODE must terminate the translated block
 * (fl_end_block bit set in its control-flow properties). */
static inline int end_block(uae_u32 opcode)
{
	return (prop[opcode].cflow & fl_end_block);
}
165    
/* True if OPCODE is classified as a jump to a compile-time-constant
 * target (exact match on fl_const_jump, not a bit test). */
static inline bool is_const_jump(uae_u32 opcode)
{
	return (prop[opcode].cflow == fl_const_jump);
}
170    
/* True if OPCODE may raise a trap/exception (fl_trap bit set). */
static inline bool may_trap(uae_u32 opcode)
{
	return (prop[opcode].cflow & fl_trap);
}
175    
/* Map a fetched 68k opcode word to the index used by the opcode
 * property and handler tables.  When opcode words are kept
 * byte-swapped in memory (HAVE_GET_WORD_UNSWAPPED), swap the two
 * bytes back; otherwise the value is already the table index. */
static inline unsigned int cft_map (unsigned int f)
{
#ifdef HAVE_GET_WORD_UNSWAPPED
	return ((f & 0xff) << 8) | ((f >> 8) & 0xff);
#else
	return f;
#endif
}
184    
185 gbeauche 1.1 uae_u8* start_pc_p;
186     uae_u32 start_pc;
187     uae_u32 current_block_pc_p;
188 gbeauche 1.24 static uintptr current_block_start_target;
189 gbeauche 1.1 uae_u32 needed_flags;
190 gbeauche 1.24 static uintptr next_pc_p;
191     static uintptr taken_pc_p;
192 gbeauche 1.1 static int branch_cc;
193     static int redo_current_block;
194    
195     int segvcount=0;
196     int soft_flush_count=0;
197     int hard_flush_count=0;
198     int checksum_count=0;
199     static uae_u8* current_compile_p=NULL;
200     static uae_u8* max_compile_start;
201     static uae_u8* compiled_code=NULL;
202     static uae_s32 reg_alloc_run;
203 gbeauche 1.24 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
204     static uae_u8* popallspace=NULL;
205 gbeauche 1.1
206     void* pushall_call_handler=NULL;
207     static void* popall_do_nothing=NULL;
208     static void* popall_exec_nostats=NULL;
209     static void* popall_execute_normal=NULL;
210     static void* popall_cache_miss=NULL;
211     static void* popall_recompile_block=NULL;
212     static void* popall_check_checksum=NULL;
213    
214     /* The 68k only ever executes from even addresses. So right now, we
215     * waste half the entries in this array
216     * UPDATE: We now use those entries to store the start of the linked
217     * lists that we maintain for each hash result.
218     */
219     cacheline cache_tags[TAGSIZE];
220     int letit=0;
221     blockinfo* hold_bi[MAX_HOLD_BI];
222     blockinfo* active;
223     blockinfo* dormant;
224    
225     /* 68040 */
226     extern struct cputbl op_smalltbl_0_nf[];
227     extern struct comptbl op_smalltbl_0_comp_nf[];
228     extern struct comptbl op_smalltbl_0_comp_ff[];
229    
230     /* 68020 + 68881 */
231     extern struct cputbl op_smalltbl_1_nf[];
232    
233     /* 68020 */
234     extern struct cputbl op_smalltbl_2_nf[];
235    
236     /* 68010 */
237     extern struct cputbl op_smalltbl_3_nf[];
238    
239     /* 68000 */
240     extern struct cputbl op_smalltbl_4_nf[];
241    
242     /* 68000 slow but compatible. */
243     extern struct cputbl op_smalltbl_5_nf[];
244    
245     static void flush_icache_hard(int n);
246     static void flush_icache_lazy(int n);
247     static void flush_icache_none(int n);
248     void (*flush_icache)(int n) = flush_icache_none;
249    
250    
251    
252     bigstate live;
253     smallstate empty_ss;
254     smallstate default_ss;
255     static int optlev;
256    
257     static int writereg(int r, int size);
258     static void unlock2(int r);
259     static void setlock(int r);
260     static int readreg_specific(int r, int size, int spec);
261     static int writereg_specific(int r, int size, int spec);
262     static void prepare_for_call_1(void);
263     static void prepare_for_call_2(void);
264     static void align_target(uae_u32 a);
265    
266     static uae_s32 nextused[VREGS];
267    
268     uae_u32 m68k_pc_offset;
269    
270     /* Some arithmetic ooperations can be optimized away if the operands
271     * are known to be constant. But that's only a good idea when the
272     * side effects they would have on the flags are not important. This
273     * variable indicates whether we need the side effects or not
274     */
275     uae_u32 needflags=0;
276    
277     /* Flag handling is complicated.
278     *
279     * x86 instructions create flags, which quite often are exactly what we
280     * want. So at times, the "68k" flags are actually in the x86 flags.
281     *
282     * Then again, sometimes we do x86 instructions that clobber the x86
283     * flags, but don't represent a corresponding m68k instruction. In that
284     * case, we have to save them.
285     *
286     * We used to save them to the stack, but now store them back directly
287     * into the regflags.cznv of the traditional emulation. Thus some odd
288     * names.
289     *
290     * So flags can be in either of two places (used to be three; boy were
291     * things complicated back then!); And either place can contain either
292     * valid flags or invalid trash (and on the stack, there was also the
293     * option of "nothing at all", now gone). A couple of variables keep
294     * track of the respective states.
295     *
296     * To make things worse, we might or might not be interested in the flags.
297     * by default, we are, but a call to dont_care_flags can change that
298     * until the next call to live_flags. If we are not, pretty much whatever
299     * is in the register and/or the native flags is seen as valid.
300     */
301    
/* Return the head of the blockinfo list for cache line CL.
 * cache_tags stores the handler at [cl] and the blockinfo list head
 * at [cl+1] (see the comment above cache_tags). */
static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
{
	return cache_tags[cl+1].bi;
}
306    
307     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
308     {
309     blockinfo* bi=get_blockinfo(cacheline(addr));
310    
311     while (bi) {
312     if (bi->pc_p==addr)
313     return bi;
314     bi=bi->next_same_cl;
315     }
316     return NULL;
317     }
318    
319    
320     /*******************************************************************
321     * All sorts of list related functions for all of the lists *
322     *******************************************************************/
323    
/* Unlink BI from its cache-line chain and refresh the dispatch
 * handler for that cache line: the new chain head's handler if one
 * remains, otherwise the generic interpreter entry. */
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (bi->prev_same_cl_p)
		*(bi->prev_same_cl_p)=bi->next_same_cl;
	if (bi->next_same_cl)
		bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
	// Keep the fast-dispatch handler consistent with the chain head.
	if (cache_tags[cl+1].bi)
		cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
	else
		cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}
337    
/* Unlink BI from the active/dormant doubly-linked list it is on
 * (prev_p points at the previous node's `next` field, or the list
 * head pointer itself). */
static __inline__ void remove_from_list(blockinfo* bi)
{
	if (bi->prev_p)
		*(bi->prev_p)=bi->next;
	if (bi->next)
		bi->next->prev_p=bi->prev_p;
}
345    
/* Fully detach BI: from the active/dormant list and from its
 * cache-line chain. */
static __inline__ void remove_from_lists(blockinfo* bi)
{
	remove_from_list(bi);
	remove_from_cl_list(bi);
}
351    
/* Push BI onto the front of its cache-line chain and make its
 * handler the one dispatched for that cache line. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (cache_tags[cl+1].bi)
		cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
	bi->next_same_cl=cache_tags[cl+1].bi;

	cache_tags[cl+1].bi=bi;
	bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

	// New chain head wins the dispatch slot.
	cache_tags[cl].handler=bi->handler_to_use;
}
365    
/* Move BI to the front of its cache-line chain (re-insert), so its
 * handler becomes the dispatched one. */
static __inline__ void raise_in_cl_list(blockinfo* bi)
{
	remove_from_cl_list(bi);
	add_to_cl_list(bi);
}
371    
/* Push BI onto the front of the `active` block list. */
static __inline__ void add_to_active(blockinfo* bi)
{
	if (active)
		active->prev_p=&(bi->next);
	bi->next=active;

	active=bi;
	bi->prev_p=&active;
}
381    
/* Push BI onto the front of the `dormant` block list. */
static __inline__ void add_to_dormant(blockinfo* bi)
{
	if (dormant)
		dormant->prev_p=&(bi->next);
	bi->next=dormant;

	dormant=bi;
	bi->prev_p=&dormant;
}
391    
/* Unlink dependency D from its target's dependency list and clear
 * its link fields so a repeated remove is harmless. */
static __inline__ void remove_dep(dependency* d)
{
	if (d->prev_p)
		*(d->prev_p)=d->next;
	if (d->next)
		d->next->prev_p=d->prev_p;
	d->prev_p=NULL;
	d->next=NULL;
}
401    
/* This block's code is about to be thrown away, so it no longer
   depends on anything else: drop both outgoing jump dependencies. */
static __inline__ void remove_deps(blockinfo* bi)
{
	remove_dep(&(bi->dep[0]));
	remove_dep(&(bi->dep[1]));
}
409    
/* Re-patch the 32-bit relative displacement of the jump recorded in
 * D so it targets handler A.  The +4 accounts for the displacement
 * field's own size (rel32 is relative to the next instruction).
 * NOTE: the 64-bit truncation is intentional — per the file header,
 * all code buffers are assumed to live in a 32-bit address space. */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
	*(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
414    
415     /********************************************************************
416     * Soft flush handling support functions *
417     ********************************************************************/
418    
/* Set BI's direct handler-to-use to DH and re-patch every caller
 * block that jumps directly into BI (its dependency list) so their
 * emitted jumps point at the new handler. */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
	//write_log("bi is %p\n",bi);
	if (dh!=bi->direct_handler_to_use) {
		dependency* x=bi->deplist;
		//write_log("bi->deplist=%p\n",bi->deplist);
		while (x) {
			//write_log("x is %p\n",x);
			//write_log("x->next is %p\n",x->next);
			//write_log("x->prev_p is %p\n",x->prev_p);

			if (x->jmp_off) {
				adjust_jmpdep(x,dh);
			}
			x=x->next;
		}
		bi->direct_handler_to_use=dh;
	}
}
438    
/* Reset BI to the "not yet translated" state: clear handlers back to
 * the generic interpreter entry, reset the execution countdown, and
 * sever its outgoing jump dependencies. */
static __inline__ void invalidate_block(blockinfo* bi)
{
	int i;

	bi->optlevel=0;
	// Block must execute optcount[0] times before being translated again.
	bi->count=optcount[0]-1;
	bi->handler=NULL;
	bi->handler_to_use=(cpuop_func *)popall_execute_normal;
	bi->direct_handler=NULL;
	set_dhtu(bi,bi->direct_pen);
	bi->needed_flags=0xff;
	bi->status=BI_INVALID;
	for (i=0;i<2;i++) {
		bi->dep[i].jmp_off=NULL;
		bi->dep[i].target=NULL;
	}
	remove_deps(bi);
}
457    
/* Record that BI's emitted jump number I (displacement field at
 * JMPADDR) targets the block starting at 68k address TARGET, and
 * link the dependency into the target block's dependency list so the
 * jump can be re-patched when the target's handler changes.
 * The target block must already exist (Dif check). */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
	blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);

	Dif(!tbi) {
		write_log("Could not create jmpdep!\n");
		abort();
	}
	bi->dep[i].jmp_off=jmpaddr;
	bi->dep[i].source=bi;
	bi->dep[i].target=tbi;
	bi->dep[i].next=tbi->deplist;
	if (bi->dep[i].next)
		bi->dep[i].next->prev_p=&(bi->dep[i].next);
	bi->dep[i].prev_p=&(tbi->deplist);
	tbi->deplist=&(bi->dep[i]);
}
475    
/* Mark BI for recompilation: route all entries (direct and
 * cache-line dispatch) back to the generic interpreter path until it
 * is retranslated. */
static __inline__ void block_need_recompile(blockinfo * bi)
{
	uae_u32 cl = cacheline(bi->pc_p);

	set_dhtu(bi, bi->direct_pen);
	bi->direct_handler = bi->direct_pen;

	bi->handler_to_use = (cpuop_func *)popall_execute_normal;
	bi->handler = (cpuop_func *)popall_execute_normal;
	// Only clear the dispatch slot if BI is currently the chain head.
	if (bi == cache_tags[cl + 1].bi)
		cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
	bi->status = BI_NEED_RECOMP;
}
489    
/* Recursively mark every block that jumps directly into BI as
 * needing recompilation (their emitted direct jumps are no longer
 * valid).  A caller currently being compiled causes the current
 * block to be redone instead. */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
	dependency *x = bi->deplist;

	while (x) {
		dependency *next = x->next;	/* This disappears when we mark for
						 * recompilation and thus remove the
						 * blocks from the lists */
		if (x->jmp_off) {
			blockinfo *cbi = x->source;

			Dif(cbi->status == BI_INVALID) {
				// write_log("invalid block in dependency list\n"); // FIXME?
				// abort();
			}
			if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
				block_need_recompile(cbi);
				mark_callers_recompile(cbi);
			}
			else if (cbi->status == BI_COMPILING) {
				// Caller is the block being compiled right now: redo it.
				redo_current_block = 1;
			}
			else if (cbi->status == BI_NEED_RECOMP) {
				/* nothing */
			}
			else {
				//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
			}
		}
		x = next;
	}
}
522    
/* Return the blockinfo for 68k address ADDR, creating one from the
 * pre-allocated hold_bi pool if none exists yet.  Aborts if the pool
 * is exhausted.
 * NOTE(review): the `setstate` parameter is unused in this body —
 * presumably kept for interface compatibility; confirm against callers. */
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
	blockinfo* bi=get_blockinfo_addr(addr);
	int i;

	if (!bi) {
		// Take the first available pre-allocated blockinfo.
		for (i=0;i<MAX_HOLD_BI && !bi;i++) {
			if (hold_bi[i]) {
				uae_u32 cl=cacheline(addr);

				bi=hold_bi[i];
				hold_bi[i]=NULL;
				bi->pc_p=(uae_u8 *)addr;
				invalidate_block(bi);
				add_to_active(bi);
				add_to_cl_list(bi);

			}
		}
	}
	if (!bi) {
		write_log("Looking for blockinfo, can't find free one\n");
		abort();
	}
	return bi;
}
549    
550     static void prepare_block(blockinfo* bi);
551    
552     /* Managment of blockinfos.
553    
554     A blockinfo struct is allocated whenever a new block has to be
555     compiled. If the list of free blockinfos is empty, we allocate a new
556     pool of blockinfos and link the newly created blockinfos altogether
557     into the list of free blockinfos. Otherwise, we simply pop a structure
558 gbeauche 1.7 off the free list.
559 gbeauche 1.1
560     Blockinfo are lazily deallocated, i.e. chained altogether in the
561     list of free blockinfos whenvever a translation cache flush (hard or
562     soft) request occurs.
563     */
564    
565 gbeauche 1.7 template< class T >
566     class LazyBlockAllocator
567     {
568     enum {
569     kPoolSize = 1 + 4096 / sizeof(T)
570     };
571     struct Pool {
572     T chunk[kPoolSize];
573     Pool * next;
574     };
575     Pool * mPools;
576     T * mChunks;
577     public:
578     LazyBlockAllocator() : mPools(0), mChunks(0) { }
579     ~LazyBlockAllocator();
580     T * acquire();
581     void release(T * const);
582 gbeauche 1.1 };
583    
584 gbeauche 1.7 template< class T >
585     LazyBlockAllocator<T>::~LazyBlockAllocator()
586 gbeauche 1.1 {
587 gbeauche 1.7 Pool * currentPool = mPools;
588     while (currentPool) {
589     Pool * deadPool = currentPool;
590     currentPool = currentPool->next;
591     free(deadPool);
592     }
593     }
594    
595     template< class T >
596     T * LazyBlockAllocator<T>::acquire()
597     {
598     if (!mChunks) {
599     // There is no chunk left, allocate a new pool and link the
600     // chunks into the free list
601     Pool * newPool = (Pool *)malloc(sizeof(Pool));
602     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
603     chunk->next = mChunks;
604     mChunks = chunk;
605 gbeauche 1.1 }
606 gbeauche 1.7 newPool->next = mPools;
607     mPools = newPool;
608     }
609     T * chunk = mChunks;
610     mChunks = chunk->next;
611     return chunk;
612     }
613    
/* Return CHUNK to the free list; memory is reclaimed only when the
 * allocator itself is destroyed. */
template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
	chunk->next = mChunks;
	mChunks = chunk;
}
620    
/* Bump allocator that carves T objects straight out of the
 * translation cache (current_compile_p).  release() is a no-op:
 * the storage is reclaimed wholesale when the cache is flushed. */
template< class T >
class HardBlockAllocator
{
public:
	T * acquire() {
		T * data = (T *)current_compile_p;
		current_compile_p += sizeof(T);
		return data;
	}

	void release(T * const chunk) {
		// Deallocated on invalidation
	}
};
635    
636     #if USE_SEPARATE_BIA
637     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
638     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
639 gbeauche 1.1 #else
640 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
641     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
642 gbeauche 1.1 #endif
643    
/* Allocate one checksum_info record with its chain link cleared. */
static __inline__ checksum_info *alloc_checksum_info(void)
{
	checksum_info *csi = ChecksumInfoAllocator.acquire();
	csi->next = NULL;
	return csi;
}
650    
/* Return one checksum_info record to its allocator. */
static __inline__ void free_checksum_info(checksum_info *csi)
{
	csi->next = NULL;
	ChecksumInfoAllocator.release(csi);
}
656    
657     static __inline__ void free_checksum_info_chain(checksum_info *csi)
658     {
659     while (csi != NULL) {
660     checksum_info *csi2 = csi->next;
661     free_checksum_info(csi);
662     csi = csi2;
663     }
664     }
665 gbeauche 1.7
/* Allocate a blockinfo; when per-block checksum records are enabled,
 * start with an empty checksum chain. */
static __inline__ blockinfo *alloc_blockinfo(void)
{
	blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
	bi->csi = NULL;
#endif
	return bi;
}
674    
/* Release a blockinfo, first freeing its checksum chain if enabled. */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
	free_checksum_info_chain(bi->csi);
	bi->csi = NULL;
#endif
	BlockInfoAllocator.release(bi);
}
683    
/* Top up the hold_bi pool of pre-allocated, prepared blockinfos.
 * Returns early as soon as one filled slot is found — slots are
 * consumed from index 0, so a non-empty slot implies the rest of the
 * pool is still full. */
static __inline__ void alloc_blockinfos(void)
{
	int i;
	blockinfo* bi;

	for (i=0;i<MAX_HOLD_BI;i++) {
		if (hold_bi[i])
			return;
		bi=hold_bi[i]=alloc_blockinfo();
		prepare_block(bi);
	}
}
696    
697     /********************************************************************
698     * Functions to emit data into memory, and other general support *
699     ********************************************************************/
700    
701     static uae_u8* target;
702    
/* Placeholder: the code emitter needs no one-time initialization. */
static void emit_init(void)
{
}
706    
/* Append one byte at the current emit position and advance it. */
static __inline__ void emit_byte(uae_u8 x)
{
	*target++=x;
}
711    
712     static __inline__ void emit_word(uae_u16 x)
713     {
714     *((uae_u16*)target)=x;
715     target+=2;
716     }
717    
718     static __inline__ void emit_long(uae_u32 x)
719     {
720     *((uae_u32*)target)=x;
721     target+=4;
722     }
723    
724 gbeauche 1.24 static __inline__ void emit_quad(uae_u64 x)
725     {
726     *((uae_u64*)target)=x;
727     target+=8;
728     }
729    
/* Append BLOCKLEN raw bytes at the current emit position. */
static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
{
	memcpy((uae_u8 *)target,block,blocklen);
	target+=blocklen;
}
735    
/* Byte-swap a 32-bit value (endianness reversal). */
static __inline__ uae_u32 reverse32(uae_u32 v)
{
#if 1
	// gb-- We have specialized byteswapping functions, just use them
	return do_byteswap_32(v);
#else
	return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
#endif
}
745    
746     /********************************************************************
747     * Getting the information about the target CPU *
748     ********************************************************************/
749    
750     #include "codegen_x86.cpp"
751    
/* Set the current code-emission position. */
void set_target(uae_u8* t)
{
	target=t;
}
756    
/* Current code-emission position (non-optimizing accessor). */
static __inline__ uae_u8* get_target_noopt(void)
{
	return target;
}
761    
/* Public accessor for the current code-emission position. */
__inline__ uae_u8* get_target(void)
{
	return get_target_noopt();
}
766    
767    
768     /********************************************************************
769     * Flags status handling. EMIT TIME! *
770     ********************************************************************/
771    
772     static void bt_l_ri_noclobber(R4 r, IMM i);
773    
/* Ensure the 68k flags are live in the native x86 flags register.
 * If they are currently stored in regflags ("on stack" in the old
 * terminology — see the long comment above), reload them from there.
 * Aborts if neither location holds valid flags. */
static void make_flags_live_internal(void)
{
	if (live.flags_in_flags==VALID)
		return;
	Dif (live.flags_on_stack==TRASH) {
		write_log("Want flags, got something on stack, but it is TRASH\n");
		abort();
	}
	if (live.flags_on_stack==VALID) {
		int tmp;
		tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
		raw_reg_to_flags(tmp);
		unlock2(tmp);

		live.flags_in_flags=VALID;
		return;
	}
	write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
		  live.flags_in_flags,live.flags_on_stack);
	abort();
}
795    
/* Save the live x86 flags back into regflags (the "stack" location).
 * If flags are unimportant right now, just declare the stored copy
 * valid without emitting any code. */
static void flags_to_stack(void)
{
	if (live.flags_on_stack==VALID)
		return;
	if (!live.flags_are_important) {
		live.flags_on_stack=VALID;
		return;
	}
	Dif (live.flags_in_flags!=VALID)
		abort();
	else {
		int tmp;
		tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
		raw_flags_to_reg(tmp);
		unlock2(tmp);
	}
	live.flags_on_stack=VALID;
}
814    
/* About to execute something that trashes the x86 flags: spill them
 * to regflags first if they are the only valid copy. */
static __inline__ void clobber_flags(void)
{
	if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
		flags_to_stack();
	live.flags_in_flags=TRASH;
}
821    
822     /* Prepare for leaving the compiled stuff */
823     static __inline__ void flush_flags(void)
824     {
825     flags_to_stack();
826     return;
827     }
828    
829     int touchcnt;
830    
831     /********************************************************************
832 gbeauche 1.18 * Partial register flushing for optimized calls *
833     ********************************************************************/
834    
/* Per-instruction 68k register usage: bit N of each mask refers to
 * register N (D0-D7 = bits 0-7, A0-A7 = bits 8-15). */
struct regusage {
	uae_u16 rmask;  // registers the instruction reads
	uae_u16 wmask;  // registers the instruction writes
};
839    
/* Set bit REG in MASK; compiled out entirely unless the optimized
 * call support is enabled. */
static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	*mask |= 1 << reg;
#endif
}
846    
/* Test bit REG in MASK.  Without optimized call support, behave
 * conservatively: report every register as used. */
static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	return (*mask & (1 << reg));
#else
	/* Default: instruction reads & write to register */
	return true;
#endif
}
856    
/* Mark register REG as read by the instruction. */
static inline void ru_set_read(regusage *ru, int reg)
{
	ru_set(&ru->rmask, reg);
}
861    
/* Mark register REG as written by the instruction. */
static inline void ru_set_write(regusage *ru, int reg)
{
	ru_set(&ru->wmask, reg);
}
866    
/* Does the instruction read register REG? */
static inline bool ru_read_p(const regusage *ru, int reg)
{
	return ru_get(&ru->rmask, reg);
}
871    
/* Does the instruction write register REG? */
static inline bool ru_write_p(const regusage *ru, int reg)
{
	return ru_get(&ru->wmask, reg);
}
876    
/* Record the register usage of one effective-address operand and
 * advance m68k_pc_offset past any extension words it consumes.
 * WRITE_MODE selects whether a Dreg/Areg operand counts as a write;
 * address registers used for indirection are always reads. */
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
		       wordsizes size, int write_mode)
{
	switch (mode) {
	case Areg:
		reg += 8;	// address registers occupy mask bits 8-15
		/* fall through */
	case Dreg:
		ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
		break;
	case Ad16:
		/* skip displacement */
		m68k_pc_offset += 2;
		/* fall through */
	case Aind:
	case Aipi:
	case Apdi:
		ru_set_read(ru, reg+8);
		break;
	case Ad8r:
		ru_set_read(ru, reg+8);
		/* fall through */
	case PC8r: {
		// Brief extension word: index register in bits 12-15; full
		// format (bit 8 set) consumes further displacement words.
		uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
		reg = (dp >> 12) & 15;
		ru_set_read(ru, reg);
		if (dp & 0x100)
			m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
		break;
	}
	case PC16:
	case absw:
	case imm0:
	case imm1:
		m68k_pc_offset += 2;
		break;
	case absl:
	case imm2:
		m68k_pc_offset += 4;
		break;
	case immi:
		m68k_pc_offset += (size == sz_long) ? 4 : 2;
		break;
	}
}
921    
922     /* TODO: split into a static initialization part and a dynamic one
923     (instructions depending on extension words) */
924     static void ru_fill(regusage *ru, uae_u32 opcode)
925     {
926     m68k_pc_offset += 2;
927    
928     /* Default: no register is used or written to */
929     ru->rmask = 0;
930     ru->wmask = 0;
931    
932     uae_u32 real_opcode = cft_map(opcode);
933     struct instr *dp = &table68k[real_opcode];
934    
935     bool rw_dest = true;
936     bool handled = false;
937    
938     /* Handle some instructions specifically */
939     uae_u16 reg, ext;
940     switch (dp->mnemo) {
941     case i_BFCHG:
942     case i_BFCLR:
943     case i_BFEXTS:
944     case i_BFEXTU:
945     case i_BFFFO:
946     case i_BFINS:
947     case i_BFSET:
948     case i_BFTST:
949     ext = comp_get_iword((m68k_pc_offset+=2)-2);
950     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
951     if (ext & 0x020) ru_set_read(ru, ext & 7);
952     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
953     if (dp->dmode == Dreg)
954     ru_set_read(ru, dp->dreg);
955     switch (dp->mnemo) {
956     case i_BFEXTS:
957     case i_BFEXTU:
958     case i_BFFFO:
959     ru_set_write(ru, (ext >> 12) & 7);
960     break;
961     case i_BFINS:
962     ru_set_read(ru, (ext >> 12) & 7);
963     /* fall through */
964     case i_BFCHG:
965     case i_BFCLR:
966     case i_BSET:
967     if (dp->dmode == Dreg)
968     ru_set_write(ru, dp->dreg);
969     break;
970     }
971     handled = true;
972     rw_dest = false;
973     break;
974    
975     case i_BTST:
976     rw_dest = false;
977     break;
978    
979     case i_CAS:
980     {
981     ext = comp_get_iword((m68k_pc_offset+=2)-2);
982     int Du = ext & 7;
983     ru_set_read(ru, Du);
984     int Dc = (ext >> 6) & 7;
985     ru_set_read(ru, Dc);
986     ru_set_write(ru, Dc);
987     break;
988     }
989     case i_CAS2:
990     {
991     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
992     ext = comp_get_iword((m68k_pc_offset+=2)-2);
993     Rn1 = (ext >> 12) & 15;
994     Du1 = (ext >> 6) & 7;
995     Dc1 = ext & 7;
996     ru_set_read(ru, Rn1);
997     ru_set_read(ru, Du1);
998     ru_set_read(ru, Dc1);
999     ru_set_write(ru, Dc1);
1000     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1001     Rn2 = (ext >> 12) & 15;
1002     Du2 = (ext >> 6) & 7;
1003     Dc2 = ext & 7;
1004     ru_set_read(ru, Rn2);
1005     ru_set_read(ru, Du2);
1006     ru_set_write(ru, Dc2);
1007     break;
1008     }
1009     case i_DIVL: case i_MULL:
1010     m68k_pc_offset += 2;
1011     break;
1012     case i_LEA:
1013     case i_MOVE: case i_MOVEA: case i_MOVE16:
1014     rw_dest = false;
1015     break;
1016     case i_PACK: case i_UNPK:
1017     rw_dest = false;
1018     m68k_pc_offset += 2;
1019     break;
1020     case i_TRAPcc:
1021     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1022     break;
1023     case i_RTR:
1024     /* do nothing, just for coverage debugging */
1025     break;
1026     /* TODO: handle EXG instruction */
1027     }
1028    
1029     /* Handle A-Traps better */
1030     if ((real_opcode & 0xf000) == 0xa000) {
1031     handled = true;
1032     }
1033    
1034     /* Handle EmulOps better */
1035     if ((real_opcode & 0xff00) == 0x7100) {
1036     handled = true;
1037     ru->rmask = 0xffff;
1038     ru->wmask = 0;
1039     }
1040    
1041     if (dp->suse && !handled)
1042     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1043    
1044     if (dp->duse && !handled)
1045     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1046    
1047     if (rw_dest)
1048     ru->rmask |= ru->wmask;
1049    
1050     handled = handled || dp->suse || dp->duse;
1051    
1052     /* Mark all registers as used/written if the instruction may trap */
1053     if (may_trap(opcode)) {
1054     handled = true;
1055     ru->rmask = 0xffff;
1056     ru->wmask = 0xffff;
1057     }
1058    
1059     if (!handled) {
1060     write_log("ru_fill: %04x = { %04x, %04x }\n",
1061     real_opcode, ru->rmask, ru->wmask);
1062     abort();
1063     }
1064     }
1065    
1066     /********************************************************************
1067 gbeauche 1.1 * register allocation per block logging *
1068     ********************************************************************/
1069    
/* Per-block register-liveness log.  These arrays record, for the block
   currently being compiled, what is known about each virtual (mid-layer)
   and native (host) register; callers_need_recompile() compares this log
   against a saved smallstate. */
static uae_s8 vstate[VREGS];    /* liveness state per virtual register */
static uae_s8 vwritten[VREGS];  /* set once a vreg has been written in this block */
static uae_s8 nstate[N_REGS];   /* liveness state per native register */

#define L_UNKNOWN -127  /* not yet observed in this block */
#define L_UNAVAIL -1    /* native reg used as a scratch/temp */
#define L_NEEDED -2     /* vreg value is read before any write */
#define L_UNNEEDED -3   /* vreg value is clobbered before any read */
1078    
1079     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1080     {
1081     int i;
1082    
1083     for (i = 0; i < VREGS; i++)
1084     s->virt[i] = vstate[i];
1085     for (i = 0; i < N_REGS; i++)
1086     s->nat[i] = nstate[i];
1087     }
1088    
1089     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1090     {
1091     int i;
1092     int reverse = 0;
1093    
1094     for (i = 0; i < VREGS; i++) {
1095     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1096     return 1;
1097     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1098     reverse++;
1099     }
1100     for (i = 0; i < N_REGS; i++) {
1101     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1102     return 1;
1103     if (nstate[i] < 0 && s->nat[i] >= 0)
1104     reverse++;
1105     }
1106     if (reverse >= 2 && USE_MATCH)
1107     return 1; /* In this case, it might be worth recompiling the
1108     * callers */
1109     return 0;
1110     }
1111    
1112     static __inline__ void log_startblock(void)
1113     {
1114     int i;
1115    
1116     for (i = 0; i < VREGS; i++) {
1117     vstate[i] = L_UNKNOWN;
1118     vwritten[i] = 0;
1119     }
1120     for (i = 0; i < N_REGS; i++)
1121     nstate[i] = L_UNKNOWN;
1122     }
1123    
1124     /* Using an n-reg for a temp variable */
1125     static __inline__ void log_isused(int n)
1126     {
1127     if (nstate[n] == L_UNKNOWN)
1128     nstate[n] = L_UNAVAIL;
1129     }
1130    
1131     static __inline__ void log_visused(int r)
1132     {
1133     if (vstate[r] == L_UNKNOWN)
1134     vstate[r] = L_NEEDED;
1135     }
1136    
1137     static __inline__ void do_load_reg(int n, int r)
1138     {
1139     if (r == FLAGTMP)
1140     raw_load_flagreg(n, r);
1141     else if (r == FLAGX)
1142     raw_load_flagx(n, r);
1143     else
1144 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1145 gbeauche 1.1 }
1146    
static __inline__ void check_load_reg(int n, int r)
{
    /* Unconditionally reload vreg r from memory into native reg n.
       Unlike do_load_reg(), no special-casing of the flag registers. */
    raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}
1151    
static __inline__ void log_vwrite(int r)
{
    /* Record that vreg r has been stored back to memory in this block. */
    vwritten[r] = 1;
}
1156    
1157     /* Using an n-reg to hold a v-reg */
1158     static __inline__ void log_isreg(int n, int r)
1159     {
1160     static int count = 0;
1161    
1162     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1163     nstate[n] = r;
1164     else {
1165     do_load_reg(n, r);
1166     if (nstate[n] == L_UNKNOWN)
1167     nstate[n] = L_UNAVAIL;
1168     }
1169     if (vstate[r] == L_UNKNOWN)
1170     vstate[r] = L_NEEDED;
1171     }
1172    
1173     static __inline__ void log_clobberreg(int r)
1174     {
1175     if (vstate[r] == L_UNKNOWN)
1176     vstate[r] = L_UNNEEDED;
1177     }
1178    
1179     /* This ends all possibility of clever register allocation */
1180    
1181     static __inline__ void log_flush(void)
1182     {
1183     int i;
1184    
1185     for (i = 0; i < VREGS; i++)
1186     if (vstate[i] == L_UNKNOWN)
1187     vstate[i] = L_NEEDED;
1188     for (i = 0; i < N_REGS; i++)
1189     if (nstate[i] == L_UNKNOWN)
1190     nstate[i] = L_UNAVAIL;
1191     }
1192    
static __inline__ void log_dump(void)
{
    int i;

    /* Debug helper: dump the liveness log.  The early return below
       deliberately disables the output; remove it to re-enable. */
    return;

    write_log("----------------------\n");
    for (i = 0; i < N_REGS; i++) {
        switch (nstate[i]) {
        case L_UNKNOWN:
            write_log("Nat %d : UNKNOWN\n", i);
            break;
        case L_UNAVAIL:
            write_log("Nat %d : UNAVAIL\n", i);
            break;
        default:
            /* nstate[i] >= 0 means "still holds vreg nstate[i]". */
            write_log("Nat %d : %d\n", i, nstate[i]);
            break;
        }
    }
    for (i = 0; i < VREGS; i++) {
        if (vstate[i] == L_UNNEEDED)
            write_log("Virt %d: UNNEEDED\n", i);
    }
}
1218    
1219     /********************************************************************
1220     * register status handling. EMIT TIME! *
1221     ********************************************************************/
1222    
1223     static __inline__ void set_status(int r, int status)
1224     {
1225     if (status == ISCONST)
1226     log_clobberreg(r);
1227     live.state[r].status=status;
1228     }
1229    
1230     static __inline__ int isinreg(int r)
1231     {
1232     return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1233     }
1234    
1235     static __inline__ void adjust_nreg(int r, uae_u32 val)
1236     {
1237     if (!val)
1238     return;
1239     raw_lea_l_brr(r,r,val);
1240     }
1241    
/* Write vreg r back to its memory slot if dirty, first materializing any
   pending constant offset into the native register when that is safe. */
static void tomem(int r)
{
    int rr=live.state[r].realreg;

    if (isinreg(r)) {
        /* Only fold the offset in when the native reg holds r exclusively
           and is not locked — otherwise other vregs would be corrupted. */
        if (live.state[r].val && live.nat[rr].nholds==1
            && !live.nat[rr].locked) {
            // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
            // live.state[r].val,r,rr,target);
            adjust_nreg(rr,live.state[r].val);
            live.state[r].val=0;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
        }
    }

    if (live.state[r].status==DIRTY) {
        /* Store only the dirty portion (1, 2 or 4 bytes). */
        switch (live.state[r].dirtysize) {
        case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
        case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
        case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
        default: abort();
        }
        log_vwrite(r);
        set_status(r,CLEAN);
        live.state[r].dirtysize=0;
    }
}
1270    
1271     static __inline__ int isconst(int r)
1272     {
1273     return live.state[r].status==ISCONST;
1274     }
1275    
1276     int is_const(int r)
1277     {
1278     return isconst(r);
1279     }
1280    
1281     static __inline__ void writeback_const(int r)
1282     {
1283     if (!isconst(r))
1284     return;
1285     Dif (live.state[r].needflush==NF_HANDLER) {
1286     write_log("Trying to write back constant NF_HANDLER!\n");
1287     abort();
1288     }
1289    
1290 gbeauche 1.24 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1291 gbeauche 1.1 log_vwrite(r);
1292     live.state[r].val=0;
1293     set_status(r,INMEM);
1294     }
1295    
1296     static __inline__ void tomem_c(int r)
1297     {
1298     if (isconst(r)) {
1299     writeback_const(r);
1300     }
1301     else
1302     tomem(r);
1303     }
1304    
/* Remove vreg r from its native register, writing it back first if it is
   dirty.  Afterwards r lives only in memory. */
static void evict(int r)
{
    int rr;

    if (!isinreg(r))
        return;
    tomem(r);
    rr=live.state[r].realreg;

    Dif (live.nat[rr].locked &&
        live.nat[rr].nholds==1) {
        write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
        abort();
    }

    live.nat[rr].nholds--;
    if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
        /* Keep holds[] dense: move the former top entry into the slot
           that r vacated, and fix that vreg's back-pointer. */
        int topreg=live.nat[rr].holds[live.nat[rr].nholds];
        int thisind=live.state[r].realind;

        live.nat[rr].holds[thisind]=topreg;
        live.state[topreg].realind=thisind;
    }
    live.state[r].realreg=-1;
    set_status(r,INMEM);
}
1331    
1332     static __inline__ void free_nreg(int r)
1333     {
1334     int i=live.nat[r].nholds;
1335    
1336     while (i) {
1337     int vr;
1338    
1339     --i;
1340     vr=live.nat[r].holds[i];
1341     evict(vr);
1342     }
1343     Dif (live.nat[r].nholds!=0) {
1344     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1345     abort();
1346     }
1347     }
1348    
1349     /* Use with care! */
1350     static __inline__ void isclean(int r)
1351     {
1352     if (!isinreg(r))
1353     return;
1354     live.state[r].validsize=4;
1355     live.state[r].dirtysize=0;
1356     live.state[r].val=0;
1357     set_status(r,CLEAN);
1358     }
1359    
1360     static __inline__ void disassociate(int r)
1361     {
1362     isclean(r);
1363     evict(r);
1364     }
1365    
1366     static __inline__ void set_const(int r, uae_u32 val)
1367     {
1368     disassociate(r);
1369     live.state[r].val=val;
1370     set_status(r,ISCONST);
1371     }
1372    
static __inline__ uae_u32 get_offset(int r)
{
    /* Pending constant offset (or constant value, if ISCONST) of vreg r. */
    return live.state[r].val;
}
1377    
/* Allocate a native register for vreg r.
 *   size        - access size needed (1/2/4); byte/word need a reg that
 *                 can address that sub-part
 *   willclobber - nonzero if the caller will overwrite the value, so no
 *                 load from memory is required
 *   hint        - preferred native register, or -1 for no preference
 * Returns the chosen native register.  Victim selection is LRU-ish via
 * the 'touched' counters; empty registers have badness 0 and the hint
 * gets a large bonus. */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness=0; /* to shut up gcc */
    bestreg=-1;
    when=2000000000;

    for (i=N_REGS;i--;) {
        badness=live.nat[i].touched;
        if (live.nat[i].nholds==0)
            badness=0;
        if (i==hint)
            badness-=200000000;
        if (!live.nat[i].locked && badness<when) {
            /* Sub-long accesses require a byte/word capable register. */
            if ((size==1 && live.nat[i].canbyte) ||
                (size==2 && live.nat[i].canword) ||
                (size==4)) {
                bestreg=i;
                when=badness;
                if (live.nat[i].nholds==0 && hint<0)
                    break;
                if (i==hint)
                    break;
            }
        }
    }
    Dif (bestreg==-1)
        abort();

    if (live.nat[bestreg].nholds>0) {
        free_nreg(bestreg);
    }
    if (isinreg(r)) {
        int rr=live.state[r].realreg;
        /* This will happen if we read a partially dirty register at a
           bigger size */
        Dif (willclobber || live.state[r].validsize>=size)
            abort();
        Dif (live.nat[rr].nholds!=1)
            abort();
        if (size==4 && live.state[r].validsize==2) {
            /* Merge: low word is valid in rr, high word comes from
               memory.  The bswap/zero-extend dance isolates the memory
               copy's upper half and adds it to the register's lower half. */
            log_isused(bestreg);
            log_visused(r);
            raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
            raw_bswap_32(bestreg);
            raw_zero_extend_16_rr(rr,rr);
            raw_zero_extend_16_rr(bestreg,bestreg);
            raw_bswap_32(bestreg);
            raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
            live.state[r].validsize=4;
            live.nat[rr].touched=touchcnt++;
            return rr;
        }
        if (live.state[r].validsize==1) {
            /* Nothing yet */
        }
        evict(r);
    }

    if (!willclobber) {
        /* Caller will read the value: materialize it into bestreg. */
        if (live.state[r].status!=UNDEF) {
            if (isconst(r)) {
                raw_mov_l_ri(bestreg,live.state[r].val);
                live.state[r].val=0;
                live.state[r].dirtysize=4;
                set_status(r,DIRTY);
                log_isused(bestreg);
            }
            else {
                log_isreg(bestreg, r); /* This will also load it! */
                live.state[r].dirtysize=0;
                set_status(r,CLEAN);
            }
        }
        else {
            live.state[r].val=0;
            live.state[r].dirtysize=0;
            set_status(r,CLEAN);
            log_isused(bestreg);
        }
        live.state[r].validsize=4;
    }
    else { /* this is the easiest way, but not optimal. FIXME! */
        /* Now it's trickier, but hopefully still OK */
        if (!isconst(r) || size==4) {
            live.state[r].validsize=size;
            live.state[r].dirtysize=size;
            live.state[r].val=0;
            set_status(r,DIRTY);
            if (size == 4) {
                log_clobberreg(r);
                log_isused(bestreg);
            }
            else {
                log_visused(r);
                log_isused(bestreg);
            }
        }
        else {
            /* Partial clobber of a constant: load the constant first so
               the untouched bytes stay correct. */
            if (live.state[r].status!=UNDEF)
                raw_mov_l_ri(bestreg,live.state[r].val);
            live.state[r].val=0;
            live.state[r].validsize=4;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
            log_isused(bestreg);
        }
    }
    /* Link r into bestreg's holds[] list. */
    live.state[r].realreg=bestreg;
    live.state[r].realind=live.nat[bestreg].nholds;
    live.nat[bestreg].touched=touchcnt++;
    live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
    live.nat[bestreg].nholds++;

    return bestreg;
}
1496    
static int alloc_reg(int r, int size, int willclobber)
{
    /* alloc_reg_hinted() without a preferred native register. */
    return alloc_reg_hinted(r,size,willclobber,-1);
}
1501    
static void unlock2(int r)
{
    /* Drop one lock level on native reg r; aborts on underflow. */
    Dif (!live.nat[r].locked)
        abort();
    live.nat[r].locked--;
}
1508    
static void setlock(int r)
{
    /* Pin native reg r against eviction (lock is counted, not boolean). */
    live.nat[r].locked++;
}
1513    
1514    
/* Copy native reg s into native reg d (freeing d's current holders
   first) and transfer all of s's vreg mappings over to d. */
static void mov_nregs(int d, int s)
{
    int ns=live.nat[s].nholds;
    int nd=live.nat[d].nholds;
    int i;

    if (s==d)
        return;

    if (nd>0)
        free_nreg(d);

    log_isused(d);
    raw_mov_l_rr(d,s);

    /* Re-point every vreg that lived in s at d. */
    for (i=0;i<live.nat[s].nholds;i++) {
        int vs=live.nat[s].holds[i];

        live.state[vs].realreg=d;
        live.state[vs].realind=i;
        live.nat[d].holds[i]=vs;
    }
    live.nat[d].nholds=live.nat[s].nholds;

    live.nat[s].nholds=0;
}
1541    
1542    
/* Ensure vreg r is the sole occupant of a native register so it can be
   modified without corrupting other vregs sharing that register.
   'size' is the pending write size (0 = none), 'spec' an optional
   specific native register to move to (-1 = any). */
static __inline__ void make_exclusive(int r, int size, int spec)
{
    int clobber;
    reg_status oldstate;
    int rr=live.state[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!isinreg(r))
        return;
    if (live.nat[rr].nholds==1)
        return;
    /* Count co-residents that would need a writeback if evicted. */
    for (i=0;i<live.nat[rr].nholds;i++) {
        int vr=live.nat[rr].holds[i];
        if (vr!=r &&
            (live.state[vr].status==DIRTY || live.state[vr].val))
            ndirt++;
    }
    if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
        /* Everything else is clean, so let's keep this register */
        for (i=0;i<live.nat[rr].nholds;i++) {
            int vr=live.nat[rr].holds[i];
            if (vr!=r) {
                evict(vr);
                i--; /* Try that index again! */
            }
        }
        Dif (live.nat[rr].nholds!=1) {
            write_log("natreg %d holds %d vregs, %d not exclusive\n",
                rr,live.nat[rr].nholds,r);
            abort();
        }
        return;
    }

    /* We have to split the register */
    oldstate=live.state[r];

    setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    disassociate(r);
    /* Get a new register, that we will clobber completely */
    if (oldstate.status==DIRTY) {
        /* If dirtysize is <4, we need a register that can handle the
           eventual smaller memory store! Thanks to Quake68k for exposing
           this detail ;-) */
        nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
    }
    else {
        nr=alloc_reg_hinted(r,4,1,spec);
    }
    nind=live.state[r].realind;
    live.state[r]=oldstate; /* Keep all the old state info */
    live.state[r].realreg=nr;
    live.state[r].realind=nind;

    if (size<live.state[r].validsize) {
        /* Caller still needs (part of) the old value: copy it over. */
        if (live.state[r].val) {
            /* Might as well compensate for the offset now */
            raw_lea_l_brr(nr,rr,oldstate.val);
            live.state[r].val=0;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
        }
        else
            raw_mov_l_rr(nr,rr); /* Make another copy */
    }
    unlock2(rr);
}
1614    
static __inline__ void add_offset(int r, uae_u32 off)
{
    /* Defer an addition: just accumulate into the pending offset. */
    live.state[r].val+=off;
}
1619    
/* Materialize vreg r's pending constant offset into its native register
   (allocating one if needed).  'spec' optionally hints which native
   register to use.  Constants are left alone. */
static __inline__ void remove_offset(int r, int spec)
{
    reg_status oldstate;
    int rr;

    if (isconst(r))
        return;
    if (live.state[r].val==0)
        return;
    /* A partially-valid register cannot absorb the offset; reload. */
    if (isinreg(r) && live.state[r].validsize<4)
        evict(r);

    if (!isinreg(r))
        alloc_reg_hinted(r,4,0,spec);

    Dif (live.state[r].validsize!=4) {
        write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
        abort();
    }
    make_exclusive(r,0,-1);
    /* make_exclusive might have done the job already */
    if (live.state[r].val==0)
        return;

    rr=live.state[r].realreg;

    if (live.nat[rr].nholds==1) {
        //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
        // live.state[r].val,r,rr,target);
        adjust_nreg(rr,live.state[r].val);
        live.state[r].dirtysize=4;
        live.state[r].val=0;
        set_status(r,DIRTY);
        return;
    }
    write_log("Failed in remove_offset\n");
    abort();
}
1658    
1659     static __inline__ void remove_all_offsets(void)
1660     {
1661     int i;
1662    
1663     for (i=0;i<VREGS;i++)
1664     remove_offset(i,-1);
1665     }
1666    
/* Get vreg r into a native register for reading.
 *   size       - bytes the caller will read (1/2/4)
 *   spec       - specific native register to use, or -1 for any
 *   can_offset - nonzero if a pending constant offset may stay pending
 * Returns the (locked) native register holding r. */
static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
        write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    if (!can_offset)
        remove_offset(r,spec);

    if (isinreg(r) && live.state[r].validsize>=size) {
        n=live.state[r].realreg;
        /* Sub-long reads need a byte/word capable register, unless a
           specific register was requested (the move fixes it up). */
        switch(size) {
        case 1:
            if (live.nat[n].canbyte || spec>=0) {
                answer=n;
            }
            break;
        case 2:
            if (live.nat[n].canword || spec>=0) {
                answer=n;
            }
            break;
        case 4:
            answer=n;
            break;
        default: abort();
        }
        if (answer<0)
            evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
        answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
    }

    if (spec>=0 && spec!=answer) {
        /* Too bad */
        mov_nregs(spec,answer);
        answer=spec;
    }
    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    return answer;
}
1714    
1715    
1716    
static int readreg(int r, int size)
{
    /* Read access, any native register, offsets materialized. */
    return readreg_general(r,size,-1,0);
}
1721    
static int readreg_specific(int r, int size, int spec)
{
    /* Read access forced into native register 'spec'. */
    return readreg_general(r,size,spec,0);
}
1726    
static int readreg_offset(int r, int size)
{
    /* Read access that tolerates a pending constant offset. */
    return readreg_general(r,size,-1,1);
}
1731    
/* writereg_general(r, size, spec)
 *
 * INPUT
 * - r : mid-layer register
 * - size : requested size (1/2/4)
 * - spec : -1 if find or make a register free, otherwise specifies
 * the physical register to use in any case
 *
 * OUTPUT
 * - hard (physical, x86 here) register allocated to virtual register r
 */
static __inline__ int writereg_general(int r, int size, int spec)
{
    int n;
    int answer=-1;

    /* Partial writes must not leave a stale pending offset behind. */
    if (size<4) {
        remove_offset(r,spec);
    }

    make_exclusive(r,size,spec);
    if (isinreg(r)) {
        int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
        int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
        n=live.state[r].realreg;

        Dif (live.nat[n].nholds!=1)
            abort();
        switch(size) {
        case 1:
            if (live.nat[n].canbyte || spec>=0) {
                live.state[r].dirtysize=ndsize;
                live.state[r].validsize=nvsize;
                answer=n;
            }
            break;
        case 2:
            if (live.nat[n].canword || spec>=0) {
                live.state[r].dirtysize=ndsize;
                live.state[r].validsize=nvsize;
                answer=n;
            }
            break;
        case 4:
            live.state[r].dirtysize=ndsize;
            live.state[r].validsize=nvsize;
            answer=n;
            break;
        default: abort();
        }
        if (answer<0)
            evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
        answer=alloc_reg_hinted(r,size,1,spec);
    }
    if (spec>=0 && spec!=answer) {
        mov_nregs(spec,answer);
        answer=spec;
    }
    if (live.state[r].status==UNDEF)
        live.state[r].validsize=4;
    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    if (size==4) {
        live.state[r].val=0;
    }
    else {
        /* Partial write with a pending offset would corrupt the value. */
        Dif (live.state[r].val) {
            write_log("Problem with val\n");
            abort();
        }
    }
    set_status(r,DIRTY);
    return answer;
}
1813    
static int writereg(int r, int size)
{
    /* Write access, any native register. */
    return writereg_general(r,size,-1);
}
1818    
static int writereg_specific(int r, int size, int spec)
{
    /* Write access forced into native register 'spec'. */
    return writereg_general(r,size,spec);
}
1823    
/* Get vreg r into a native register for read-modify-write access:
 * rsize bytes will be read, wsize bytes written (wsize >= rsize).
 * Returns the (locked) native register, marked DIRTY. */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
        write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    remove_offset(r,spec);
    make_exclusive(r,0,spec);

    Dif (wsize<rsize) {
        write_log("Cannot handle wsize<rsize in rmw_general()\n");
        abort();
    }
    if (isinreg(r) && live.state[r].validsize>=rsize) {
        n=live.state[r].realreg;
        Dif (live.nat[n].nholds!=1)
            abort();

        /* Sub-long reads need a byte/word capable register unless a
           specific register was requested. */
        switch(rsize) {
        case 1:
            if (live.nat[n].canbyte || spec>=0) {
                answer=n;
            }
            break;
        case 2:
            if (live.nat[n].canword || spec>=0) {
                answer=n;
            }
            break;
        case 4:
            answer=n;
            break;
        default: abort();
        }
        if (answer<0)
            evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
        answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
    }

    if (spec>=0 && spec!=answer) {
        /* Too bad */
        mov_nregs(spec,answer);
        answer=spec;
    }
    if (wsize>live.state[r].dirtysize)
        live.state[r].dirtysize=wsize;
    if (wsize>live.state[r].validsize)
        live.state[r].validsize=wsize;
    set_status(r,DIRTY);

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;

    /* remove_offset() above must have cleared any pending offset. */
    Dif (live.state[r].val) {
        write_log("Problem with val(rmw)\n");
        abort();
    }
    return answer;
}
1889    
static int rmw(int r, int wsize, int rsize)
{
    /* Read-modify-write access, any native register. */
    return rmw_general(r,wsize,rsize,-1);
}
1894    
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
    /* Read-modify-write access forced into native register 'spec'. */
    return rmw_general(r,wsize,rsize,spec);
}
1899    
1900    
1901     /* needed for restoring the carry flag on non-P6 cores */
1902     static void bt_l_ri_noclobber(R4 r, IMM i)
1903     {
1904     int size=4;
1905     if (i<16)
1906     size=2;
1907     r=readreg(r,size);
1908     raw_bt_l_ri(r,i);
1909     unlock2(r);
1910     }
1911    
1912     /********************************************************************
1913     * FPU register status handling. EMIT TIME! *
1914     ********************************************************************/
1915    
/* Write FPU vreg r back to memory if dirty; the value stays in the
   native FP register (status becomes CLEAN). */
static void f_tomem(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
        raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
        raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
        live.fate[r].status=CLEAN;
    }
}
1927    
/* Like f_tomem(), but uses the *_drop store variants and marks r INMEM —
   presumably the drop variants release the native FP register as part of
   the store (NOTE(review): semantics of raw_fmov_*_drop not visible here,
   confirm in the backend). */
static void f_tomem_drop(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
        raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
        raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
        live.fate[r].status=INMEM;
    }
}
1939    
1940    
1941     static __inline__ int f_isinreg(int r)
1942     {
1943     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1944     }
1945    
/* Remove FPU vreg r from its native register, writing it back first if
   dirty.  Uses the drop-store when r is the register's only occupant. */
static void f_evict(int r)
{
    int rr;

    if (!f_isinreg(r))
        return;
    rr=live.fate[r].realreg;
    if (live.fat[rr].nholds==1)
        f_tomem_drop(r);
    else
        f_tomem(r);

    Dif (live.fat[rr].locked &&
        live.fat[rr].nholds==1) {
        write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
        abort();
    }

    live.fat[rr].nholds--;
    if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
        /* Keep holds[] dense: move the former top entry into the slot
           that r vacated, and fix its back-pointer. */
        int topreg=live.fat[rr].holds[live.fat[rr].nholds];
        int thisind=live.fate[r].realind;
        live.fat[rr].holds[thisind]=topreg;
        live.fate[topreg].realind=thisind;
    }
    live.fate[r].status=INMEM;
    live.fate[r].realreg=-1;
}
1974    
1975     static __inline__ void f_free_nreg(int r)
1976     {
1977     int i=live.fat[r].nholds;
1978    
1979     while (i) {
1980     int vr;
1981    
1982     --i;
1983     vr=live.fat[r].holds[i];
1984     f_evict(vr);
1985     }
1986     Dif (live.fat[r].nholds!=0) {
1987     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
1988     abort();
1989     }
1990     }
1991    
1992    
1993     /* Use with care! */
1994     static __inline__ void f_isclean(int r)
1995     {
1996     if (!f_isinreg(r))
1997     return;
1998     live.fate[r].status=CLEAN;
1999     }
2000    
2001     static __inline__ void f_disassociate(int r)
2002     {
2003     f_isclean(r);
2004     f_evict(r);
2005     }
2006    
2007    
2008    
/* Allocate a native FP register for FPU vreg r.  willclobber nonzero
   means the caller overwrites the value, so no load is performed.
   Victim selection is LRU-ish via the 'touched' counters; empty
   registers have badness 0 and win immediately. */
static int f_alloc_reg(int r, int willclobber)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness;
    bestreg=-1;
    when=2000000000;
    for (i=N_FREGS;i--;) {
        badness=live.fat[i].touched;
        if (live.fat[i].nholds==0)
            badness=0;

        if (!live.fat[i].locked && badness<when) {
            bestreg=i;
            when=badness;
            if (live.fat[i].nholds==0)
                break;
        }
    }
    Dif (bestreg==-1)
        abort();

    if (live.fat[bestreg].nholds>0) {
        f_free_nreg(bestreg);
    }
    if (f_isinreg(r)) {
        f_evict(r);
    }

    if (!willclobber) {
        /* Caller will read the value: load it from memory. */
        if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
            raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
#else
            raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
#endif
        }
        live.fate[r].status=CLEAN;
    }
    else {
        live.fate[r].status=DIRTY;
    }
    /* Link r into bestreg's holds[] list. */
    live.fate[r].realreg=bestreg;
    live.fate[r].realind=live.fat[bestreg].nholds;
    live.fat[bestreg].touched=touchcnt++;
    live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
    live.fat[bestreg].nholds++;

    return bestreg;
}
2060    
static void f_unlock(int r)
{
    /* Drop one lock level on native FP reg r; aborts on underflow. */
    Dif (!live.fat[r].locked)
        abort();
    live.fat[r].locked--;
}
2067    
static void f_setlock(int r)
{
    /* Pin native FP reg r against eviction (counted lock). */
    live.fat[r].locked++;
}
2072    
2073     static __inline__ int f_readreg(int r)
2074     {
2075     int n;
2076     int answer=-1;
2077    
2078     if (f_isinreg(r)) {
2079     n=live.fate[r].realreg;
2080     answer=n;
2081     }
2082     /* either the value was in memory to start with, or it was evicted and
2083     is in memory now */
2084     if (answer<0)
2085     answer=f_alloc_reg(r,0);
2086    
2087     live.fat[answer].locked++;
2088     live.fat[answer].touched=touchcnt++;
2089     return answer;
2090     }
2091    
/* Ensure FPU vreg r is the sole occupant of its native FP register.
   'clobber' nonzero means the caller overwrites r, so no copy of the
   old value is needed after splitting. */
static __inline__ void f_make_exclusive(int r, int clobber)
{
    freg_status oldstate;
    int rr=live.fate[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!f_isinreg(r))
        return;
    if (live.fat[rr].nholds==1)
        return;
    /* Count co-residents that would need a writeback if evicted. */
    for (i=0;i<live.fat[rr].nholds;i++) {
        int vr=live.fat[rr].holds[i];
        if (vr!=r && live.fate[vr].status==DIRTY)
            ndirt++;
    }
    if (!ndirt && !live.fat[rr].locked) {
        /* Everything else is clean, so let's keep this register */
        for (i=0;i<live.fat[rr].nholds;i++) {
            int vr=live.fat[rr].holds[i];
            if (vr!=r) {
                f_evict(vr);
                i--; /* Try that index again! */
            }
        }
        Dif (live.fat[rr].nholds!=1) {
            write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
            for (i=0;i<live.fat[rr].nholds;i++) {
                write_log(" %d(%d,%d)",live.fat[rr].holds[i],
                    live.fate[live.fat[rr].holds[i]].realreg,
                    live.fate[live.fat[rr].holds[i]].realind);
            }
            write_log("\n");
            abort();
        }
        return;
    }

    /* We have to split the register */
    oldstate=live.fate[r];

    f_setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    f_disassociate(r);
    /* Get a new register, that we will clobber completely */
    nr=f_alloc_reg(r,1);
    nind=live.fate[r].realind;
    if (!clobber)
        raw_fmov_rr(nr,rr); /* Make another copy */
    live.fate[r]=oldstate; /* Keep all the old state info */
    live.fate[r].realreg=nr;
    live.fate[r].realind=nind;
    f_unlock(rr);
}
2148    
2149    
/* Map virtual FPU register r onto a host register for a full overwrite.
   The register is made exclusive (old co-residents split off), marked
   DIRTY, locked and touch-stamped.  Caller must f_unlock() it. */
static __inline__ int f_writereg(int r)
{
    int n;
    int answer=-1;

    f_make_exclusive(r,1);	/* clobber=1: old contents irrelevant */
    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
	answer=n;
    }
    if (answer<0) {
	answer=f_alloc_reg(r,1);
    }
    live.fate[r].status=DIRTY;
    live.fat[answer].locked++;
    live.fat[answer].touched=touchcnt++;
    return answer;
}

/* Map virtual FPU register r for read-modify-write access: the value is
   preserved (clobber=0) but the result is marked DIRTY. */
static int f_rmw(int r)
{
    int n;

    f_make_exclusive(r,0);
    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
    }
    else
	n=f_alloc_reg(r,0);
    live.fate[r].status=DIRTY;
    live.fat[n].locked++;
    live.fat[n].touched=touchcnt++;
    return n;
}
2184    
/* Transfer the FPU condition codes (held in FP_RESULT) into the native
   integer flags.  On hosts where raw_fflags_into_flags clobbers a fixed
   integer register (FFLAG_NREG_CLOBBER_CONDITION), tmp is pinned to that
   register for the duration so nothing live is destroyed. */
static void fflags_into_flags_internal(uae_u32 tmp)
{
    int r;

    clobber_flags();
    r=f_readreg(FP_RESULT);
    if (FFLAG_NREG_CLOBBER_CONDITION) {
	int tmp2=tmp;
	tmp=writereg_specific(tmp,4,FFLAG_NREG);
	raw_fflags_into_flags(r);
	unlock2(tmp);
	forget_about(tmp2);	/* tmp's contents are now meaningless */
    }
    else
	raw_fflags_into_flags(r);
    f_unlock(r);
    live_flags();	/* native flags are now the authoritative copy */
}
2203    
2204    
2205    
2206    
2207     /********************************************************************
2208     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2209     ********************************************************************/
2210    
2211     /*
2212     * RULES FOR HANDLING REGISTERS:
2213     *
2214     * * In the function headers, order the parameters
2215     * - 1st registers written to
2216     * - 2nd read/modify/write registers
2217     * - 3rd registers read from
2218     * * Before calling raw_*, you must call readreg, writereg or rmw for
2219     * each register
2220     * * The order for this is
2221     * - 1st call remove_offset for all registers written to with size<4
2222     * - 2nd call readreg for all registers read without offset
2223     * - 3rd call rmw for all rmw registers
2224     * - 4th call readreg_offset for all registers that can handle offsets
2225     * - 5th call get_offset for all the registers from the previous step
2226     * - 6th call writereg for all written-to registers
2227     * - 7th call raw_*
2228     * - 8th unlock2 all registers that were locked
2229     */
2230    
/* Declare the native CPU flags to be the authoritative copy of the
   emulated flags; the on-stack copy becomes stale. */
MIDFUNC(0,live_flags,(void))
{
    live.flags_on_stack=TRASH;
    live.flags_in_flags=VALID;
    live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))

/* Tell the register allocator that the emulated flags need not be
   preserved across subsequent operations. */
MIDFUNC(0,dont_care_flags,(void))
{
    live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))


/* Copy the native carry flag into the in-memory X-flag byte.
   NOTE(review): condition code 2 presumably selects "carry set"
   (x86 setc encoding) — confirm against raw_setcc_m. */
MIDFUNC(0,duplicate_carry,(void))
{
    evict(FLAGX);
    make_flags_live_internal();
    COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
    log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))

/* Restore the native carry flag from the stored X-flag. */
MIDFUNC(0,restore_carry,(void))
{
    if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
	bt_l_ri_noclobber(FLAGX,0);
    }
    else { /* Avoid the stall the above creates.
	      This is slow on non-P6, though.
	    */
	/* NOTE(review): parenthesization differs from every other call
	   site, which writes COMPCALL(rol_b_ri)(args) — confirm that
	   the COMPCALL macro expands this form as intended. */
	COMPCALL(rol_b_ri(FLAGX,8));
	isclean(FLAGX);
    }
}
MENDFUNC(0,restore_carry,(void))

/* Bracket a sequence whose flag results must be kept intact. */
MIDFUNC(0,start_needflags,(void))
{
    needflags=1;
}
MENDFUNC(0,start_needflags,(void))

MIDFUNC(0,end_needflags,(void))
{
    needflags=0;
}
MENDFUNC(0,end_needflags,(void))

/* Force the emulated flags into the native flags register. */
MIDFUNC(0,make_flags_live,(void))
{
    make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))

/* Translate the FPU result flags into the native integer flags.
   NOTE(review): fflags_into_flags_internal() already begins with
   clobber_flags(); the call here looks redundant — confirm before
   removing. */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
    clobber_flags();
    fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
2293    
2294    
/* Bit test: copy bit i of r into the carry flag. */
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
    int size=4;
    if (i<16)
	size=2;	/* a 16-bit operand is enough when the bit index fits */
    CLOBBER_BT;
    r=readreg(r,size);
    raw_bt_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */

/* Bit test with the bit index taken from register b. */
MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
    CLOBBER_BT;
    r=readreg(r,4);
    b=readreg(b,4);
    raw_bt_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */

/* Bit test-and-complement, immediate bit index. */
MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btc_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))

/* Bit test-and-complement, register bit index. */
MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btc_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))


/* Bit test-and-reset, immediate bit index. */
MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_btr_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))

/* Bit test-and-reset, register bit index. */
MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btr_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))


/* Bit test-and-set, immediate bit index. */
MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
    int size=4;
    if (i<16)
	size=2;
    CLOBBER_BT;
    r=rmw(r,size,size);
    raw_bts_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))

/* Bit test-and-set, register bit index. */
MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_bts_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2388    
/* Load a 32-bit value from absolute memory address s into register d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,4);
    raw_mov_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))


/* Indirect call through register r. */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
    r=readreg(r,4);
    raw_call_r(r);
    unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */

/* Subtract immediate s from the 32-bit value at memory address d. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
    CLOBBER_SUB;
    raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))

/* Store immediate s to memory address d (32-bit). */
MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))

/* Store immediate s to memory address d (16-bit). */
MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))

/* Store immediate s to memory address d (8-bit). */
MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2434    
/* Rotate r left by immediate i (8-bit).  A zero count is a no-op unless
   flags are needed, in which case the rotate still executes for its
   flag effects. */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,1,1);
    raw_rol_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))

/* Rotate r left by immediate i (16-bit). */
MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,2,2);
    raw_rol_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))

/* Rotate r left by immediate i (32-bit). */
MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,4,4);
    raw_rol_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2467    
2468     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2469     {
2470     if (isconst(r)) {
2471     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2472     return;
2473     }
2474     CLOBBER_ROL;
2475     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2476     d=rmw(d,4,4);
2477     Dif (r!=1) {
2478     write_log("Illegal register %d in raw_rol_b\n",r);
2479     abort();
2480     }
2481     raw_rol_l_rr(d,r) ;
2482     unlock2(r);
2483     unlock2(d);
2484     }
2485     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2486    
2487     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2488     { /* Can only do this with r==1, i.e. cl */
2489    
2490     if (isconst(r)) {
2491     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2492     return;
2493     }
2494     CLOBBER_ROL;
2495     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2496     d=rmw(d,2,2);
2497     Dif (r!=1) {
2498     write_log("Illegal register %d in raw_rol_b\n",r);
2499     abort();
2500     }
2501     raw_rol_w_rr(d,r) ;
2502     unlock2(r);
2503     unlock2(d);
2504     }
2505     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2506    
/* Rotate d left (8-bit) by a variable count in r. */
MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	/* Known count: use the cheaper immediate form. */
	COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_rol_b\n",r);
	abort();
    }
    raw_rol_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2527    
2528    
2529     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2530     {
2531     if (isconst(r)) {
2532     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2533     return;
2534     }
2535     CLOBBER_SHLL;
2536     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2537     d=rmw(d,4,4);
2538     Dif (r!=1) {
2539     write_log("Illegal register %d in raw_rol_b\n",r);
2540     abort();
2541     }
2542     raw_shll_l_rr(d,r) ;
2543     unlock2(r);
2544     unlock2(d);
2545     }
2546     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2547    
2548     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2549     { /* Can only do this with r==1, i.e. cl */
2550    
2551     if (isconst(r)) {
2552     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2553     return;
2554     }
2555     CLOBBER_SHLL;
2556     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2557     d=rmw(d,2,2);
2558     Dif (r!=1) {
2559     write_log("Illegal register %d in raw_shll_b\n",r);
2560     abort();
2561     }
2562     raw_shll_w_rr(d,r) ;
2563     unlock2(r);
2564     unlock2(d);
2565     }
2566     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2567    
/* Logical shift left of d (8-bit) by a variable count in r (host CL). */
MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHLL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shll_b\n",r);
	abort();
    }
    raw_shll_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2588    
2589    
/* Rotate r right by immediate i (8-bit).
   NOTE(review): parameters are declared R1/R2/R4 (read-only) across the
   ror family, yet rmw() is used — looks like they should be RW-typed;
   confirm against gencomp's use of these annotations. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,1,1);
    raw_ror_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))

/* Rotate r right by immediate i (16-bit). */
MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,2,2);
    raw_ror_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))

/* Rotate r right by immediate i (32-bit). */
MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROR;
    r=rmw(r,4,4);
    raw_ror_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))

/* Rotate d right (32-bit) by a variable count in r (host CL). */
MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,4,4);
    raw_ror_l_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))

/* Rotate d right (16-bit) by a variable count in r (host CL). */
MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
	return;
    }
    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,2,2);
    raw_ror_w_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))

/* Rotate d right (8-bit) by a variable count in r (host CL). */
MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
    if (isconst(r)) {
	COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROR;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    raw_ror_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2668    
2669     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2670     {
2671     if (isconst(r)) {
2672     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2673     return;
2674     }
2675     CLOBBER_SHRL;
2676     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2677     d=rmw(d,4,4);
2678     Dif (r!=1) {
2679     write_log("Illegal register %d in raw_rol_b\n",r);
2680     abort();
2681     }
2682     raw_shrl_l_rr(d,r) ;
2683     unlock2(r);
2684     unlock2(d);
2685     }
2686     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2687    
2688     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2689     { /* Can only do this with r==1, i.e. cl */
2690    
2691     if (isconst(r)) {
2692     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2693     return;
2694     }
2695     CLOBBER_SHRL;
2696     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2697     d=rmw(d,2,2);
2698     Dif (r!=1) {
2699     write_log("Illegal register %d in raw_shrl_b\n",r);
2700     abort();
2701     }
2702     raw_shrl_w_rr(d,r) ;
2703     unlock2(r);
2704     unlock2(d);
2705     }
2706     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2707    
/* Logical shift right of d (8-bit) by a variable count in r (host CL). */
MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHRL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shrl_b\n",r);
	abort();
    }
    raw_shrl_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2728    
2729    
2730    
/* Shift left by immediate (32-bit).  When r holds a known constant and
   no flags are needed, the shift is folded at compile time. */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val<<=i;	/* constant-fold, no code emitted */
	return;
    }
    CLOBBER_SHLL;
    r=rmw(r,4,4);
    raw_shll_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))

/* Shift left by immediate (16-bit). */
MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHLL;
    r=rmw(r,2,2);
    raw_shll_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))

/* Shift left by immediate (8-bit). */
MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHLL;
    r=rmw(r,1,1);
    raw_shll_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))

/* Logical shift right by immediate (32-bit), with constant folding. */
MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    if (isconst(r) && !needflags) {
	live.state[r].val>>=i;	/* constant-fold, no code emitted */
	return;
    }
    CLOBBER_SHRL;
    r=rmw(r,4,4);
    raw_shrl_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))

/* Logical shift right by immediate (16-bit). */
MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,2,2);
    raw_shrl_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))

/* Logical shift right by immediate (8-bit). */
MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRL;
    r=rmw(r,1,1);
    raw_shrl_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))

/* Arithmetic shift right by immediate (32-bit). */
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,4,4);
    raw_shra_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))

/* Arithmetic shift right by immediate (16-bit). */
MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,2,2);
    raw_shra_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))

/* Arithmetic shift right by immediate (8-bit). */
MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_SHRA;
    r=rmw(r,1,1);
    raw_shra_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2837    
2838     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2839     {
2840     if (isconst(r)) {
2841     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2842     return;
2843     }
2844     CLOBBER_SHRA;
2845     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2846     d=rmw(d,4,4);
2847     Dif (r!=1) {
2848     write_log("Illegal register %d in raw_rol_b\n",r);
2849     abort();
2850     }
2851     raw_shra_l_rr(d,r) ;
2852     unlock2(r);
2853     unlock2(d);
2854     }
2855     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2856    
2857     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2858     { /* Can only do this with r==1, i.e. cl */
2859    
2860     if (isconst(r)) {
2861     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2862     return;
2863     }
2864     CLOBBER_SHRA;
2865     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2866     d=rmw(d,2,2);
2867     Dif (r!=1) {
2868     write_log("Illegal register %d in raw_shra_b\n",r);
2869     abort();
2870     }
2871     raw_shra_w_rr(d,r) ;
2872     unlock2(r);
2873     unlock2(d);
2874     }
2875     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2876    
/* Arithmetic shift right of d (8-bit) by a variable count in r (host CL). */
MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_SHRA;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_shra_b\n",r);
	abort();
    }
    raw_shra_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2897    
2898    
/* Set byte register d to 0/1 according to condition code cc. */
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
    CLOBBER_SETCC;
    d=writereg(d,1);
    raw_setcc(d,cc);
    unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))

/* Set the byte at memory address d according to condition code cc. */
MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
    CLOBBER_SETCC;
    raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))

/* Conditional move: d = s if condition cc holds. */
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
    if (d==s)	/* moving onto itself: nothing to do */
	return;
    CLOBBER_CMOV;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_cmov_l_rr(d,s,cc);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))

/* Conditional move from memory: d = [s] if condition cc holds. */
MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
    CLOBBER_CMOV;
    d=rmw(d,4,4);
    raw_cmov_l_rm(d,s,cc);
    unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2936    
/* Set the native Z flag according to the 32-bit value in r.
   Two strategies: BSF (which sets Z iff its source operand is zero on
   x86), or a direct flag rewrite that requires the flags to already be
   live in the native flags register. */
MIDFUNC(1,setzflg_l,(RW4 r))
{
    if (setzflg_uses_bsf) {
	CLOBBER_BSF;
	r=rmw(r,4,4);
	raw_bsf_l_rr(r,r);
	unlock2(r);
    }
    else {
	Dif (live.flags_in_flags!=VALID) {
	    write_log("setzflg() wanted flags in native flags, they are %d\n",
		      live.flags_in_flags);
	    abort();
	}
	r=readreg(r,4);
	int f=writereg(S11,4);	/* scratch registers for the flag rewrite */
	int t=writereg(S12,4);
	raw_flags_set_zero(f,r,t);
	unlock2(f);
	unlock2(r);
	unlock2(t);
    }
}
MENDFUNC(1,setzflg_l,(RW4 r))
2961 gbeauche 1.1
/* Signed 32x32 -> 32 multiply: d *= s. */
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_imul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))

/* Signed 32x32 -> 64 multiply.  Both operands are pinned to the host
   registers the wide multiply requires (MUL_NREG1/MUL_NREG2 —
   presumably EDX:EAX on x86; see the raw backend). */
MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_imul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))

/* Unsigned 32x32 -> 64 multiply, same register pinning as above. */
MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
    CLOBBER_MUL;
    s=rmw_specific(s,4,4,MUL_NREG2);
    d=rmw_specific(d,4,4,MUL_NREG1);
    raw_mul_64_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))

/* Unsigned 32x32 -> 32 multiply: d *= s. */
MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
    CLOBBER_MUL;
    s=readreg(s,4);
    d=rmw(d,4,4);
    raw_mul_32_32(d,s);
    unlock2(s);
    unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3005    
#if SIZEOF_VOID_P == 8
/* Sign-extend a 32-bit value into a 64-bit host register (x86_64 only).
   Handles the s==d aliasing case with a single rmw lock to avoid
   double-locking one register at two sizes. */
MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)live.state[s].val);	/* fold at compile time */
	return;
    }

    CLOBBER_SE32;
    isrmw=(s==d);
    if (!isrmw) {
	s=readreg(s,4);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,4);
    }
    raw_sign_extend_32_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
#endif
3037    
/* Sign-extend the low 16 bits of s into the 32-bit register d.
   Constant sources are folded; s==d aliasing uses one rmw lock. */
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s16)live.state[s].val);
	return;
    }

    CLOBBER_SE16;
    isrmw=(s==d);
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_sign_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))

/* Sign-extend the low 8 bits of s into the 32-bit register d. */
MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_s32)(uae_s8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_SE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,1);
    }

    raw_sign_extend_8_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))


/* Zero-extend the low 16 bits of s into the 32-bit register d. */
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
    int isrmw;

    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u16)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE16;
    if (!isrmw) {
	s=readreg(s,2);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,2);
    }
    raw_zero_extend_16_rr(d,s);
    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))

/* Zero-extend the low 8 bits of s into the 32-bit register d. */
MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
    int isrmw;
    if (isconst(s)) {
	set_const(d,(uae_u32)(uae_u8)live.state[s].val);
	return;
    }

    isrmw=(s==d);
    CLOBBER_ZE8;
    if (!isrmw) {
	s=readreg(s,1);
	d=writereg(d,4);
    }
    else { /* If we try to lock this twice, with different sizes, we
	      are in trouble! */
	s=d=rmw(s,4,1);
    }

    raw_zero_extend_8_rr(d,s);

    if (!isrmw) {
	unlock2(d);
	unlock2(s);
    }
    else {
	unlock2(s);
    }
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3161    
/* 8-bit register-to-register move; constant sources become immediates. */
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
    if (d==s)
	return;
    if (isconst(s)) {
	COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=writereg(d,1);
    raw_mov_b_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))

/* 16-bit register-to-register move; constant sources become immediates. */
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
    if (d==s)
	return;
    if (isconst(s)) {
	COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=writereg(d,2);
    raw_mov_w_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3197    
3198    
/* Load a long from [baser + index*factor] into d. */
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_mov_l_rrm_indexed(d,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))

/* Load a word from [baser + index*factor] into d. */
MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,2);

    raw_mov_w_rrm_indexed(d,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))

/* Load a byte from [baser + index*factor] into d. */
MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    d=writereg(d,1);

    raw_mov_b_rrm_indexed(d,baser,index,factor);

    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3241    
3242    
/* Store long s to [baser + index*factor]. */
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    s=readreg(s,4);

    /* s must not share a host register with the address operands. */
    Dif (baser==s || index==s)
	abort();


    raw_mov_l_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))

/* Store word s to [baser + index*factor]. */
MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
    CLOBBER_MOV;
    baser=readreg(baser,4);
    index=readreg(index,4);
    s=readreg(s,2);

    raw_mov_w_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))

/* Store byte s to [baser + index*factor]. */
MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
    CLOBBER_MOV;
    s=readreg(s,1);
    baser=readreg(baser,4);
    index=readreg(index,4);

    raw_mov_b_mrr_indexed(baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3288    
3289    
/* Store long s to [base + baser + index*factor].  Pending constant
   offsets tracked for baser/index are folded into the displacement via
   get_offset() instead of being materialized in registers. */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
    int basereg=baser;	/* keep virtual ids; readreg_offset renames them */
    int indexreg=index;

    CLOBBER_MOV;
    s=readreg(s,4);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

/* Store word s to [base + baser + index*factor]. */
MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    s=readreg(s,2);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

/* Store byte s to [base + baser + index*factor]. */
MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    s=readreg(s,1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3349    


/* Read a long from base+baser+factor*index into d. */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    /* fold any pending constant offsets into the displacement */
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,4);   /* full 4-byte write: d is redefined entirely */
    raw_mov_l_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a word from base+baser+factor*index into d. */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    /* partial-width write: discard any pending offset attached to d first */
    remove_offset(d,-1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,2);
    raw_mov_w_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a byte from base+baser+factor*index into d. */
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    remove_offset(d,-1);   /* partial-width write, see word variant */
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,1);
    raw_mov_b_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3410    
/* Read a long from base+factor*index into d. */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
    int indexreg=index;

    /* constant index: the whole address is known, use a plain memory load */
    if (isconst(index)) {
        COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
        return;
    }

    CLOBBER_MOV;
    index=readreg_offset(index,4);
    base+=get_offset(indexreg)*factor;  /* fold pending offset, scaled */
    d=writereg(d,4);

    raw_mov_l_rm_indexed(d,base,index,factor);
    unlock2(index);
    unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3431    

/* Read the long at the address contained in s+offset and store it in d. */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
    /* constant address register: degrade to an absolute memory load */
    if (isconst(s)) {
        COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,4);

    raw_mov_l_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))

/* Read the word at the address contained in s+offset and store it in d. */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
    if (isconst(s)) {
        COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,2);

    raw_mov_w_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))

/* Read the byte at the address contained in s+offset and store it in d.
   (Original comment said "word"; this is the byte variant.) */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
    if (isconst(s)) {
        COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,1);

    raw_mov_b_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))

/* Read the long at the address contained in s+offset and store it in d.
 * "brR" variants use readreg_offset(): s may have a pending constant
 * offset which is folded into the immediate displacement. */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
    int sreg=s;   /* keep virtual reg id for get_offset() below */
    if (isconst(s)) {
        COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,4);

    raw_mov_l_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))

/* Read the word at the address contained in s+offset and store it in d. */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
    int sreg=s;
    if (isconst(s)) {
        COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    remove_offset(d,-1);   /* partial-width write into d */
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,2);

    raw_mov_w_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))

/* Read the byte at the address contained in s+offset and store it in d.
   (Original comment said "word"; this is the byte variant.) */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
    int sreg=s;
    if (isconst(s)) {
        COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    remove_offset(d,-1);
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,1);

    raw_mov_b_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3542    
/* Store immediate i as a long to the address contained in d+offset. */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    /* constant address register: degrade to an absolute memory store */
    if (isconst(d)) {
        COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
        return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);   /* fold pending offset into displacement */
    raw_mov_l_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))

/* Store immediate i as a word to the address contained in d+offset. */
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
        COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
        return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))

/* Store immediate i as a byte to the address contained in d+offset. */
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
        COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
        return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3590    
/* Warning! OFFSET is byte sized only! */

/* Store long s to the address contained in d+offset. */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
    if (isconst(d)) {   /* known address: absolute store */
        COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
        return;
    }
    if (isconst(s)) {   /* known value: store immediate */
        COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_mov_l_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))

/* Store word s to the address contained in d+offset. */
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
    if (isconst(d)) {
        COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
        return;
    }
    if (isconst(s)) {
        COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg(d,4);
    raw_mov_w_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))

/* Store byte s to the address contained in d+offset. */
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
    if (isconst(d)) {
        COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
        return;
    }
    if (isconst(s)) {
        COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg(d,4);
    raw_mov_b_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3652    
/* d = s + offset (address computation, no memory access, no flags). */
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
    if (isconst(s)) {   /* fully constant: just record the value */
        COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
        return;
    }
#if USE_OFFSET
    /* in-place add: record as a pending offset instead of emitting code */
    if (d==s) {
        add_offset(d,offset);
        return;
    }
#endif
    CLOBBER_LEA;
    s=readreg(s,4);
    d=writereg(d,4);
    raw_lea_l_brr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))

/* d = s + factor*index + offset. */
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
    if (!offset) {   /* zero displacement: use the shorter indexed form */
        COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
        return;
    }
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_brr_indexed(d,s,index,factor,offset);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))

/* d = s + factor*index. */
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_rr_indexed(d,s,index,factor);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3705    
/* Write the long in s to the address contained in d+offset.
 * (Original comment had s and d swapped.)  "bRr" variants allow d to
 * carry a pending constant offset, folded into the displacement. */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {   /* known address: absolute store */
        COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);

    raw_mov_l_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))

/* Write the word in s to the address contained in d+offset. */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
    int dreg=d;

    if (isconst(d)) {
        COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))

/* Write the byte in s to the address contained in d+offset. */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
        COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3763    
/* Byte-swap all 4 bytes of r (for big-endian <-> little-endian data). */
MIDFUNC(1,bswap_32,(RW4 r))
{
    int reg=r;

    /* constant value: fold the swap at compile time, emit nothing */
    if (isconst(r)) {
        uae_u32 oldv=live.state[r].val;
        live.state[r].val=reverse32(oldv);
        return;
    }

    CLOBBER_SW32;
    r=rmw(r,4,4);   /* read-modify-write lock, full 4 bytes */
    raw_bswap_32(r);
    unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))

/* Swap the two low bytes of r; the upper 16 bits are preserved. */
MIDFUNC(1,bswap_16,(RW2 r))
{
    if (isconst(r)) {
        uae_u32 oldv=live.state[r].val;
        /* swap low bytes, keep the high half (oldv & 0xffff0000) intact */
        live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
            (oldv&0xffff0000);
        return;
    }

    CLOBBER_SW16;
    r=rmw(r,2,2);

    raw_bswap_16(r);
    unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3797    
3798    
3799    
/* d = s (long).  Instead of emitting a host move, d is recorded as an
 * additional holder ("alias") of s's host register in the register
 * cache; the actual copy is deferred until one of them is evicted. */
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
    int olds;

    if (d==s) { /* How pointless! */
        return;
    }
    if (isconst(s)) {   /* constant propagation instead of a move */
        COMPCALL(mov_l_ri)(d,live.state[s].val);
        return;
    }
    olds=s;             /* keep virtual reg id; s is rebound below */
    disassociate(d);    /* detach d from whatever it held before */
    s=readreg_offset(s,4);
    /* make d an alias of host register s */
    live.state[d].realreg=s;
    live.state[d].realind=live.nat[s].nholds;
    live.state[d].val=live.state[olds].val;   /* inherit pending offset */
    live.state[d].validsize=4;
    live.state[d].dirtysize=4;
    set_status(d,DIRTY);

    live.nat[s].holds[live.nat[s].nholds]=d;
    live.nat[s].nholds++;
    log_clobberreg(d);
    /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
       d,s,live.state[d].realind,live.nat[s].nholds); */
    unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3829    
/* Store long s to absolute memory address d. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
    if (isconst(s)) {   /* known value: store immediate instead */
        COMPCALL(mov_l_mi)(d,live.state[s].val);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);

    raw_mov_l_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))


/* Store word s to absolute memory address d. */
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,2);

    raw_mov_w_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))

/* Load word d from absolute memory address s. */
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))

/* Store byte s to absolute memory address d. */
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);

    raw_mov_b_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))

/* Load byte d from absolute memory address s. */
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3893    
/* d = immediate s (long).  No code is emitted: the register cache just
 * records d as holding constant s until it is actually needed. */
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
    set_const(d,s);
    return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))

/* d = immediate s (word); low 16 bits of d are written. */
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))

/* d = immediate s (byte); low 8 bits of d are written. */
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3920    
3921    
/* Add immediate s to the long at absolute memory address d. */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_l_mi(d,s);
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))

/* Add immediate s to the word at absolute memory address d. */
MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_w_mi(d,s);
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))

/* Add immediate s to the byte at absolute memory address d. */
MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_b_mi(d,s);
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3942    
3943    
/* TEST d,i (32-bit): set flags from d & i without modifying d. */
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
    CLOBBER_TEST;
    d=readreg(d,4);

    raw_test_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3953    
3954     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3955     {
3956     CLOBBER_TEST;
3957     d=readreg(d,4);
3958     s=readreg(s,4);
3959    
3960     raw_test_l_rr(d,s);;
3961     unlock2(d);
3962     unlock2(s);
3963     }
3964     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3965    
/* TEST d,s (16-bit): set flags from d & s without modifying either. */
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
    CLOBBER_TEST;
    d=readreg(d,2);
    s=readreg(s,2);

    raw_test_w_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))

/* TEST d,s (8-bit): set flags from d & s without modifying either. */
MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
    CLOBBER_TEST;
    d=readreg(d,1);
    s=readreg(s,1);

    raw_test_b_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3989    
3990    
/* d &= i (32-bit). */
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
    /* constant operand and flags unneeded: fold at compile time */
    if (isconst(d) && !needflags) {
        live.state[d].val &= i;
        return;
    }

    CLOBBER_AND;
    d=rmw(d,4,4);   /* read-modify-write lock */

    raw_and_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

/* d &= s (32-bit). */
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
    CLOBBER_AND;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_and_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))

/* d &= s (16-bit). */
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
    CLOBBER_AND;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_and_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))

/* d &= s (8-bit). */
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
    CLOBBER_AND;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_and_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
4041    
// gb-- used for making an fpcr value in compemu_fpp.cpp
/* d |= long at absolute memory address s. */
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))

/* d |= i (32-bit); folded at compile time when d is constant and the
 * emulated flags are not needed. */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
        live.state[d].val|=i;
        return;
    }
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))

/* d |= s (32-bit); folded when both operands are constants. */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
    if (isconst(d) && isconst(s) && !needflags) {
        live.state[d].val|=live.state[s].val;
        return;
    }
    CLOBBER_OR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_or_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))

/* d |= s (16-bit). */
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
    CLOBBER_OR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_or_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))

/* d |= s (8-bit). */
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
    CLOBBER_OR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_or_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
4106    
/* d += s + carry (32-bit add-with-carry; consumes the host carry flag). */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
    CLOBBER_ADC;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_adc_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))

/* d += s + carry (16-bit). */
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
    CLOBBER_ADC;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_adc_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))

/* d += s + carry (8-bit). */
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
    CLOBBER_ADC;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_adc_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
4143    
/* d += s (32-bit); constant s is routed to the immediate form. */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
        COMPCALL(add_l_ri)(d,live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_add_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))

/* d += s (16-bit). */
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_add_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))

/* d += s (8-bit). */
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_add_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
4195    
/* d -= i (32-bit).  When the emulated flags are not needed the
 * subtraction is folded into the constant or recorded as a negative
 * pending offset, emitting no code at all. */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags)   /* subtracting 0 with flags dead: no-op */
        return;
    if (isconst(d) && !needflags) {
        live.state[d].val-=i;
        return;
    }
#if USE_OFFSET
    if (!needflags) {
        add_offset(d,-i);   /* defer as pending offset */
        return;
    }
#endif

    CLOBBER_SUB;
    d=rmw(d,4,4);

    raw_sub_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))

/* d -= i (16-bit). */
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags)
        return;

    CLOBBER_SUB;
    d=rmw(d,2,2);

    raw_sub_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))

/* d -= i (8-bit). */
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags)
        return;

    CLOBBER_SUB;
    d=rmw(d,1,1);

    raw_sub_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4245    
/* d += i (32-bit); like sub_l_ri, folded to a constant update or a
 * pending offset whenever the emulated flags are not needed. */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags)
        return;
    if (isconst(d) && !needflags) {
        live.state[d].val+=i;
        return;
    }
#if USE_OFFSET
    if (!needflags) {
        add_offset(d,i);
        return;
    }
#endif
    CLOBBER_ADD;
    d=rmw(d,4,4);
    raw_add_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))

/* d += i (16-bit). */
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags)
        return;

    CLOBBER_ADD;
    d=rmw(d,2,2);

    raw_add_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))

/* d += i (8-bit). */
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags)
        return;

    CLOBBER_ADD;
    d=rmw(d,1,1);

    raw_add_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4293    
/* d -= s + borrow (32-bit subtract-with-borrow; consumes host carry). */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
    CLOBBER_SBB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sbb_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))

/* d -= s + borrow (16-bit). */
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
    CLOBBER_SBB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sbb_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))

/* d -= s + borrow (8-bit). */
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
    CLOBBER_SBB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sbb_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4329    
/* d -= s (32-bit); constant s is routed to the immediate form. */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
        COMPCALL(sub_l_ri)(d,live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sub_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))

/* d -= s (16-bit). */
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sub_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))

/* d -= s (8-bit). */
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sub_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
4380    
/* CMP d,s (32-bit): set flags from d - s without modifying either. */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
    CLOBBER_CMP;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_cmp_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))

/* CMP r,i (32-bit): set flags from r - i. */
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
    CLOBBER_CMP;
    r=readreg(r,4);

    raw_cmp_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))

/* CMP d,s (16-bit). */
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
    CLOBBER_CMP;
    s=readreg(s,2);
    d=readreg(d,2);

    raw_cmp_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))

/* CMP d,s (8-bit). */
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
    CLOBBER_CMP;
    s=readreg(s,1);
    d=readreg(d,1);

    raw_cmp_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4426    
4427    
/* d ^= s (32-bit). */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
    CLOBBER_XOR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_xor_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))

/* d ^= s (16-bit). */
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
    CLOBBER_XOR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_xor_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))

/* d ^= s (8-bit). */
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
    CLOBBER_XOR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_xor_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4463    
/* Call the function whose address is in register r with one argument
 * (in1, isize bytes) and capture its result into out1 (osize bytes).
 * The emulated flag state and all pending offsets are flushed first
 * since the callee may clobber anything. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
    clobber_flags();
    remove_all_offsets();
    if (osize==4) {
        /* full-width result: out1's old value is dead unless it is
           also an input or the target register */
        if (out1!=in1 && out1!=r) {
            COMPCALL(forget_about)(out1);
        }
    }
    else {
        tomem_c(out1);   /* partial-width result: spill current value */
    }

    in1=readreg_specific(in1,isize,REG_PAR1);  /* pin arg to its ABI register */
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
                             any need for swapping nregs in prepare_for_call_2
                          */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in1);    /* stack-based ABI: argument goes on the stack */
#endif
    unlock2(in1);
    unlock2(r);

    prepare_for_call_2();
    raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(4);        /* caller pops the pushed argument */
#endif


    /* bind out1 to the ABI result register in the register cache */
    live.nat[REG_RESULT].holds[0]=out1;
    live.nat[REG_RESULT].nholds=1;
    live.nat[REG_RESULT].touched=touchcnt++;

    live.state[out1].realreg=REG_RESULT;
    live.state[out1].realind=0;
    live.state[out1].val=0;
    live.state[out1].validsize=osize;
    live.state[out1].dirtysize=osize;
    set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4508    
/* Call the function whose address is in register r with two arguments
 * (in1/in2, isize1/isize2 bytes) and no captured result. */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
    clobber_flags();
    remove_all_offsets();
    in1=readreg_specific(in1,isize1,REG_PAR1);  /* pin args to ABI registers */
    in2=readreg_specific(in2,isize2,REG_PAR2);
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
                             any need for swapping nregs in prepare_for_call_2
                          */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in2);    /* stack ABI: push right-to-left */
    raw_push_l_r(in1);
#endif
    unlock2(r);
    unlock2(in1);
    unlock2(in2);
    prepare_for_call_2();
    raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(8);        /* caller pops both arguments */
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4533    
/* forget_about() takes a mid-layer register: discard its cached value
 * and mark it undefined (no spill back to memory). */
MIDFUNC(1,forget_about,(W4 r))
{
    if (isinreg(r))
        disassociate(r);
    live.state[r].val=0;
    set_status(r,UNDEF);
}
MENDFUNC(1,forget_about,(W4 r))

/* Emit a host no-op instruction. */
MIDFUNC(0,nop,(void))
{
    raw_nop();
}
MENDFUNC(0,nop,(void))
4549    
4550    
/* FPU counterpart of forget_about(): discard the cached FPU value r. */
MIDFUNC(1,f_forget_about,(FW r))
{
    if (f_isinreg(r))
        f_disassociate(r);
    live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))

/* The following load well-known FPU constants into r (fldpi etc.). */

/* r = pi */
MIDFUNC(1,fmov_pi,(FW r))
{
    r=f_writereg(r);
    raw_fmov_pi(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))

/* r = log10(2) */
MIDFUNC(1,fmov_log10_2,(FW r))
{
    r=f_writereg(r);
    raw_fmov_log10_2(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))

/* r = log2(e) */
MIDFUNC(1,fmov_log2_e,(FW r))
{
    r=f_writereg(r);
    raw_fmov_log2_e(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))

/* r = ln(2) */
MIDFUNC(1,fmov_loge_2,(FW r))
{
    r=f_writereg(r);
    raw_fmov_loge_2(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))

/* r = 1.0 */
MIDFUNC(1,fmov_1,(FW r))
{
    r=f_writereg(r);
    raw_fmov_1(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))

/* r = 0.0 */
MIDFUNC(1,fmov_0,(FW r))
{
    r=f_writereg(r);
    raw_fmov_0(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))
4606    
/* FPU load/store between register r and memory m.  Naming (from the
 * raw_* calls): plain = double, "s" = single, "i" = integer, "ext" =
 * extended precision; _rm loads from memory, _mr stores to memory. */

/* Load double from m into r. */
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmov_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))

/* Load integer from m into r (converted on load). */
MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmovi_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))

/* Store r to m as integer. */
MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmovi_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))

/* Load single from m into r. */
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmovs_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))

/* Store r to m as single. */
MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmovs_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))

/* Store r to m as extended precision. */
MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmov_ext_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))

/* Store r to m as double. */
MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmov_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))

/* Load extended precision from m into r. */
MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmov_ext_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4670    
/* Virtual FPU register-to-register move, d <- s.
 * With USE_F_ALIAS no host instruction is emitted at all: d is simply
 * recorded as another holder of s's host register (lazy copy). The
 * bookkeeping mirrors the integer-register aliasing scheme: d joins
 * s's holds[] list, is marked DIRTY so it gets written back if ever
 * spilled, and remembers its index in that list (realind).
 * Without aliasing, a plain raw move is emitted. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
	if (d==s) { /* How pointless! */
		return;
	}
#if USE_F_ALIAS
	f_disassociate(d);        /* drop whatever d held before */
	s=f_readreg(s);           /* s now names the host register */
	live.fate[d].realreg=s;
	live.fate[d].realind=live.fat[s].nholds;
	live.fate[d].status=DIRTY;
	live.fat[s].holds[live.fat[s].nholds]=d;
	live.fat[s].nholds++;
	f_unlock(s);
#else
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fmov_rr(d,s);
	f_unlock(s);
	f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4694    
/* Load the FPU control word from memory at base+index. index is an
 * integer virtual register (hence readreg/unlock2 from the integer
 * allocator, not the f_* one); base is an immediate address. */
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
	index=readreg(index,4);

	raw_fldcw_m_indexed(index,base);
	unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))

/* Test virtual FPU register r (raw_ftst_r — presumably sets the host
 * FPU condition codes; confirm against the raw_* implementation). */
MIDFUNC(1,ftst_r,(FR r))
{
	r=f_readreg(r);
	raw_ftst_r(r);
	f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))
4711    
/* Declare the pending FPU result (FP_RESULT) dead: detach it from its
 * host register so no code is wasted writing it back. */
MIDFUNC(0,dont_care_fflags,(void))
{
	f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4717    
/* Unary FPU operations, d <- op(s). All follow the same allocator
 * protocol: read-lock the source, write-lock the destination, emit the
 * raw op, release both locks. The s-before-d order matters: locking s
 * first keeps its value live while d is (re)allocated. */

MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsqrt_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))

MIDFUNC(2,fabs_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fabs_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))

MIDFUNC(2,fsin_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsin_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))

MIDFUNC(2,fcos_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fcos_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))

/* d <- 2^s */
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_ftwotox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))

/* d <- e^s */
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fetox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))

/* d <- s rounded to integer (current rounding mode) */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_frndint_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))

/* d <- log2(s) */
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_flog2_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))

/* d <- -s */
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fneg_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
4807    
/* Binary FPU operations, d <- d op s. The destination is a
 * read-modify-write operand (f_rmw) except for fcmp, which only reads
 * both operands and leaves its result in the FPU condition state. */

MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fadd_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))

MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fsub_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))

/* Compare d with s; both operands read-only. */
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
	d=f_readreg(d);
	s=f_readreg(s);
	raw_fcmp_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))

MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fdiv_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))

/* d <- remainder of d/s (frem vs frem1: two 68k REM flavors — see the
 * raw_* layer for the exact rounding used by each). */
MIDFUNC(2,frem_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))

MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem1_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))

MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fmul_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4877    
4878     /********************************************************************
4879     * Support functions exposed to gencomp. CREATE time *
4880     ********************************************************************/
4881    
4882     int kill_rodent(int r)
4883     {
4884     return KILLTHERAT &&
4885     have_rat_stall &&
4886     (live.state[r].status==INMEM ||
4887     live.state[r].status==CLEAN ||
4888     live.state[r].status==ISCONST ||
4889     live.state[r].dirtysize==4);
4890     }
4891    
/* Return the compile-time constant value held by virtual register r.
 * Aborts (under the Dif debug macro) if r is not actually marked
 * constant — callers must check isconst() first. */
uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}
	return live.state[r].val;
}
4900    
/* Commit the lazily-accumulated 68k PC offset: fold m68k_pc_offset into
 * the PC_P virtual register and the host-side comp_pc_p, then reset the
 * offset. Instruction decoding advances m68k_pc_offset instead of PC_P
 * so that most instructions need no PC update code at all. */
void sync_m68k_pc(void)
{
	if (m68k_pc_offset) {
		add_l_ri(PC_P,m68k_pc_offset);
		comp_pc_p+=m68k_pc_offset;
		m68k_pc_offset=0;
	}
}
4909    
4910     /********************************************************************
4911     * Scratch registers management *
4912     ********************************************************************/
4913    
/* Backing store for scratch virtual registers (those beyond the ones
 * mapped to real 68k registers): spilled values land here. */
struct scratch_t {
	uae_u32 regs[VREGS];          // integer scratch slots
	fpu_register fregs[VFREGS];   // FPU scratch slots
};

static scratch_t scratch;
4920    
4921     /********************************************************************
4922     * Support functions exposed to newcpu *
4923     ********************************************************************/
4924    
/* Render a boolean as the human-readable string "on" or "off" for the
 * configuration log lines below. */
static inline const char *str_on_off(bool b)
{
	if (b)
		return "on";
	return "off";
}
4929    
/* One-time JIT initialisation: read preferences, probe the target CPU,
 * select the icache-flush strategy, log the compiled-in feature set and
 * build the compiler dispatch tables. Idempotent — subsequent calls
 * return immediately. */
void compiler_init(void)
{
	static bool initialized = false;
	if (initialized)
		return;

#if JIT_DEBUG
	// JIT debug mode ?
	JITDebug = PrefsFindBool("jitdebug");
#endif
	write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
	// Use JIT compiler for FPU instructions ?
	avoid_fpu = !PrefsFindBool("jitfpu");
#else
	// JIT FPU is always disabled
	avoid_fpu = true;
#endif
	write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

	// Get size of the translation cache (in KB)
	cache_size = PrefsFindInt32("jitcachesize");
	write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

	// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
	raw_init_cpu();
	setzflg_uses_bsf = target_check_bsf();
	write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
	write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
	write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);

	// Translation cache flush mechanism
	lazy_flush = PrefsFindBool("jitlazyflush");
	write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
	flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

	// Compiler features
	write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
	write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
	write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
	write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
	write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

	// Build compiler tables
	build_comp();

	initialized = true;

#if PROFILE_UNTRANSLATED_INSNS
	write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif

#if PROFILE_COMPILE_TIME
	write_log("<JIT compiler> : gather statistics on translation time\n");
	emul_start_time = clock();
#endif
}
4988    
/* JIT teardown: release the translation cache and popall trampoline
 * area, then (if profiling was compiled in) dump compile-time stats and
 * the top untranslated opcodes sorted by execution count. */
void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
	emul_end_time = clock();
#endif

	// Deallocate translation cache
	if (compiled_code) {
		vm_release(compiled_code, cache_size * 1024);
		compiled_code = 0;
	}

	// Deallocate popallspace
	if (popallspace) {
		vm_release(popallspace, POPALLSPACE_SIZE);
		popallspace = 0;
	}

#if PROFILE_COMPILE_TIME
	write_log("### Compile Block statistics\n");
	write_log("Number of calls to compile_block : %d\n", compile_count);
	uae_u32 emul_time = emul_end_time - emul_start_time;
	write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
	write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
		100.0*double(compile_time)/double(emul_time));
	write_log("\n");
#endif

#if PROFILE_UNTRANSLATED_INSNS
	uae_u64 untranslated_count = 0;
	for (int i = 0; i < 65536; i++) {
		opcode_nums[i] = i;
		untranslated_count += raw_cputbl_count[i];
	}
	write_log("Sorting out untranslated instructions count...\n");
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
	write_log("\nRank Opc Count Name\n");
	for (int i = 0; i < untranslated_top_ten; i++) {
		uae_u32 count = raw_cputbl_count[opcode_nums[i]];
		struct instr *dp;
		struct mnemolookup *lookup;
		if (!count)
			break;
		dp = table68k + opcode_nums[i];
		// Linear scan of the mnemonic table; terminates because every
		// opcode's mnemo is assumed present in lookuptab.
		for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
			;
		write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
	}
#endif
}
5039    
/* Decide whether the JIT should be used at all for this session.
 * Returns false when the user disabled it, when the configured cache is
 * below the minimum useful size, or when emulating anything below a
 * 68040 (unsupported by the current translator). */
bool compiler_use_jit(void)
{
	// Check for the "jit" prefs item
	if (!PrefsFindBool("jit"))
		return false;

	// Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
	if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
		write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
		return false;
	}

	// FIXME: there are currently problems with JIT compilation and anything below a 68040
	if (CPUType < 4) {
		write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
		return false;
	}

	return true;
}
5060    
/* Reset the register-allocator state at the start of translating a
 * basic block. Sets up the mapping between virtual registers and their
 * memory backing (68k registers, flags, PC, FPU registers, scratch
 * slots), and resets per-host-register bookkeeping from the can_byte /
 * can_word / always_used capability tables. */
void init_comp(void)
{
	int i;
	uae_s8* cb=can_byte;
	uae_s8* cw=can_word;
	uae_s8* au=always_used;

	// Start with every integer virtual register unmapped and undefined.
	for (i=0;i<VREGS;i++) {
		live.state[i].realreg=-1;
		live.state[i].needflush=NF_SCRATCH;
		live.state[i].val=0;
		set_status(i,UNDEF);
	}

	// Same for FPU virtual registers.
	for (i=0;i<VFREGS;i++) {
		live.fate[i].status=UNDEF;
		live.fate[i].realreg=-1;
		live.fate[i].needflush=NF_SCRATCH;
	}

	for (i=0;i<VREGS;i++) {
		if (i<16) { /* First 16 registers map to 68k registers */
			live.state[i].mem=((uae_u32*)&regs)+i;
			live.state[i].needflush=NF_TOMEM;
			set_status(i,INMEM);
		}
		else
			live.state[i].mem=scratch.regs+i;
	}
	// PC_P tracks regs.pc_p and starts as a known constant (comp_pc_p).
	live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
	live.state[PC_P].needflush=NF_TOMEM;
	set_const(PC_P,(uintptr)comp_pc_p);

	// X flag and scratch CZNV flags live in regflags.
	live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
	live.state[FLAGX].needflush=NF_TOMEM;
	set_status(FLAGX,INMEM);

	live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
	live.state[FLAGTMP].needflush=NF_TOMEM;
	set_status(FLAGTMP,INMEM);

	live.state[NEXT_HANDLER].needflush=NF_HANDLER;
	set_status(NEXT_HANDLER,UNDEF);

	for (i=0;i<VFREGS;i++) {
		if (i<8) { /* First 8 registers map to 68k FPU registers */
			live.fate[i].mem=(uae_u32*)fpu_register_address(i);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else if (i==FP_RESULT) {
			live.fate[i].mem=(uae_u32*)(&fpu.result);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else
			live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
	}


	// Per-host-register capabilities; cb/cw/au are sorted index lists,
	// consumed in step with the loop index.
	for (i=0;i<N_REGS;i++) {
		live.nat[i].touched=0;
		live.nat[i].nholds=0;
		live.nat[i].locked=0;
		if (*cb==i) {
			live.nat[i].canbyte=1; cb++;
		} else live.nat[i].canbyte=0;
		if (*cw==i) {
			live.nat[i].canword=1; cw++;
		} else live.nat[i].canword=0;
		if (*au==i) {
			live.nat[i].locked=1; au++;
		}
	}

	for (i=0;i<N_FREGS;i++) {
		live.fat[i].touched=0;
		live.fat[i].nholds=0;
		live.fat[i].locked=0;
	}

	touchcnt=1;
	m68k_pc_offset=0;
	live.flags_in_flags=TRASH;
	live.flags_on_stack=VALID;
	live.flags_are_important=1;

	raw_fp_init();
}
5150    
5151     /* Only do this if you really mean it! The next call should be to init!*/
5152     void flush(int save_regs)
5153     {
5154     int fi,i;
5155    
5156     log_flush();
5157     flush_flags(); /* low level */
5158     sync_m68k_pc(); /* mid level */
5159    
5160     if (save_regs) {
5161     for (i=0;i<VFREGS;i++) {
5162     if (live.fate[i].needflush==NF_SCRATCH ||
5163     live.fate[i].status==CLEAN) {
5164     f_disassociate(i);
5165     }
5166     }
5167     for (i=0;i<VREGS;i++) {
5168     if (live.state[i].needflush==NF_TOMEM) {
5169     switch(live.state[i].status) {
5170     case INMEM:
5171     if (live.state[i].val) {
5172 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5173 gbeauche 1.1 log_vwrite(i);
5174     live.state[i].val=0;
5175     }
5176     break;
5177     case CLEAN:
5178     case DIRTY:
5179     remove_offset(i,-1); tomem(i); break;
5180     case ISCONST:
5181     if (i!=PC_P)
5182     writeback_const(i);
5183     break;
5184     default: break;
5185     }
5186     Dif (live.state[i].val && i!=PC_P) {
5187     write_log("Register %d still has val %x\n",
5188     i,live.state[i].val);
5189     }
5190     }
5191     }
5192     for (i=0;i<VFREGS;i++) {
5193     if (live.fate[i].needflush==NF_TOMEM &&
5194     live.fate[i].status==DIRTY) {
5195     f_evict(i);
5196     }
5197     }
5198     raw_fp_cleanup_drop();
5199     }
5200     if (needflags) {
5201     write_log("Warning! flush with needflags=1!\n");
5202     }
5203     }
5204    
5205     static void flush_keepflags(void)
5206     {
5207     int fi,i;
5208    
5209     for (i=0;i<VFREGS;i++) {
5210     if (live.fate[i].needflush==NF_SCRATCH ||
5211     live.fate[i].status==CLEAN) {
5212     f_disassociate(i);
5213     }
5214     }
5215     for (i=0;i<VREGS;i++) {
5216     if (live.state[i].needflush==NF_TOMEM) {
5217     switch(live.state[i].status) {
5218     case INMEM:
5219     /* Can't adjust the offset here --- that needs "add" */
5220     break;
5221     case CLEAN:
5222     case DIRTY:
5223     remove_offset(i,-1); tomem(i); break;
5224     case ISCONST:
5225     if (i!=PC_P)
5226     writeback_const(i);
5227     break;
5228     default: break;
5229     }
5230     }
5231     }
5232     for (i=0;i<VFREGS;i++) {
5233     if (live.fate[i].needflush==NF_TOMEM &&
5234     live.fate[i].status==DIRTY) {
5235     f_evict(i);
5236     }
5237     }
5238     raw_fp_cleanup_drop();
5239     }
5240    
/* Forget all scratch virtual registers (integer and FPU) between
 * translated instructions, and warn about host registers that are
 * unexpectedly still locked. */
void freescratch(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		// Host register 4 is exempt from the warning — presumably ESP
		// on x86, which is permanently locked; confirm against the
		// target's always_used table.
		if (live.nat[i].locked && i!=4)
			write_log("Warning! %d is locked\n",i);

	for (i=0;i<VREGS;i++)
		if (live.state[i].needflush==NF_SCRATCH) {
			forget_about(i);
		}

	for (i=0;i<VFREGS;i++)
		if (live.fate[i].needflush==NF_SCRATCH) {
			f_forget_about(i);
		}
}
5258    
5259     /********************************************************************
5260     * Support functions, internal *
5261     ********************************************************************/
5262    
5263    
/* Pad the code output pointer up to an a-byte boundary (a must be a
 * power of two; a==0 means no alignment). Uses multi-byte NOP fillers
 * when the target supports them, otherwise plain 0x90 NOPs. */
static void align_target(uae_u32 a)
{
	if (!a)
		return;

	if (tune_nop_fillers)
		raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
	else {
		/* Fill with NOPs --- makes debugging with gdb easier */
		while ((uintptr)target&(a-1))
			*target++=0x90;
	}
}
5277    
5278     static __inline__ int isinrom(uintptr addr)
5279     {
5280     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5281     }
5282    
/* Spill everything that a C call could clobber: write dirty integer
 * registers held in caller-saved host registers back to memory, and
 * evict every FPU register (the whole x87-style stack is call-
 * clobbered). Registers stay associated — only their values are
 * guaranteed to be in memory afterwards. */
static void flush_all(void)
{
	int i;

	log_flush();
	for (i=0;i<VREGS;i++)
		if (live.state[i].status==DIRTY) {
			if (!call_saved[live.state[i].realreg]) {
				tomem(i);
			}
		}
	for (i=0;i<VFREGS;i++)
		if (f_isinreg(i))
			f_evict(i);
	raw_fp_cleanup_drop();
}
5299    
/* Make sure all registers that will get clobbered by a call are
   save and sound in memory */
static void prepare_for_call_1(void)
{
	flush_all(); /* If there are registers that don't get clobbered,
				  * we should be a bit more selective here */
}

/* We will call a C routine in a moment. That will clobber all registers,
   so we need to disassociate everything */
static void prepare_for_call_2(void)
{
	int i;
	// Free every caller-saved host register still holding values
	// (prepare_for_call_1 already wrote them to memory, so this is
	// just bookkeeping, no spill code).
	for (i=0;i<N_REGS;i++)
		if (!call_saved[i] && live.nat[i].nholds>0)
			free_nreg(i);

	for (i=0;i<N_FREGS;i++)
		if (live.fat[i].nholds>0)
			f_free_nreg(i);

	live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
								  flags at the very start of the call_r
								  functions! */
}
5325    
5326     /********************************************************************
5327     * Memory access and related functions, CREATE time *
5328     ********************************************************************/
5329    
/* Record a conditional branch for the block epilogue: the 68k addresses
 * of the not-taken and taken paths plus the condition code that decides
 * between them. The actual branch code is emitted later from these. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
	next_pc_p=not_taken;
	taken_pc_p=taken;
	branch_cc=cond;
}
5336    
5337    
5338     static uae_u32 get_handler_address(uae_u32 addr)
5339     {
5340     uae_u32 cl=cacheline(addr);
5341 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5342     return (uintptr)&(bi->direct_handler_to_use);
5343 gbeauche 1.1 }
5344    
5345     static uae_u32 get_handler(uae_u32 addr)
5346     {
5347     uae_u32 cl=cacheline(addr);
5348 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5349     return (uintptr)bi->direct_handler_to_use;
5350 gbeauche 1.1 }
5351    
/* Emit code that loads the current handler for 68k address addr into
 * host register reg, by reading through the handler's fixed slot. */
static void load_handler(int reg, uae_u32 addr)
{
	mov_l_rm(reg,get_handler_address(addr));
}
5356    
5357     /* This version assumes that it is writing *real* memory, and *will* fail
5358     * if that assumption is wrong! No branches, no second chances, just
5359     * straight go-for-it attitude */
5360    
/* Emit a direct (unchecked) store of 1/2/4 bytes from virtual register
 * 'source' to guest address 'address', biased by MEMBaseDiff. Word and
 * long stores byte-swap into a scratch copy first (68k is big-endian).
 * If 'clobber' is set, 'source' itself may be trashed as the scratch.
 * Both tmp and the scratch register are forgotten afterwards. */
static void writemem_real(int address, int source, int size, int tmp, int clobber)
{
	int f=tmp;

	if (clobber)
		f=source;

#if SIZEOF_VOID_P == 8
	/* HACK: address calculation is suboptimal and possibly broken */
	sign_extend_32_rr(address, address);
#endif

	switch(size) {
	 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
	 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
	 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
	}
	forget_about(tmp);
	forget_about(f);
}
5381    
/* Public store helpers used by the instruction translators. All funnel
 * into writemem_real; the _clobber variants permit trashing 'source'. */

void writebyte(int address, int source, int tmp)
{
	writemem_real(address,source,1,tmp,0);
}

static __inline__ void writeword_general(int address, int source, int tmp,
					 int clobber)
{
	writemem_real(address,source,2,tmp,clobber);
}

void writeword_clobber(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,1);
}

void writeword(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,0);
}

static __inline__ void writelong_general(int address, int source, int tmp,
					 int clobber)
{
	writemem_real(address,source,4,tmp,clobber);
}

void writelong_clobber(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,1);
}

void writelong(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,0);
}
5418    
5419    
5420    
5421     /* This version assumes that it is reading *real* memory, and *will* fail
5422     * if that assumption is wrong! No branches, no second chances, just
5423     * straight go-for-it attitude */
5424    
/* Emit a direct (unchecked) load of 1/2/4 bytes from guest address
 * 'address' (biased by MEMBaseDiff) into virtual register 'dest', with
 * byte-swapping for word/long (68k is big-endian). tmp is forgotten;
 * note f is computed but unused in this direct-addressing variant. */
static void readmem_real(int address, int dest, int size, int tmp)
{
	int f=tmp;

	if (size==4 && address!=dest)
		f=dest;

#if SIZEOF_VOID_P == 8
	/* HACK: address calculation is suboptimal and possibly broken */
	sign_extend_32_rr(address, address);
#endif

	switch(size) {
	 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
	 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
	 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
	}
	forget_about(tmp);
}
5444    
/* Public load helpers used by the instruction translators; thin
 * size-specific wrappers over readmem_real. */

void readbyte(int address, int dest, int tmp)
{
	readmem_real(address,dest,1,tmp);
}

void readword(int address, int dest, int tmp)
{
	readmem_real(address,dest,2,tmp);
}

void readlong(int address, int dest, int tmp)
{
	readmem_real(address,dest,4,tmp);
}
5459    
5460     void get_n_addr(int address, int dest, int tmp)
5461     {
5462     // a is the register containing the virtual address
5463     // after the offset had been fetched
5464     int a=tmp;
5465    
5466     // f is the register that will contain the offset
5467     int f=tmp;
5468    
5469     // a == f == tmp if (address == dest)
5470     if (address!=dest) {
5471     a=address;
5472     f=dest;
5473     }
5474    
5475     #if REAL_ADDRESSING
5476     mov_l_rr(dest, address);
5477     #elif DIRECT_ADDRESSING
5478     lea_l_brr(dest,address,MEMBaseDiff);
5479     #endif
5480     forget_about(tmp);
5481     }
5482    
void get_n_addr_jmp(int address, int dest, int tmp)
{
	/* For this, we need to get the same address as the rest of UAE
	   would --- otherwise we end up translating everything twice */
	// Currently identical to get_n_addr; kept as a separate entry
	// point so jump-target address calculation can diverge if needed.
	get_n_addr(address,dest,tmp);
}
5489    
5490    
5491     /* base is a register, but dp is an actual value.
5492     target is a register, as is tmp */
/* base is a register, but dp is an actual value.
   target is a register, as is tmp */
/* Emit code computing a 68020-style extended effective address from the
 * extension word dp. Handles both the full format (bit 8 set: base/
 * index suppression, base and outer displacements fetched from the
 * instruction stream, optional memory indirection pre- or post-index)
 * and the brief 68000-style format (d8 + base + scaled index).
 * Fetching displacements advances m68k_pc_offset as a side effect. */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
	int reg = (dp >> 12) & 15;       /* index register number */
	int regd_shift=(dp >> 9) & 3;    /* index scale: 1,2,4,8 */

	if (dp & 0x100) {
		/* Full extension word format */
		int ignorebase=(dp&0x80);
		int ignorereg=(dp&0x40);
		int addbase=0;
		int outer=0;

		/* Base displacement: word or long, fetched from the stream. */
		if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

		/* Outer displacement, same encoding. */
		if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(target,reg);   /* index.w */
				else
					mov_l_rr(target,reg);            /* index.l */
				shll_l_ri(target,regd_shift);
			}
			else
				mov_l_ri(target,0);

			/* target is now regd */
			if (!ignorebase)
				add_l(target,base);
			add_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp); /* memory indirect */
		}
		else { /* do the getlong first, then add regd */
			if (!ignorebase) {
				mov_l_rr(target,base);
				add_l_ri(target,addbase);
			}
			else
				mov_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);

			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(tmp,reg);
				else
					mov_l_rr(tmp,reg);
				shll_l_ri(tmp,regd_shift);
				/* tmp is now regd */
				add_l(target,tmp);
			}
		}
		add_l_ri(target,outer);
	}
	else { /* 68000 version: d8(base,index.size*scale) */
		if ((dp & 0x800) == 0) { /* Sign extend */
			sign_extend_16_rr(target,reg);
			lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
		else {
			lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
	}
	forget_about(tmp);
}
5558    
5559    
5560    
5561    
5562    
/* Enable/disable the translation cache. Any change of state forces a
 * hard icache flush so stale translations cannot be executed. */
void set_cache_state(int enabled)
{
	if (enabled!=letit)
		flush_icache_hard(77);
	letit=enabled;
}

/* Current cache-enabled flag. */
int get_cache_state(void)
{
	return letit;
}

/* Number of bytes of translated code currently in the cache. */
uae_u32 get_jitted_size(void)
{
	if (compiled_code)
		return current_compile_p-compiled_code;
	return 0;
}
5581    
const int CODE_ALLOC_MAX_ATTEMPTS = 10;
const int CODE_ALLOC_BOUNDARIES   = 128 * 1024; // 128 KB

/* Allocate 'size' bytes for translated code. The Linux-specific branch
 * (currently disabled by the "&& 0") tried to pin the cache into the
 * low/positive half of the 32-bit address space near the break; the
 * live path simply uses vm_acquire and returns NULL on failure. */
static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
	/*
	  This is a really awful hack that is known to work on Linux at
	  least.
	  
	  The trick here is to make sure the allocated cache is nearby
	  code segment, and more precisely in the positive half of a
	  32-bit address space. i.e. addr < 0x80000000. Actually, it
	  turned out that a 32-bit binary run on AMD64 yields a cache
	  allocated around 0xa0000000, thus causing some troubles when
	  translating addresses from m68k to x86.
	*/
	static uint8 * code_base = NULL;
	if (code_base == NULL) {
		uintptr page_size = getpagesize();
		uintptr boundaries = CODE_ALLOC_BOUNDARIES;
		if (boundaries < page_size)
			boundaries = page_size;
		code_base = (uint8 *)sbrk(0);
		for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
			if (vm_acquire_fixed(code_base, size) == 0) {
				uint8 *code = code_base;
				code_base += size;
				return code;
			}
			code_base += boundaries;
		}
		return NULL;
	}

	if (vm_acquire_fixed(code_base, size) == 0) {
		uint8 *code = code_base;
		code_base += size;
		return code;
	}

	if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
		return NULL;

	return do_alloc_code(size, depth + 1);
#else
	uint8 *code = (uint8 *)vm_acquire(size);
	return code == VM_MAP_FAILED ? NULL : code;
#endif
}
5632    
/* Public entry for code allocation; starts the retry depth at 0. */
static inline uint8 *alloc_code(uint32 size)
{
	return do_alloc_code(size, 0);
}
5637    
5638 gbeauche 1.1 void alloc_cache(void)
5639     {
5640     if (compiled_code) {
5641     flush_icache_hard(6);
5642 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5643 gbeauche 1.1 compiled_code = 0;
5644     }
5645    
5646     if (cache_size == 0)
5647     return;
5648    
5649     while (!compiled_code && cache_size) {
5650 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5651 gbeauche 1.1 compiled_code = 0;
5652     cache_size /= 2;
5653     }
5654     }
5655 gbeauche 1.2 vm_protect(compiled_code, cache_size, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5656 gbeauche 1.1
5657     if (compiled_code) {
5658     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5659     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5660     current_compile_p = compiled_code;
5661     current_cache_size = 0;
5662     }
5663     }
5664    
5665    
5666    
5667 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5668 gbeauche 1.1
/* Compute the pair of 32-bit checksums (running sum and running xor of the
   32-bit words covering the block's 68k source code) used to detect
   self-modifying code. Results are returned through c1/c2. */
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
	uae_u32 k1 = 0;
	uae_u32 k2 = 0;

#if USE_CHECKSUM_INFO
	/* Walk every (possibly discontiguous) source range recorded for this
	   block; all ranges fold into the same two accumulators. */
	checksum_info *csi = bi->csi;
	Dif(!csi) abort();
	while (csi) {
		uae_s32 len = csi->length;
		uintptr tmp = (uintptr)csi->start_p;
#else
	uae_s32 len = bi->len;
	uintptr tmp = (uintptr)bi->min_pcp;
#endif
	uae_u32*pos;

	/* Round the start address down to a 32-bit boundary and widen the
	   length accordingly, so we can hash whole aligned words. */
	len += (tmp & 3);
	tmp &= ~((uintptr)3);
	pos = (uae_u32 *)tmp;

	/* Ranges that are negative or longer than MAX_CHECKSUM_LEN are simply
	   not hashed, i.e. they contribute nothing to k1/k2. */
	if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
		while (len > 0) {
			k1 += *pos;
			k2 ^= *pos;
			pos++;
			len -= 4;
		}
	}

#if USE_CHECKSUM_INFO
		csi = csi->next;
	}
#endif

	*c1 = k1;
	*c2 = k2;
}
5707    
5708 gbeauche 1.8 #if 0
/* Debug helper (currently compiled out by the surrounding #if 0): dumps the
   raw 32-bit words that calc_checksum() would hash for the given range.
   k1/k2 are vestigial here — this function only prints, it does not hash. */
static void show_checksum(CSI_TYPE* csi)
{
	uae_u32 k1=0;
	uae_u32 k2=0;
	uae_s32 len=CSI_LENGTH(csi);
	uae_u32 tmp=(uintptr)CSI_START_P(csi);
	uae_u32* pos;

	/* Same word alignment as calc_checksum(). */
	len+=(tmp&3);
	tmp&=(~3);
	pos=(uae_u32*)tmp;

	if (len<0 || len>MAX_CHECKSUM_LEN) {
		return;
	}
	else {
		while (len>0) {
			write_log("%08x ",*pos);
			pos++;
			len-=4;
		}
		write_log(" bla\n");
	}
}
5733 gbeauche 1.8 #endif
5734 gbeauche 1.1
5735    
5736     int check_for_cache_miss(void)
5737     {
5738     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5739    
5740     if (bi) {
5741     int cl=cacheline(regs.pc_p);
5742     if (bi!=cache_tags[cl+1].bi) {
5743     raise_in_cl_list(bi);
5744     return 1;
5745     }
5746     }
5747     return 0;
5748     }
5749    
5750    
5751     static void recompile_block(void)
5752     {
5753     /* An existing block's countdown code has expired. We need to make
5754     sure that execute_normal doesn't refuse to recompile due to a
5755     perceived cache miss... */
5756     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5757    
5758     Dif (!bi)
5759     abort();
5760     raise_in_cl_list(bi);
5761     execute_normal();
5762     return;
5763     }
5764     static void cache_miss(void)
5765     {
5766     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5767     uae_u32 cl=cacheline(regs.pc_p);
5768     blockinfo* bi2=get_blockinfo(cl);
5769    
5770     if (!bi) {
5771     execute_normal(); /* Compile this block now */
5772     return;
5773     }
5774     Dif (!bi2 || bi==bi2) {
5775     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5776     abort();
5777     }
5778     raise_in_cl_list(bi);
5779     return;
5780     }
5781    
5782     static int called_check_checksum(blockinfo* bi);
5783    
/* Re-validate a block that is in BI_NEED_CHECK state by recomputing its
   source checksums. A valid block is reactivated (moved back to the active
   list); a changed one is invalidated for recompilation.
   Returns non-zero if the block is (still) good. */
static inline int block_check_checksum(blockinfo* bi)
{
	uae_u32 c1,c2;
	bool isgood;

	if (bi->status!=BI_NEED_CHECK)
		return 1; /* This block is in a checked state */

	checksum_count++;

	/* A stored 0/0 pair means "no checksum recorded" — force a mismatch
	   so such blocks always get recompiled. */
	if (bi->c1 || bi->c2)
		calc_checksum(bi,&c1,&c2);
	else {
		c1=c2=1; /* Make sure it doesn't match */
	}

	isgood=(c1==bi->c1 && c2==bi->c2);

	if (isgood) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use=bi->handler;
		set_dhtu(bi,bi->direct_handler);
		bi->status=BI_CHECKING;
		/* A block is only good if the blocks it jumps into are, too. */
		isgood=called_check_checksum(bi);
	}
	if (isgood) {
		/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2);*/
		remove_from_list(bi);
		add_to_active(bi);
		raise_in_cl_list(bi);
		bi->status=BI_ACTIVE;
	}
	else {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2); */
		invalidate_block(bi);
		raise_in_cl_list(bi);
	}
	return isgood;
}
5828    
5829     static int called_check_checksum(blockinfo* bi)
5830     {
5831     dependency* x=bi->deplist;
5832     int isgood=1;
5833     int i;
5834    
5835     for (i=0;i<2 && isgood;i++) {
5836     if (bi->dep[i].jmp_off) {
5837     isgood=block_check_checksum(bi->dep[i].target);
5838     }
5839     }
5840     return isgood;
5841     }
5842    
5843     static void check_checksum(void)
5844     {
5845     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5846     uae_u32 cl=cacheline(regs.pc_p);
5847     blockinfo* bi2=get_blockinfo(cl);
5848    
5849     /* These are not the droids you are looking for... */
5850     if (!bi) {
5851     /* Whoever is the primary target is in a dormant state, but
5852     calling it was accidental, and we should just compile this
5853     new block */
5854     execute_normal();
5855     return;
5856     }
5857     if (bi!=bi2) {
5858     /* The block was hit accidentally, but it does exist. Cache miss */
5859     cache_miss();
5860     return;
5861     }
5862    
5863     if (!block_check_checksum(bi))
5864     execute_normal();
5865     }
5866    
/* Make the compiler's current register state compatible with the recorded
   entry state (bi->env) of an existing block we are about to link to:
   honour its promises (vregs it will not use) and satisfy its demands
   (vregs it expects preloaded in specific native registers). */
static __inline__ void match_states(blockinfo* bi)
{
	int i;
	smallstate* s=&(bi->env);

	/* A stale block must be re-validated before we trust its state. */
	if (bi->status==BI_NEED_CHECK) {
		block_check_checksum(bi);
	}
	if (bi->status==BI_ACTIVE ||
		bi->status==BI_FINALIZING) { /* Deal with the *promises* the
						block makes (about not using
						certain vregs) */
		for (i=0;i<16;i++) {
			if (s->virt[i]==L_UNNEEDED) {
				// write_log("unneeded reg %d at %p\n",i,target);
				COMPCALL(forget_about)(i); // FIXME
			}
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			// printf("Loading reg %d into %d at %p\n",v,i,target);
			readreg_specific(v,4,i);
			// do_load_reg(i,v);
			// setlock(i);
		}
	}
	/* Second pass: drop the locks taken by readreg_specific above. */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			unlock2(i);
		}
	}
}
5905    
5906     static __inline__ void create_popalls(void)
5907     {
5908     int i,r;
5909    
5910 gbeauche 1.24 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
5911     write_log("FATAL: Could not allocate popallspace!\n");
5912     abort();
5913     }
5914     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
5915    
5916 gbeauche 1.1 current_compile_p=popallspace;
5917     set_target(current_compile_p);
5918     #if USE_PUSH_POP
5919     /* If we can't use gcc inline assembly, we need to pop some
5920     registers before jumping back to the various get-out routines.
5921     This generates the code for it.
5922     */
5923 gbeauche 1.5 align_target(align_jumps);
5924     popall_do_nothing=get_target();
5925 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
5926     if (need_to_preserve[i])
5927     raw_pop_l_r(i);
5928     }
5929 gbeauche 1.24 raw_jmp((uintptr)do_nothing);
5930 gbeauche 1.1
5931 gbeauche 1.5 align_target(align_jumps);
5932 gbeauche 1.1 popall_execute_normal=get_target();
5933     for (i=0;i<N_REGS;i++) {
5934     if (need_to_preserve[i])
5935     raw_pop_l_r(i);
5936     }
5937 gbeauche 1.24 raw_jmp((uintptr)execute_normal);
5938 gbeauche 1.1
5939 gbeauche 1.5 align_target(align_jumps);
5940 gbeauche 1.1 popall_cache_miss=get_target();
5941     for (i=0;i<N_REGS;i++) {
5942     if (need_to_preserve[i])
5943     raw_pop_l_r(i);
5944     }
5945 gbeauche 1.24 raw_jmp((uintptr)cache_miss);
5946 gbeauche 1.1
5947 gbeauche 1.5 align_target(align_jumps);
5948 gbeauche 1.1 popall_recompile_block=get_target();
5949     for (i=0;i<N_REGS;i++) {
5950     if (need_to_preserve[i])
5951     raw_pop_l_r(i);
5952     }
5953 gbeauche 1.24 raw_jmp((uintptr)recompile_block);
5954 gbeauche 1.5
5955     align_target(align_jumps);
5956 gbeauche 1.1 popall_exec_nostats=get_target();
5957     for (i=0;i<N_REGS;i++) {
5958     if (need_to_preserve[i])
5959     raw_pop_l_r(i);
5960     }
5961 gbeauche 1.24 raw_jmp((uintptr)exec_nostats);
5962 gbeauche 1.5
5963     align_target(align_jumps);
5964 gbeauche 1.1 popall_check_checksum=get_target();
5965     for (i=0;i<N_REGS;i++) {
5966     if (need_to_preserve[i])
5967     raw_pop_l_r(i);
5968     }
5969 gbeauche 1.24 raw_jmp((uintptr)check_checksum);
5970 gbeauche 1.5
5971     align_target(align_jumps);
5972 gbeauche 1.1 current_compile_p=get_target();
5973     #else
5974     popall_exec_nostats=(void *)exec_nostats;
5975     popall_execute_normal=(void *)execute_normal;
5976     popall_cache_miss=(void *)cache_miss;
5977     popall_recompile_block=(void *)recompile_block;
5978     popall_do_nothing=(void *)do_nothing;
5979     popall_check_checksum=(void *)check_checksum;
5980     #endif
5981    
5982     /* And now, the code to do the matching pushes and then jump
5983     into a handler routine */
5984     pushall_call_handler=get_target();
5985     #if USE_PUSH_POP
5986     for (i=N_REGS;i--;) {
5987     if (need_to_preserve[i])
5988     raw_push_l_r(i);
5989     }
5990     #endif
5991     r=REG_PC_TMP;
5992 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
5993 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
5994 gbeauche 1.24 raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
5995 gbeauche 1.6
5996 gbeauche 1.24 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
5997 gbeauche 1.6 align_target(align_jumps);
5998     m68k_compile_execute = (void (*)(void))get_target();
5999     for (i=N_REGS;i--;) {
6000     if (need_to_preserve[i])
6001     raw_push_l_r(i);
6002     }
6003     align_target(align_loops);
6004 gbeauche 1.24 uae_u32 dispatch_loop = (uintptr)get_target();
6005 gbeauche 1.6 r=REG_PC_TMP;
6006 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6007 gbeauche 1.6 raw_and_l_ri(r,TAGMASK);
6008 gbeauche 1.24 raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6009     raw_cmp_l_mi((uintptr)&regs.spcflags,0);
6010 gbeauche 1.6 raw_jcc_b_oponly(NATIVE_CC_EQ);
6011 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6012     raw_call((uintptr)m68k_do_specialties);
6013 gbeauche 1.6 raw_test_l_rr(REG_RESULT,REG_RESULT);
6014     raw_jcc_b_oponly(NATIVE_CC_EQ);
6015 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6016     raw_cmp_b_mi((uintptr)&quit_program,0);
6017 gbeauche 1.6 raw_jcc_b_oponly(NATIVE_CC_EQ);
6018 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6019 gbeauche 1.6 for (i=0;i<N_REGS;i++) {
6020     if (need_to_preserve[i])
6021     raw_pop_l_r(i);
6022     }
6023     raw_ret();
6024     #endif
6025 gbeauche 1.24
6026     // no need to further write into popallspace
6027     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6028 gbeauche 1.1 }
6029    
6030     static __inline__ void reset_lists(void)
6031     {
6032     int i;
6033    
6034     for (i=0;i<MAX_HOLD_BI;i++)
6035     hold_bi[i]=NULL;
6036     active=NULL;
6037     dormant=NULL;
6038     }
6039    
/* Emit the two per-block stub routines (direct_pen / direct_pcc) into the
   translation cache and reset the blockinfo to a pristine BI_INVALID state. */
static void prepare_block(blockinfo* bi)
{
	int i;

	set_target(current_compile_p);
	align_target(align_jumps);
	/* Stub taken when the block must be (re)compiled: publish the block's
	   68k PC and fall into the execute_normal exit. */
	bi->direct_pen=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_execute_normal);

	align_target(align_jumps);
	/* Stub taken when the block first needs its checksums re-validated. */
	bi->direct_pcc=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_check_checksum);
	current_compile_p=get_target();

	/* Clear dependency links and reset bookkeeping. */
	bi->deplist=NULL;
	for (i=0;i<2;i++) {
		bi->dep[i].prev_p=NULL;
		bi->dep[i].next=NULL;
	}
	bi->env=default_ss;
	bi->status=BI_INVALID;
	bi->havestate=0;
	//bi->env=empty_ss;
}
6068    
// OPCODE is in big endian format, use cft_map() beforehand, if needed.
/* Remove an opcode from both compiled-handler tables (flag-accurate and
   no-flags), so it always falls back to the interpreter. */
static inline void reset_compop(int opcode)
{
	compfunctbl[opcode] = NULL;
	nfcompfunctbl[opcode] = NULL;
}
6075    
/* Parse exactly four hexadecimal characters starting at p into an opcode
   value. Returns the 16-bit opcode, or -1 if any character is not a hex
   digit. Both upper- and lower-case digits are accepted. */
static int read_opcode(const char *p)
{
	int opcode = 0;
	for (int i = 0; i < 4; i++) {
		int c = p[i];
		int digit;
		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			digit = c - 'A' + 10;
		else
			return -1;
		opcode = (opcode << 4) | digit;
	}
	return opcode;
}
6098    
6099     static bool merge_blacklist()
6100     {
6101     const char *blacklist = PrefsFindString("jitblacklist");
6102     if (blacklist) {
6103     const char *p = blacklist;
6104     for (;;) {
6105     if (*p == 0)
6106     return true;
6107    
6108     int opcode1 = read_opcode(p);
6109     if (opcode1 < 0)
6110     return false;
6111     p += 4;
6112    
6113     int opcode2 = opcode1;
6114     if (*p == '-') {
6115     p++;
6116     opcode2 = read_opcode(p);
6117     if (opcode2 < 0)
6118     return false;
6119     p += 4;
6120     }
6121    
6122     if (*p == 0 || *p == ';') {
6123     write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6124     for (int opcode = opcode1; opcode <= opcode2; opcode++)
6125     reset_compop(cft_map(opcode));
6126    
6127     if (*p++ == ';')
6128     continue;
6129    
6130     return true;
6131     }
6132    
6133     return false;
6134     }
6135     }
6136     return true;
6137 gbeauche 1.17 }
6138    
6139 gbeauche 1.1 void build_comp(void)
6140     {
6141     int i;
6142     int jumpcount=0;
6143     unsigned long opcode;
6144     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6145     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6146     int count;
6147     int cpu_level = 0; // 68000 (default)
6148     if (CPUType == 4)
6149     cpu_level = 4; // 68040 with FPU
6150     else {
6151     if (FPUType)
6152     cpu_level = 3; // 68020 with FPU
6153     else if (CPUType >= 2)
6154     cpu_level = 2; // 68020
6155     else if (CPUType == 1)
6156     cpu_level = 1;
6157     }
6158     struct cputbl *nfctbl = (
6159     cpu_level == 4 ? op_smalltbl_0_nf
6160     : cpu_level == 3 ? op_smalltbl_1_nf
6161     : cpu_level == 2 ? op_smalltbl_2_nf
6162     : cpu_level == 1 ? op_smalltbl_3_nf
6163     : op_smalltbl_4_nf);
6164    
6165     write_log ("<JIT compiler> : building compiler function tables\n");
6166    
6167     for (opcode = 0; opcode < 65536; opcode++) {
6168 gbeauche 1.21 reset_compop(opcode);
6169 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6170     prop[opcode].use_flags = 0x1f;
6171     prop[opcode].set_flags = 0x1f;
6172     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6173     }
6174    
6175     for (i = 0; tbl[i].opcode < 65536; i++) {
6176     int cflow = table68k[tbl[i].opcode].cflow;
6177 gbeauche 1.10 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6178     cflow = fl_const_jump;
6179 gbeauche 1.8 else
6180 gbeauche 1.10 cflow &= ~fl_const_jump;
6181     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6182 gbeauche 1.1
6183     int uses_fpu = tbl[i].specific & 32;
6184 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6185 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6186     else
6187     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6188     }
6189 gbeauche 1.8
6190 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6191     int uses_fpu = tbl[i].specific & 32;
6192 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6193 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6194     else
6195     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6196    
6197     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6198     }
6199    
6200     for (i = 0; nfctbl[i].handler; i++) {
6201     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6202     }
6203    
6204     for (opcode = 0; opcode < 65536; opcode++) {
6205     compop_func *f;
6206     compop_func *nff;
6207     cpuop_func *nfcf;
6208     int isaddx,cflow;
6209    
6210     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6211     continue;
6212    
6213     if (table68k[opcode].handler != -1) {
6214     f = compfunctbl[cft_map(table68k[opcode].handler)];
6215     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6216     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6217     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6218     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6219     prop[cft_map(opcode)].cflow = cflow;
6220     prop[cft_map(opcode)].is_addx = isaddx;
6221     compfunctbl[cft_map(opcode)] = f;
6222     nfcompfunctbl[cft_map(opcode)] = nff;
6223     Dif (nfcf == op_illg_1)
6224     abort();
6225     nfcpufunctbl[cft_map(opcode)] = nfcf;
6226     }
6227     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6228     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6229     }
6230     for (i = 0; nfctbl[i].handler != NULL; i++) {
6231     if (nfctbl[i].specific)
6232     nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
6233     }
6234 gbeauche 1.21
6235     /* Merge in blacklist */
6236     if (!merge_blacklist())
6237     write_log("<JIT compiler> : blacklist merge failure!\n");
6238 gbeauche 1.1
6239     count=0;
6240     for (opcode = 0; opcode < 65536; opcode++) {
6241     if (compfunctbl[cft_map(opcode)])
6242     count++;
6243     }
6244     write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6245    
6246     /* Initialise state */
6247     create_popalls();
6248     alloc_cache();
6249     reset_lists();
6250    
6251     for (i=0;i<TAGSIZE;i+=2) {
6252     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6253     cache_tags[i+1].bi=NULL;
6254     }
6255    
6256     #if 0
6257     for (i=0;i<N_REGS;i++) {
6258     empty_ss.nat[i].holds=-1;
6259     empty_ss.nat[i].validsize=0;
6260     empty_ss.nat[i].dirtysize=0;
6261     }
6262     #endif
6263     for (i=0;i<VREGS;i++) {
6264     empty_ss.virt[i]=L_NEEDED;
6265     }
6266     for (i=0;i<N_REGS;i++) {
6267     empty_ss.nat[i]=L_UNKNOWN;
6268     }
6269     default_ss=empty_ss;
6270     }
6271    
6272    
/* Flush routine that intentionally does nothing (no-flush variant). */
static void flush_icache_none(int n)
{
	/* Nothing to do. */
}
6277    
6278     static void flush_icache_hard(int n)
6279     {
6280     uae_u32 i;
6281     blockinfo* bi, *dbi;
6282    
6283     hard_flush_count++;
6284     #if 0
6285     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6286     n,regs.pc,regs.pc_p,current_cache_size/1024);
6287     current_cache_size = 0;
6288     #endif
6289     bi=active;
6290     while(bi) {
6291     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6292     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6293     dbi=bi; bi=bi->next;
6294     free_blockinfo(dbi);
6295     }
6296     bi=dormant;
6297     while(bi) {
6298     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6299     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6300     dbi=bi; bi=bi->next;
6301     free_blockinfo(dbi);
6302     }
6303    
6304     reset_lists();
6305     if (!compiled_code)
6306     return;
6307     current_compile_p=compiled_code;
6308     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6309     }
6310    
6311    
6312     /* "Soft flushing" --- instead of actually throwing everything away,
6313     we simply mark everything as "needs to be checked".
6314     */
6315    
6316     static inline void flush_icache_lazy(int n)
6317     {
6318     uae_u32 i;
6319     blockinfo* bi;
6320     blockinfo* bi2;
6321    
6322     soft_flush_count++;
6323     if (!active)
6324     return;
6325    
6326     bi=active;
6327     while (bi) {
6328     uae_u32 cl=cacheline(bi->pc_p);
6329     if (bi->status==BI_INVALID ||
6330     bi->status==BI_NEED_RECOMP) {
6331     if (bi==cache_tags[cl+1].bi)
6332     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6333     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6334     set_dhtu(bi,bi->direct_pen);
6335     bi->status=BI_INVALID;
6336     }
6337     else {
6338     if (bi==cache_tags[cl+1].bi)
6339     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6340     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6341     set_dhtu(bi,bi->direct_pcc);
6342     bi->status=BI_NEED_CHECK;
6343     }
6344     bi2=bi;
6345     bi=bi->next;
6346     }
6347     /* bi2 is now the last entry in the active list */
6348     bi2->next=dormant;
6349     if (dormant)
6350     dormant->prev_p=&(bi2->next);
6351    
6352     dormant=active;
6353     active->prev_p=&dormant;
6354     active=NULL;
6355 gbeauche 1.22 }
6356    
/* Invalidate translated blocks that overlap the 68k address range
   [start, start+length). With LAZY_FLUSH_ICACHE_RANGE only the affected
   blocks are marked BI_NEED_RECOMP; otherwise we fall back to a full
   (soft) flush via flush_icache(-1). */
void flush_icache_range(uae_u32 start, uae_u32 length)
{
	if (!active)
		return;

#if LAZY_FLUSH_ICACHE_RANGE
	uae_u8 *start_p = get_real_address(start);
	blockinfo *bi = active;
	while (bi) {
#if USE_CHECKSUM_INFO
		/* NOTE(review): this overlap test mixes signed pointer differences
		   with unsigned lengths and appears to rely on negative values
		   converting to huge unsigned ones — confirm before changing. */
		bool invalidate = false;
		for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
			invalidate = (((start_p - csi->start_p) < csi->length) ||
						  ((csi->start_p - start_p) < length));
#else
		// Assume system is consistent and would invalidate the right range
		const bool invalidate = (bi->pc_p - start_p) < length;
#endif
		if (invalidate) {
			uae_u32 cl = cacheline(bi->pc_p);
			if (bi == cache_tags[cl + 1].bi)
				cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
			bi->handler_to_use = (cpuop_func *)popall_execute_normal;
			set_dhtu(bi, bi->direct_pen);
			bi->status = BI_NEED_RECOMP;
		}
		bi = bi->next;
	}
	return;
#endif
	flush_icache(-1);
}
6389    
/* Last-resort bailout on an unrecoverable JIT inconsistency: just abort. */
static void catastrophe(void)
{
	abort();
}
6394    
6395     int failure;
6396    
6397     #define TARGET_M68K 0
6398     #define TARGET_POWERPC 1
6399     #define TARGET_X86 2
6400 gbeauche 1.24 #define TARGET_X86_64 3
6401 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6402     #define TARGET_NATIVE TARGET_X86
6403     #endif
6404     #if defined(powerpc) || defined(__powerpc__)
6405     #define TARGET_NATIVE TARGET_POWERPC
6406     #endif
6407 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6408     #define TARGET_NATIVE TARGET_X86_64
6409     #endif
6410 gbeauche 1.1
6411     #ifdef ENABLE_MON
6412 gbeauche 1.24 static uae_u32 mon_read_byte_jit(uintptr addr)
6413 gbeauche 1.1 {
6414     uae_u8 *m = (uae_u8 *)addr;
6415 gbeauche 1.24 return (uintptr)(*m);
6416 gbeauche 1.1 }
6417    
6418 gbeauche 1.24 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6419 gbeauche 1.1 {
6420     uae_u8 *m = (uae_u8 *)addr;
6421     *m = b;
6422     }
6423     #endif
6424    
/* Disassemble [start, start+length) through the external "mon" monitor.
   'target' selects the disassembler flavor (TARGET_* constants). No-op
   unless JITDebug is set and both JIT_DEBUG and ENABLE_MON are built in. */
void disasm_block(int target, uint8 * start, size_t length)
{
	if (!JITDebug)
		return;

#if defined(JIT_DEBUG) && defined(ENABLE_MON)
	char disasm_str[200];
	/* NOTE(review): "%x" with pointer arguments truncates on 64-bit hosts;
	   debug-only path, but worth confirming/fixing. */
	sprintf(disasm_str, "%s $%x $%x",
			target == TARGET_M68K ? "d68" :
			target == TARGET_X86 ? "d86" :
			target == TARGET_X86_64 ? "d8664" :
			target == TARGET_POWERPC ? "d" : "x",
			start, start + length - 1);

	/* Temporarily point mon's memory accessors at raw host memory. */
	uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
	void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;

	mon_read_byte = mon_read_byte_jit;
	mon_write_byte = mon_write_byte_jit;

	char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
	mon(4, arg);

	/* Restore the original accessors. */
	mon_read_byte = old_mon_read_byte;
	mon_write_byte = old_mon_write_byte;
#endif
}
6452    
/* Disassemble a block of generated host (native) code. */
static void disasm_native_block(uint8 *start, size_t length)
{
	disasm_block(TARGET_NATIVE, start, length);
}
6457    
/* Disassemble a block of 68k source code. */
static void disasm_m68k_block(uint8 *start, size_t length)
{
	disasm_block(TARGET_M68K, start, length);
}
6462    
6463     #ifdef HAVE_GET_WORD_UNSWAPPED
6464     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6465     #else
6466     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6467     #endif
6468    
6469     #if JIT_DEBUG
6470     static uae_u8 *last_regs_pc_p = 0;
6471     static uae_u8 *last_compiled_block_addr = 0;
6472    
/* Debug dump (JIT_DEBUG builds): log the JIT-relevant host addresses, the
   current 68k processor state, and the last compiled block's location. */
void compiler_dumpstate(void)
{
	if (!JITDebug)
		return;

	write_log("### Host addresses\n");
	write_log("MEM_BASE : %x\n", MEMBaseDiff);
	write_log("PC_P : %p\n", &regs.pc_p);
	write_log("SPCFLAGS : %p\n", &regs.spcflags);
	write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
	write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
	write_log("\n");

	write_log("### M68k processor state\n");
	m68k_dumpstate(0);
	write_log("\n");

	write_log("### Block in Mac address space\n");
	write_log("M68K block : %p\n",
			  (void *)(uintptr)get_virtual_address(last_regs_pc_p));
	write_log("Native block : %p (%d bytes)\n",
			  (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
			  get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
	write_log("\n");
}
6499    
6500     static void compile_block(cpu_history* pc_hist, int blocklen)
6501     {
6502     if (letit && compiled_code) {
6503     #if PROFILE_COMPILE_TIME
6504     compile_count++;
6505     clock_t start_time = clock();
6506     #endif
6507     #if JIT_DEBUG
6508     bool disasm_block = false;
6509     #endif
6510    
6511     /* OK, here we need to 'compile' a block */
6512     int i;
6513     int r;
6514     int was_comp=0;
6515     uae_u8 liveflags[MAXRUN+1];
6516 gbeauche 1.8 #if USE_CHECKSUM_INFO
6517     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6518 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6519     uintptr min_pcp=max_pcp;
6520 gbeauche 1.8 #else
6521 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[0].location;
6522     uintptr min_pcp=max_pcp;
6523 gbeauche 1.8 #endif
6524 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6525     void* specflags=(void*)&regs.spcflags;
6526     blockinfo* bi=NULL;
6527     blockinfo* bi2;
6528     int extra_len=0;
6529    
6530     redo_current_block=0;
6531     if (current_compile_p>=max_compile_start)
6532     flush_icache_hard(7);
6533    
6534     alloc_blockinfos();
6535    
6536     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6537     bi2=get_blockinfo(cl);
6538    
6539     optlev=bi->optlevel;
6540     if (bi->status!=BI_INVALID) {
6541     Dif (bi!=bi2) {
6542     /* I don't think it can happen anymore. Shouldn't, in
6543     any case. So let's make sure... */
6544     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6545     bi->count,bi->optlevel,bi->handler_to_use,
6546     cache_tags[cl].handler);
6547     abort();
6548     }
6549    
6550     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6551     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6552     /* What the heck? We are not supposed to be here! */
6553     abort();
6554     }
6555     }
6556     if (bi->count==-1) {
6557     optlev++;
6558     while (!optcount[optlev])
6559     optlev++;
6560     bi->count=optcount[optlev]-1;
6561     }
6562 gbeauche 1.24 current_block_pc_p=(uintptr)pc_hist[0].location;
6563 gbeauche 1.1
6564     remove_deps(bi); /* We are about to create new code */
6565     bi->optlevel=optlev;
6566     bi->pc_p=(uae_u8*)pc_hist[0].location;
6567 gbeauche 1.8 #if USE_CHECKSUM_INFO
6568     free_checksum_info_chain(bi->csi);
6569     bi->csi = NULL;
6570     #endif
6571 gbeauche 1.1
6572     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6573     i=blocklen;
6574     while (i--) {
6575     uae_u16* currpcp=pc_hist[i].location;
6576     uae_u32 op=DO_GET_OPCODE(currpcp);
6577    
6578 gbeauche 1.8 #if USE_CHECKSUM_INFO
6579     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6580     #if USE_INLINING
6581     if (is_const_jump(op)) {
6582     checksum_info *csi = alloc_checksum_info();
6583     csi->start_p = (uae_u8 *)min_pcp;
6584     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6585     csi->next = bi->csi;
6586     bi->csi = csi;
6587 gbeauche 1.24 max_pcp = (uintptr)currpcp;
6588 gbeauche 1.8 }
6589     #endif
6590 gbeauche 1.24 min_pcp = (uintptr)currpcp;
6591 gbeauche 1.8 #else
6592 gbeauche 1.24 if ((uintptr)currpcp<min_pcp)
6593     min_pcp=(uintptr)currpcp;
6594     if ((uintptr)currpcp>max_pcp)
6595     max_pcp=(uintptr)currpcp;
6596 gbeauche 1.8 #endif
6597 gbeauche 1.1
6598     liveflags[i]=((liveflags[i+1]&
6599     (~prop[op].set_flags))|
6600     prop[op].use_flags);
6601     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6602     liveflags[i]&= ~FLAG_Z;
6603     }
6604    
6605 gbeauche 1.8 #if USE_CHECKSUM_INFO
6606     checksum_info *csi = alloc_checksum_info();
6607     csi->start_p = (uae_u8 *)min_pcp;
6608     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6609     csi->next = bi->csi;
6610     bi->csi = csi;
6611     #endif
6612    
6613 gbeauche 1.1 bi->needed_flags=liveflags[0];
6614    
6615 gbeauche 1.5 align_target(align_loops);
6616 gbeauche 1.1 was_comp=0;
6617    
6618     bi->direct_handler=(cpuop_func *)get_target();
6619     set_dhtu(bi,bi->direct_handler);
6620     bi->status=BI_COMPILING;
6621 gbeauche 1.24 current_block_start_target=(uintptr)get_target();
6622 gbeauche 1.1
6623     log_startblock();
6624    
6625     if (bi->count>=0) { /* Need to generate countdown code */
6626 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6627     raw_sub_l_mi((uintptr)&(bi->count),1);
6628     raw_jl((uintptr)popall_recompile_block);
6629 gbeauche 1.1 }
6630     if (optlev==0) { /* No need to actually translate */
6631     /* Execute normally without keeping stats */
6632 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6633     raw_jmp((uintptr)popall_exec_nostats);
6634 gbeauche 1.1 }
6635     else {
6636     reg_alloc_run=0;
6637     next_pc_p=0;
6638     taken_pc_p=0;
6639     branch_cc=0;
6640    
6641     comp_pc_p=(uae_u8*)pc_hist[0].location;
6642     init_comp();
6643     was_comp=1;
6644    
6645     #if JIT_DEBUG
6646     if (JITDebug) {
6647 gbeauche 1.24 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6648     raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6649 gbeauche 1.1 }
6650     #endif
6651    
6652     for (i=0;i<blocklen &&
6653     get_target_noopt()<max_compile_start;i++) {
6654     cpuop_func **cputbl;
6655     compop_func **comptbl;
6656     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6657     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6658     if (!needed_flags) {
6659     cputbl=nfcpufunctbl;
6660     comptbl=nfcompfunctbl;
6661     }
6662     else {
6663     cputbl=cpufunctbl;
6664     comptbl=compfunctbl;
6665     }
6666    
6667     failure = 1; // gb-- defaults to failure state
6668     if (comptbl[opcode] && optlev>1) {
6669     failure=0;
6670     if (!was_comp) {
6671     comp_pc_p=(uae_u8*)pc_hist[i].location;
6672     init_comp();
6673     }
6674 gbeauche 1.18 was_comp=1;
6675 gbeauche 1.1
6676     comptbl[opcode](opcode);
6677     freescratch();
6678     if (!(liveflags[i+1] & FLAG_CZNV)) {
6679     /* We can forget about flags */
6680     dont_care_flags();
6681     }
6682     #if INDIVIDUAL_INST
6683     flush(1);
6684     nop();
6685     flush(1);
6686     was_comp=0;
6687     #endif
6688     }
6689    
6690     if (failure) {
6691     if (was_comp) {
6692     flush(1);
6693     was_comp=0;
6694     }
6695     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6696     #if USE_NORMAL_CALLING_CONVENTION
6697     raw_push_l_r(REG_PAR1);
6698     #endif
6699 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,
6700     (uintptr)pc_hist[i].location);
6701     raw_call((uintptr)cputbl[opcode]);
6702 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6703     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6704 gbeauche 1.24 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6705 gbeauche 1.9 #endif
6706 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6707     raw_inc_sp(4);
6708     #endif
6709    
6710     if (i < blocklen - 1) {
6711     uae_s8* branchadd;
6712    
6713 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)specflags);
6714 gbeauche 1.1 raw_test_l_rr(0,0);
6715     raw_jz_b_oponly();
6716     branchadd=(uae_s8 *)get_target();
6717     emit_byte(0);
6718 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6719     *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6720 gbeauche 1.1 }
6721     }
6722     }
6723     #if 1 /* This isn't completely kosher yet; It really needs to be
6724     be integrated into a general inter-block-dependency scheme */
6725     if (next_pc_p && taken_pc_p &&
6726     was_comp && taken_pc_p==current_block_pc_p) {
6727     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6728     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6729     uae_u8 x=bi1->needed_flags;
6730    
6731     if (x==0xff || 1) { /* To be on the safe side */
6732     uae_u16* next=(uae_u16*)next_pc_p;
6733     uae_u32 op=DO_GET_OPCODE(next);
6734    
6735     x=0x1f;
6736     x&=(~prop[op].set_flags);
6737     x|=prop[op].use_flags;
6738     }
6739    
6740     x|=bi2->needed_flags;
6741     if (!(x & FLAG_CZNV)) {
6742     /* We can forget about flags */
6743     dont_care_flags();
6744     extra_len+=2; /* The next instruction now is part of this
6745     block */
6746     }
6747    
6748     }
6749     #endif
6750     log_flush();
6751    
6752     if (next_pc_p) { /* A branch was registered */
6753 gbeauche 1.24 uintptr t1=next_pc_p;
6754     uintptr t2=taken_pc_p;
6755 gbeauche 1.1 int cc=branch_cc;
6756    
6757     uae_u32* branchadd;
6758     uae_u32* tba;
6759     bigstate tmp;
6760     blockinfo* tbi;
6761    
6762     if (taken_pc_p<next_pc_p) {
6763     /* backward branch. Optimize for the "taken" case ---
6764     which means the raw_jcc should fall through when
6765     the 68k branch is taken. */
6766     t1=taken_pc_p;
6767     t2=next_pc_p;
6768     cc=branch_cc^1;
6769     }
6770    
6771     tmp=live; /* ouch! This is big... */
6772     raw_jcc_l_oponly(cc);
6773     branchadd=(uae_u32*)get_target();
6774     emit_long(0);
6775    
6776     /* predicted outcome */
6777     tbi=get_blockinfo_addr_new((void*)t1,1);
6778     match_states(tbi);
6779 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6780 gbeauche 1.1 raw_jcc_l_oponly(4);
6781     tba=(uae_u32*)get_target();
6782 gbeauche 1.24 emit_long(get_handler(t1)-((uintptr)tba+4));
6783     raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6784     raw_jmp((uintptr)popall_do_nothing);
6785 gbeauche 1.1 create_jmpdep(bi,0,tba,t1);
6786    
6787 gbeauche 1.5 align_target(align_jumps);
6788 gbeauche 1.1 /* not-predicted outcome */
6789 gbeauche 1.24 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6790 gbeauche 1.1 live=tmp; /* Ouch again */
6791     tbi=get_blockinfo_addr_new((void*)t2,1);
6792     match_states(tbi);
6793    
6794     //flush(1); /* Can only get here if was_comp==1 */
6795 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6796 gbeauche 1.1 raw_jcc_l_oponly(4);
6797     tba=(uae_u32*)get_target();
6798 gbeauche 1.24 emit_long(get_handler(t2)-((uintptr)tba+4));
6799     raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6800     raw_jmp((uintptr)popall_do_nothing);
6801 gbeauche 1.1 create_jmpdep(bi,1,tba,t2);
6802     }
6803     else
6804     {
6805     if (was_comp) {
6806     flush(1);
6807     }
6808    
6809     /* Let's find out where next_handler is... */
6810     if (was_comp && isinreg(PC_P)) {
6811     r=live.state[PC_P].realreg;
6812     raw_and_l_ri(r,TAGMASK);
6813     int r2 = (r==0) ? 1 : 0;
6814 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6815     raw_cmp_l_mi((uintptr)specflags,0);
6816     raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,SIZEOF_VOID_P);
6817 gbeauche 1.1 raw_jmp_r(r2);
6818     }
6819     else if (was_comp && isconst(PC_P)) {
6820     uae_u32 v=live.state[PC_P].val;
6821     uae_u32* tba;
6822     blockinfo* tbi;
6823    
6824 gbeauche 1.24 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6825 gbeauche 1.1 match_states(tbi);
6826    
6827 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6828 gbeauche 1.1 raw_jcc_l_oponly(4);
6829     tba=(uae_u32*)get_target();
6830 gbeauche 1.24 emit_long(get_handler(v)-((uintptr)tba+4));
6831     raw_mov_l_mi((uintptr)&regs.pc_p,v);
6832     raw_jmp((uintptr)popall_do_nothing);
6833 gbeauche 1.1 create_jmpdep(bi,0,tba,v);
6834     }
6835     else {
6836     r=REG_PC_TMP;
6837 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6838 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6839     int r2 = (r==0) ? 1 : 0;
6840 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6841     raw_cmp_l_mi((uintptr)specflags,0);
6842     raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,SIZEOF_VOID_P);
6843 gbeauche 1.1 raw_jmp_r(r2);
6844     }
6845     }
6846     }
6847    
6848     #if USE_MATCH
6849     if (callers_need_recompile(&live,&(bi->env))) {
6850     mark_callers_recompile(bi);
6851     }
6852    
6853     big_to_small_state(&live,&(bi->env));
6854     #endif
6855    
6856 gbeauche 1.8 #if USE_CHECKSUM_INFO
6857     remove_from_list(bi);
6858     if (trace_in_rom) {
6859     // No need to checksum that block trace on cache invalidation
6860     free_checksum_info_chain(bi->csi);
6861     bi->csi = NULL;
6862     add_to_dormant(bi);
6863     }
6864     else {
6865     calc_checksum(bi,&(bi->c1),&(bi->c2));
6866     add_to_active(bi);
6867     }
6868     #else
6869 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
6870     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6871     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6872     else
6873     max_pcp+=LONGEST_68K_INST;
6874 gbeauche 1.7
6875 gbeauche 1.1 bi->len=max_pcp-min_pcp;
6876     bi->min_pcp=min_pcp;
6877 gbeauche 1.7
6878 gbeauche 1.1 remove_from_list(bi);
6879     if (isinrom(min_pcp) && isinrom(max_pcp)) {
6880     add_to_dormant(bi); /* No need to checksum it on cache flush.
6881     Please don't start changing ROMs in
6882     flight! */
6883     }
6884     else {
6885     calc_checksum(bi,&(bi->c1),&(bi->c2));
6886     add_to_active(bi);
6887     }
6888 gbeauche 1.8 #endif
6889 gbeauche 1.1
6890     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6891    
6892     #if JIT_DEBUG
6893     if (JITDebug)
6894     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6895    
6896     if (JITDebug && disasm_block) {
6897     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6898     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6899     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6900     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6901     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6902     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6903     getchar();
6904     }
6905     #endif
6906    
6907     log_dump();
6908 gbeauche 1.5 align_target(align_jumps);
6909 gbeauche 1.1
6910     /* This is the non-direct handler */
6911     bi->handler=
6912     bi->handler_to_use=(cpuop_func *)get_target();
6913 gbeauche 1.24 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6914     raw_jnz((uintptr)popall_cache_miss);
6915 gbeauche 1.1 comp_pc_p=(uae_u8*)pc_hist[0].location;
6916    
6917     bi->status=BI_FINALIZING;
6918     init_comp();
6919     match_states(bi);
6920     flush(1);
6921    
6922 gbeauche 1.24 raw_jmp((uintptr)bi->direct_handler);
6923 gbeauche 1.1
6924     current_compile_p=get_target();
6925     raise_in_cl_list(bi);
6926    
6927     /* We will flush soon, anyway, so let's do it now */
6928     if (current_compile_p>=max_compile_start)
6929     flush_icache_hard(7);
6930    
6931     bi->status=BI_ACTIVE;
6932     if (redo_current_block)
6933     block_need_recompile(bi);
6934    
6935     #if PROFILE_COMPILE_TIME
6936     compile_time += (clock() - start_time);
6937     #endif
6938     }
6939     }
6940    
/* Deliberately empty routine: serves as a harmless, well-defined target
   for generated jumps (e.g. the popall_do_nothing dispatch path). */
void do_nothing(void)
{
	/* Nothing to do here, by design. */
}
6945    
6946     void exec_nostats(void)
6947     {
6948     for (;;) {
6949     uae_u32 opcode = GET_OPCODE;
6950     (*cpufunctbl[opcode])(opcode);
6951     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6952     return; /* We will deal with the spcflags in the caller */
6953     }
6954     }
6955     }
6956    
6957     void execute_normal(void)
6958     {
6959     if (!check_for_cache_miss()) {
6960     cpu_history pc_hist[MAXRUN];
6961     int blocklen = 0;
6962     #if REAL_ADDRESSING || DIRECT_ADDRESSING
6963     start_pc_p = regs.pc_p;
6964     start_pc = get_virtual_address(regs.pc_p);
6965     #else
6966     start_pc_p = regs.pc_oldp;
6967     start_pc = regs.pc;
6968     #endif
6969     for (;;) { /* Take note: This is the do-it-normal loop */
6970     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6971     uae_u32 opcode = GET_OPCODE;
6972     #if FLIGHT_RECORDER
6973     m68k_record_step(m68k_getpc());
6974     #endif
6975     (*cpufunctbl[opcode])(opcode);
6976     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6977     compile_block(pc_hist, blocklen);
6978     return; /* We will deal with the spcflags in the caller */
6979     }
6980     /* No need to check regs.spcflags, because if they were set,
6981     we'd have ended up inside that "if" */
6982     }
6983     }
6984     }
6985    
6986     typedef void (*compiled_handler)(void);
6987    
6988 gbeauche 1.24 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
6989 gbeauche 1.6 void (*m68k_compile_execute)(void) = NULL;
6990     #else
6991 gbeauche 1.1 void m68k_do_compile_execute(void)
6992     {
6993     for (;;) {
6994     ((compiled_handler)(pushall_call_handler))();
6995     /* Whenever we return from that, we should check spcflags */
6996     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6997     if (m68k_do_specialties ())
6998     return;
6999     }
7000     }
7001     }
7002 gbeauche 1.6 #endif