root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.30
Committed: 2005-04-21T09:08:57Z by gbeauche
Branch: MAIN
Changes since 1.29: +1 -0 lines
Log Message:
Recognize lahf_lm from Dual Core Opterons. This enables use of LAHF/SETO
instructions in long mode (64-bit). However, there seems to be another bug
in the JIT preventing it from being fully supported. m68k.h & codegen_x86.h
are easily fixed but another patch is still needed.

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.29 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.11 * Gwenole Beauchesne
8     *
9 gbeauche 1.29 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.11 *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.24 /* NOTE: support for AMD64 assumes translation cache and other code
35     * buffers are allocated into a 32-bit address space because (i) B2/JIT
36     * code is not 64-bit clean and (ii) it's faster to resolve branches
37     * that way.
38     */
39     #if !defined(__i386__) && !defined(__x86_64__)
40     #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41     #endif
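
/* A minimal sketch of such a low allocation, assuming a Linux/x86-64
 * host (illustration only -- not the code B2 actually uses; see the
 * vm_alloc layer): MAP_32BIT requests a mapping below 4GB, so rel32
 * branches inside and into the translation cache always reach. */
#if 0
#include <sys/mman.h>

static unsigned char *alloc_jit_buffer_below_4g(size_t size)
{
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
	               MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
	return (p == MAP_FAILED) ? NULL : (unsigned char *)p;
}
#endif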
42    
43 gbeauche 1.1 #define USE_MATCH 0
44    
45     /* kludge for Brian, so he can compile under MSVC++ */
46     #define USE_NORMAL_CALLING_CONVENTION 0
47    
48     #ifndef WIN32
49 gbeauche 1.20 #include <unistd.h>
50 gbeauche 1.1 #include <sys/types.h>
51     #include <sys/mman.h>
52     #endif
53    
54     #include <stdlib.h>
55     #include <fcntl.h>
56     #include <errno.h>
57    
58     #include "sysdeps.h"
59     #include "cpu_emulation.h"
60     #include "main.h"
61     #include "prefs.h"
62     #include "user_strings.h"
63 gbeauche 1.2 #include "vm_alloc.h"
64 gbeauche 1.1
65     #include "m68k.h"
66     #include "memory.h"
67     #include "readcpu.h"
68     #include "newcpu.h"
69     #include "comptbl.h"
70     #include "compiler/compemu.h"
71     #include "fpu/fpu.h"
72     #include "fpu/flags.h"
73    
74     #define DEBUG 1
75     #include "debug.h"
76    
77     #ifdef ENABLE_MON
78     #include "mon.h"
79     #endif
80    
81     #ifndef WIN32
82 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
83     #define PROFILE_UNTRANSLATED_INSNS 1
84 gbeauche 1.1 #endif
85    
86 gbeauche 1.28 #if defined(__x86_64__) && 0
87     #define RECORD_REGISTER_USAGE 1
88     #endif
89    
90 gbeauche 1.1 #ifdef WIN32
91     #undef write_log
92     #define write_log dummy_write_log
93     static void dummy_write_log(const char *, ...) { }
94     #endif
95    
96     #if JIT_DEBUG
97     #undef abort
98     #define abort() do { \
99     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100     exit(EXIT_FAILURE); \
101     } while (0)
102     #endif
103    
104 gbeauche 1.28 #if RECORD_REGISTER_USAGE
105     static uint64 reg_count[16];
106     static int reg_count_local[16];
107    
108     static int reg_count_compare(const void *ap, const void *bp)
109     {
110     const int a = *((int *)ap);
111     const int b = *((int *)bp);
112     return (reg_count[b] > reg_count[a]) - (reg_count[b] < reg_count[a]);	/* three-way compare; uint64 subtraction would truncate */
113     }
114     #endif
115    
116 gbeauche 1.1 #if PROFILE_COMPILE_TIME
117     #include <time.h>
118     static uae_u32 compile_count = 0;
119     static clock_t compile_time = 0;
120     static clock_t emul_start_time = 0;
121     static clock_t emul_end_time = 0;
122     #endif
123    
124 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
125     const int untranslated_top_ten = 20;
126     static uae_u32 raw_cputbl_count[65536] = { 0, };
127     static uae_u16 opcode_nums[65536];
128    
129     static int untranslated_compfn(const void *e1, const void *e2)
130     {
131     const uae_u32 count1 = raw_cputbl_count[*(const uae_u16 *)e1];
        const uae_u32 count2 = raw_cputbl_count[*(const uae_u16 *)e2];
        /* sort descending; qsort needs a three-way result, not a bare `<' */
        return (count1 < count2) - (count1 > count2);
132     }
133     #endif
134    
135 gbeauche 1.24 static compop_func *compfunctbl[65536];
136     static compop_func *nfcompfunctbl[65536];
137     static cpuop_func *nfcpufunctbl[65536];
138 gbeauche 1.1 uae_u8* comp_pc_p;
139    
140 gbeauche 1.26 // From main_unix.cpp
141     extern bool ThirtyThreeBitAddressing;
142    
143 gbeauche 1.6 // From newcpu.cpp
144     extern bool quit_program;
145    
146 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
147     #if JIT_DEBUG
148     static bool JITDebug = false; // Enable runtime disassemblers through mon?
149     #else
150     const bool JITDebug = false; // Don't use JIT debug mode at all
151     #endif
152    
153 gbeauche 1.22 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size, in KB (i.e. 1 MB)
154 gbeauche 1.1 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
155 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
156 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
157     static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions ?
158     static bool have_cmov = false; // target has CMOV instructions ?
159 gbeauche 1.30 static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
160 gbeauche 1.1 static bool have_rat_stall = true; // target has partial register stalls ?
161 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
162     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
163 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
164 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
165     static int align_jumps = 32; // Align the start of jumps
166 gbeauche 1.1 static int optcount[10] = {
167     10, // How often a block has to be executed before it is translated
168     0, // How often to use naive translation
169     0, 0, 0, 0,
170     -1, -1, -1, -1
171     };
172    
173     struct op_properties {
174     uae_u8 use_flags;
175     uae_u8 set_flags;
176     uae_u8 is_addx;
177     uae_u8 cflow;
178     };
179     static op_properties prop[65536];
180    
181     static inline int end_block(uae_u32 opcode)
182     {
183     return (prop[opcode].cflow & fl_end_block);
184     }
185    
186 gbeauche 1.8 static inline bool is_const_jump(uae_u32 opcode)
187     {
188     return (prop[opcode].cflow == fl_const_jump);
189     }
190    
191 gbeauche 1.18 static inline bool may_trap(uae_u32 opcode)
192     {
193     return (prop[opcode].cflow & fl_trap);
194     }
195    
196     static inline unsigned int cft_map (unsigned int f)
197     {
198     #ifndef HAVE_GET_WORD_UNSWAPPED
199     return f;
200     #else
201     return ((f >> 8) & 255) | ((f & 255) << 8);
202     #endif
203     }
204    
205 gbeauche 1.1 uae_u8* start_pc_p;
206     uae_u32 start_pc;
207     uae_u32 current_block_pc_p;
208 gbeauche 1.24 static uintptr current_block_start_target;
209 gbeauche 1.1 uae_u32 needed_flags;
210 gbeauche 1.24 static uintptr next_pc_p;
211     static uintptr taken_pc_p;
212 gbeauche 1.1 static int branch_cc;
213     static int redo_current_block;
214    
215     int segvcount=0;
216     int soft_flush_count=0;
217     int hard_flush_count=0;
218     int checksum_count=0;
219     static uae_u8* current_compile_p=NULL;
220     static uae_u8* max_compile_start;
221     static uae_u8* compiled_code=NULL;
222     static uae_s32 reg_alloc_run;
223 gbeauche 1.24 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
224     static uae_u8* popallspace=NULL;
225 gbeauche 1.1
226     void* pushall_call_handler=NULL;
227     static void* popall_do_nothing=NULL;
228     static void* popall_exec_nostats=NULL;
229     static void* popall_execute_normal=NULL;
230     static void* popall_cache_miss=NULL;
231     static void* popall_recompile_block=NULL;
232     static void* popall_check_checksum=NULL;
233    
234     /* The 68k only ever executes from even addresses. So right now, we
235     * waste half the entries in this array.
236     * UPDATE: We now use those entries to store the start of the linked
237     * lists that we maintain for each hash result.
238     */
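/* Concretely: for a hashed line `cl', cache_tags[cl].handler holds the
 * dispatch target, while cache_tags[cl+1].bi heads the chain of
 * blockinfos whose pc_p hashed to that line -- see add_to_cl_list()
 * and get_blockinfo() below. */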
239     cacheline cache_tags[TAGSIZE];
240     int letit=0;
241     blockinfo* hold_bi[MAX_HOLD_BI];
242     blockinfo* active;
243     blockinfo* dormant;
244    
245     /* 68040 */
246     extern struct cputbl op_smalltbl_0_nf[];
247     extern struct comptbl op_smalltbl_0_comp_nf[];
248     extern struct comptbl op_smalltbl_0_comp_ff[];
249    
250     /* 68020 + 68881 */
251     extern struct cputbl op_smalltbl_1_nf[];
252    
253     /* 68020 */
254     extern struct cputbl op_smalltbl_2_nf[];
255    
256     /* 68010 */
257     extern struct cputbl op_smalltbl_3_nf[];
258    
259     /* 68000 */
260     extern struct cputbl op_smalltbl_4_nf[];
261    
262     /* 68000 slow but compatible. */
263     extern struct cputbl op_smalltbl_5_nf[];
264    
265     static void flush_icache_hard(int n);
266     static void flush_icache_lazy(int n);
267     static void flush_icache_none(int n);
268     void (*flush_icache)(int n) = flush_icache_none;
269    
270    
271    
272     bigstate live;
273     smallstate empty_ss;
274     smallstate default_ss;
275     static int optlev;
276    
277     static int writereg(int r, int size);
278     static void unlock2(int r);
279     static void setlock(int r);
280     static int readreg_specific(int r, int size, int spec);
281     static int writereg_specific(int r, int size, int spec);
282     static void prepare_for_call_1(void);
283     static void prepare_for_call_2(void);
284     static void align_target(uae_u32 a);
285    
286     static uae_s32 nextused[VREGS];
287    
288     uae_u32 m68k_pc_offset;
289    
290     /* Some arithmetic operations can be optimized away if the operands
291     * are known to be constant. But that's only a good idea when the
292     * side effects they would have on the flags are not important. This
293     * variable indicates whether we need the side effects or not
294     */
295     uae_u32 needflags=0;
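
/* A minimal sketch of the optimization described above (illustration
 * only; hypothetical helper -- the real folding happens in the
 * generated opcode handlers): fold two compile-time constants, but
 * only while needflags is clear. */
#if 0
static void example_fold_add(int d, int s)
{
	if (isconst(d) && isconst(s) && !needflags)
		set_const(d, live.state[d].val + live.state[s].val);
	/* otherwise a real ADD must be emitted so the flags get computed */
}
#endif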
296    
297     /* Flag handling is complicated.
298     *
299     * x86 instructions create flags, which quite often are exactly what we
300     * want. So at times, the "68k" flags are actually in the x86 flags.
301     *
302     * Then again, sometimes we do x86 instructions that clobber the x86
303     * flags, but don't represent a corresponding m68k instruction. In that
304     * case, we have to save them.
305     *
306     * We used to save them to the stack, but now store them back directly
307     * into the regflags.cznv of the traditional emulation. Thus some odd
308     * names.
309     *
310     * So flags can be in either of two places (used to be three; boy were
311     * things complicated back then!); And either place can contain either
312     * valid flags or invalid trash (and on the stack, there was also the
313     * option of "nothing at all", now gone). A couple of variables keep
314     * track of the respective states.
315     *
316     * To make things worse, we might or might not be interested in the flags.
317     * by default, we are, but a call to dont_care_flags can change that
318     * By default, we are, but a call to dont_care_flags can change that
319     * is in the register and/or the native flags is seen as valid.
320     */
321     */
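
/* A minimal sketch of the protocol this implies (illustration only;
 * hypothetical helper): live.flags_in_flags and live.flags_on_stack
 * track the two places, and an emitter about to execute an x86
 * instruction that trashes the flags for non-68k reasons must spill
 * them first. */
#if 0
static void example_emit_flag_trashing_op(void)
{
	clobber_flags();	/* spills live x86 flags into regflags.cznv */
	/* ...now emit arithmetic whose x86 flags carry no 68k meaning... */
}
#endif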
322     static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
323     {
324     return cache_tags[cl+1].bi;
325     }
326    
327     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
328     {
329     blockinfo* bi=get_blockinfo(cacheline(addr));
330    
331     while (bi) {
332     if (bi->pc_p==addr)
333     return bi;
334     bi=bi->next_same_cl;
335     }
336     return NULL;
337     }
338    
339    
340     /*******************************************************************
341     * All sorts of list related functions for all of the lists *
342     *******************************************************************/
343    
344     static __inline__ void remove_from_cl_list(blockinfo* bi)
345     {
346     uae_u32 cl=cacheline(bi->pc_p);
347    
348     if (bi->prev_same_cl_p)
349     *(bi->prev_same_cl_p)=bi->next_same_cl;
350     if (bi->next_same_cl)
351     bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
352     if (cache_tags[cl+1].bi)
353     cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
354     else
355     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
356     }
357    
358     static __inline__ void remove_from_list(blockinfo* bi)
359     {
360     if (bi->prev_p)
361     *(bi->prev_p)=bi->next;
362     if (bi->next)
363     bi->next->prev_p=bi->prev_p;
364     }
365    
366     static __inline__ void remove_from_lists(blockinfo* bi)
367     {
368     remove_from_list(bi);
369     remove_from_cl_list(bi);
370     }
371    
372     static __inline__ void add_to_cl_list(blockinfo* bi)
373     {
374     uae_u32 cl=cacheline(bi->pc_p);
375    
376     if (cache_tags[cl+1].bi)
377     cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
378     bi->next_same_cl=cache_tags[cl+1].bi;
379    
380     cache_tags[cl+1].bi=bi;
381     bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
382    
383     cache_tags[cl].handler=bi->handler_to_use;
384     }
385    
386     static __inline__ void raise_in_cl_list(blockinfo* bi)
387     {
388     remove_from_cl_list(bi);
389     add_to_cl_list(bi);
390     }
391    
392     static __inline__ void add_to_active(blockinfo* bi)
393     {
394     if (active)
395     active->prev_p=&(bi->next);
396     bi->next=active;
397    
398     active=bi;
399     bi->prev_p=&active;
400     }
401    
402     static __inline__ void add_to_dormant(blockinfo* bi)
403     {
404     if (dormant)
405     dormant->prev_p=&(bi->next);
406     bi->next=dormant;
407    
408     dormant=bi;
409     bi->prev_p=&dormant;
410     }
411    
412     static __inline__ void remove_dep(dependency* d)
413     {
414     if (d->prev_p)
415     *(d->prev_p)=d->next;
416     if (d->next)
417     d->next->prev_p=d->prev_p;
418     d->prev_p=NULL;
419     d->next=NULL;
420     }
421    
422     /* This block's code is about to be thrown away, so it no longer
423     depends on anything else */
424     static __inline__ void remove_deps(blockinfo* bi)
425     {
426     remove_dep(&(bi->dep[0]));
427     remove_dep(&(bi->dep[1]));
428     }
429    
430     static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
431     {
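	/* jmp_off points at the 32-bit displacement field of an already
	   emitted jmp/call; an x86 rel32 is relative to the first byte
	   after that field, hence the +4. */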
432     *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
433     }
434    
435     /********************************************************************
436     * Soft flush handling support functions *
437     ********************************************************************/
438    
439     static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
440     {
441     //write_log("bi is %p\n",bi);
442     if (dh!=bi->direct_handler_to_use) {
443     dependency* x=bi->deplist;
444     //write_log("bi->deplist=%p\n",bi->deplist);
445     while (x) {
446     //write_log("x is %p\n",x);
447     //write_log("x->next is %p\n",x->next);
448     //write_log("x->prev_p is %p\n",x->prev_p);
449    
450     if (x->jmp_off) {
451     adjust_jmpdep(x,dh);
452     }
453     x=x->next;
454     }
455     bi->direct_handler_to_use=dh;
456     }
457     }
458    
459     static __inline__ void invalidate_block(blockinfo* bi)
460     {
461     int i;
462    
463     bi->optlevel=0;
464     bi->count=optcount[0]-1;
465     bi->handler=NULL;
466     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
467     bi->direct_handler=NULL;
468     set_dhtu(bi,bi->direct_pen);
469     bi->needed_flags=0xff;
470     bi->status=BI_INVALID;
471     for (i=0;i<2;i++) {
472     bi->dep[i].jmp_off=NULL;
473     bi->dep[i].target=NULL;
474     }
475     remove_deps(bi);
476     }
477    
478     static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
479     {
480 gbeauche 1.24 blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);
481 gbeauche 1.1
482     Dif(!tbi) {
483     write_log("Could not create jmpdep!\n");
484     abort();
485     }
486     bi->dep[i].jmp_off=jmpaddr;
487     bi->dep[i].source=bi;
488     bi->dep[i].target=tbi;
489     bi->dep[i].next=tbi->deplist;
490     if (bi->dep[i].next)
491     bi->dep[i].next->prev_p=&(bi->dep[i].next);
492     bi->dep[i].prev_p=&(tbi->deplist);
493     tbi->deplist=&(bi->dep[i]);
494     }
495    
496     static __inline__ void block_need_recompile(blockinfo * bi)
497     {
498     uae_u32 cl = cacheline(bi->pc_p);
499    
500     set_dhtu(bi, bi->direct_pen);
501     bi->direct_handler = bi->direct_pen;
502    
503     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
504     bi->handler = (cpuop_func *)popall_execute_normal;
505     if (bi == cache_tags[cl + 1].bi)
506     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
507     bi->status = BI_NEED_RECOMP;
508     }
509    
510     static __inline__ void mark_callers_recompile(blockinfo * bi)
511     {
512     dependency *x = bi->deplist;
513    
514     while (x) {
515     dependency *next = x->next; /* This disappears when we mark for
516     * recompilation and thus remove the
517     * blocks from the lists */
518     if (x->jmp_off) {
519     blockinfo *cbi = x->source;
520    
521     Dif(cbi->status == BI_INVALID) {
522     // write_log("invalid block in dependency list\n"); // FIXME?
523     // abort();
524     }
525     if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
526     block_need_recompile(cbi);
527     mark_callers_recompile(cbi);
528     }
529     else if (cbi->status == BI_COMPILING) {
530     redo_current_block = 1;
531     }
532     else if (cbi->status == BI_NEED_RECOMP) {
533     /* nothing */
534     }
535     else {
536     //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
537     }
538     }
539     x = next;
540     }
541     }
542    
543     static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
544     {
545     blockinfo* bi=get_blockinfo_addr(addr);
546     int i;
547    
548     if (!bi) {
549     for (i=0;i<MAX_HOLD_BI && !bi;i++) {
550     if (hold_bi[i]) {
551     uae_u32 cl=cacheline(addr);
552    
553     bi=hold_bi[i];
554     hold_bi[i]=NULL;
555     bi->pc_p=(uae_u8 *)addr;
556     invalidate_block(bi);
557     add_to_active(bi);
558     add_to_cl_list(bi);
559    
560     }
561     }
562     }
563     if (!bi) {
564     write_log("Looking for blockinfo, can't find free one\n");
565     abort();
566     }
567     return bi;
568     }
569    
570     static void prepare_block(blockinfo* bi);
571    
572     /* Management of blockinfos.
573    
574     A blockinfo struct is allocated whenever a new block has to be
575     compiled. If the list of free blockinfos is empty, we allocate a new
576     pool of blockinfos and link the newly created blockinfos together
577     into the list of free blockinfos. Otherwise, we simply pop a structure
578 gbeauche 1.7 off the free list.
579 gbeauche 1.1
580     Blockinfos are lazily deallocated, i.e. chained together in the
581     list of free blockinfos whenever a translation cache flush (hard or
582     soft) request occurs.
583     */
584    
585 gbeauche 1.7 template< class T >
586     class LazyBlockAllocator
587     {
588     enum {
589     kPoolSize = 1 + 4096 / sizeof(T)
590     };
591     struct Pool {
592     T chunk[kPoolSize];
593     Pool * next;
594     };
595     Pool * mPools;
596     T * mChunks;
597     public:
598     LazyBlockAllocator() : mPools(0), mChunks(0) { }
599     ~LazyBlockAllocator();
600     T * acquire();
601     void release(T * const);
602 gbeauche 1.1 };
603    
604 gbeauche 1.7 template< class T >
605     LazyBlockAllocator<T>::~LazyBlockAllocator()
606 gbeauche 1.1 {
607 gbeauche 1.7 Pool * currentPool = mPools;
608     while (currentPool) {
609     Pool * deadPool = currentPool;
610     currentPool = currentPool->next;
611     free(deadPool);
612     }
613     }
614    
615     template< class T >
616     T * LazyBlockAllocator<T>::acquire()
617     {
618     if (!mChunks) {
619     // There is no chunk left, allocate a new pool and link the
620     // chunks into the free list
621     Pool * newPool = (Pool *)malloc(sizeof(Pool));
622     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
623     chunk->next = mChunks;
624     mChunks = chunk;
625 gbeauche 1.1 }
626 gbeauche 1.7 newPool->next = mPools;
627     mPools = newPool;
628     }
629     T * chunk = mChunks;
630     mChunks = chunk->next;
631     return chunk;
632     }
633    
634     template< class T >
635     void LazyBlockAllocator<T>::release(T * const chunk)
636     {
637     chunk->next = mChunks;
638     mChunks = chunk;
639     }
640    
641     template< class T >
642     class HardBlockAllocator
643     {
644     public:
645     T * acquire() {
646     T * data = (T *)current_compile_p;
647     current_compile_p += sizeof(T);
648     return data;
649 gbeauche 1.1 }
650 gbeauche 1.7
651     void release(T * const chunk) {
652     // Deallocated on invalidation
653     }
654     };
655    
656     #if USE_SEPARATE_BIA
657     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
658     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
659 gbeauche 1.1 #else
660 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
661     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
662 gbeauche 1.1 #endif
663    
664 gbeauche 1.8 static __inline__ checksum_info *alloc_checksum_info(void)
665     {
666     checksum_info *csi = ChecksumInfoAllocator.acquire();
667     csi->next = NULL;
668     return csi;
669     }
670    
671     static __inline__ void free_checksum_info(checksum_info *csi)
672     {
673     csi->next = NULL;
674     ChecksumInfoAllocator.release(csi);
675     }
676    
677     static __inline__ void free_checksum_info_chain(checksum_info *csi)
678     {
679     while (csi != NULL) {
680     checksum_info *csi2 = csi->next;
681     free_checksum_info(csi);
682     csi = csi2;
683     }
684     }
685 gbeauche 1.7
686     static __inline__ blockinfo *alloc_blockinfo(void)
687 gbeauche 1.1 {
688 gbeauche 1.7 blockinfo *bi = BlockInfoAllocator.acquire();
689     #if USE_CHECKSUM_INFO
690     bi->csi = NULL;
691 gbeauche 1.1 #endif
692 gbeauche 1.7 return bi;
693 gbeauche 1.1 }
694    
695 gbeauche 1.7 static __inline__ void free_blockinfo(blockinfo *bi)
696 gbeauche 1.1 {
697 gbeauche 1.7 #if USE_CHECKSUM_INFO
698 gbeauche 1.8 free_checksum_info_chain(bi->csi);
699     bi->csi = NULL;
700 gbeauche 1.1 #endif
701 gbeauche 1.7 BlockInfoAllocator.release(bi);
702 gbeauche 1.1 }
703    
704     static __inline__ void alloc_blockinfos(void)
705     {
706     int i;
707     blockinfo* bi;
708    
709     for (i=0;i<MAX_HOLD_BI;i++) {
710     if (hold_bi[i])
711     return;
712     bi=hold_bi[i]=alloc_blockinfo();
713     prepare_block(bi);
714     }
715     }
716    
717     /********************************************************************
718     * Functions to emit data into memory, and other general support *
719     ********************************************************************/
720    
721     static uae_u8* target;
722    
723     static void emit_init(void)
724     {
725     }
726    
727     static __inline__ void emit_byte(uae_u8 x)
728     {
729     *target++=x;
730     }
731    
732     static __inline__ void emit_word(uae_u16 x)
733     {
734     *((uae_u16*)target)=x;
735     target+=2;
736     }
737    
738     static __inline__ void emit_long(uae_u32 x)
739     {
740     *((uae_u32*)target)=x;
741     target+=4;
742     }
743    
744 gbeauche 1.24 static __inline__ void emit_quad(uae_u64 x)
745     {
746     *((uae_u64*)target)=x;
747     target+=8;
748     }
749    
750 gbeauche 1.12 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
751     {
752     memcpy((uae_u8 *)target,block,blocklen);
753     target+=blocklen;
754     }
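
/* A tiny usage illustration (not part of the emulator; hypothetical
 * helper): with `target' pointing into the translation cache, the
 * emitters above write raw machine code.  Emitting a bare x86 RET: */
#if 0
static void example_emit_ret(void)
{
	emit_byte(0xc3);	/* RET opcode */
}
#endif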
755    
756 gbeauche 1.1 static __inline__ uae_u32 reverse32(uae_u32 v)
757     {
758     #if 1
759     // gb-- We have specialized byteswapping functions, just use them
760     return do_byteswap_32(v);
761     #else
762     return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
763     #endif
764     }
765    
766     /********************************************************************
767     * Getting the information about the target CPU *
768     ********************************************************************/
769    
770     #include "codegen_x86.cpp"
771    
772     void set_target(uae_u8* t)
773     {
774     target=t;
775     }
776    
777     static __inline__ uae_u8* get_target_noopt(void)
778     {
779     return target;
780     }
781    
782     __inline__ uae_u8* get_target(void)
783     {
784     return get_target_noopt();
785     }
786    
787    
788     /********************************************************************
789     * Flags status handling. EMIT TIME! *
790     ********************************************************************/
791    
792     static void bt_l_ri_noclobber(R4 r, IMM i);
793    
794     static void make_flags_live_internal(void)
795     {
796     if (live.flags_in_flags==VALID)
797     return;
798     Dif (live.flags_on_stack==TRASH) {
799     write_log("Want flags, got something on stack, but it is TRASH\n");
800     abort();
801     }
802     if (live.flags_on_stack==VALID) {
803     int tmp;
804     tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
805     raw_reg_to_flags(tmp);
806     unlock2(tmp);
807    
808     live.flags_in_flags=VALID;
809     return;
810     }
811     write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
812     live.flags_in_flags,live.flags_on_stack);
813     abort();
814     }
815    
816     static void flags_to_stack(void)
817     {
818     if (live.flags_on_stack==VALID)
819     return;
820     if (!live.flags_are_important) {
821     live.flags_on_stack=VALID;
822     return;
823     }
824     Dif (live.flags_in_flags!=VALID)
825     abort();
826     else {
827     int tmp;
828     tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
829     raw_flags_to_reg(tmp);
830     unlock2(tmp);
831     }
832     live.flags_on_stack=VALID;
833     }
834    
835     static __inline__ void clobber_flags(void)
836     {
837     if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
838     flags_to_stack();
839     live.flags_in_flags=TRASH;
840     }
841    
842     /* Prepare for leaving the compiled stuff */
843     static __inline__ void flush_flags(void)
844     {
845     flags_to_stack();
846     return;
847     }
848    
849     int touchcnt;
850    
851     /********************************************************************
852 gbeauche 1.18 * Partial register flushing for optimized calls *
853     ********************************************************************/
854    
855     struct regusage {
856     uae_u16 rmask;
857     uae_u16 wmask;
858     };
859    
860     static inline void ru_set(uae_u16 *mask, int reg)
861     {
862     #if USE_OPTIMIZED_CALLS
863     *mask |= 1 << reg;
864     #endif
865     }
866    
867     static inline bool ru_get(const uae_u16 *mask, int reg)
868     {
869     #if USE_OPTIMIZED_CALLS
870     return (*mask & (1 << reg));
871     #else
872     /* Default: instruction reads & writes the register */
873     return true;
874     #endif
875     }
876    
877     static inline void ru_set_read(regusage *ru, int reg)
878     {
879     ru_set(&ru->rmask, reg);
880     }
881    
882     static inline void ru_set_write(regusage *ru, int reg)
883     {
884     ru_set(&ru->wmask, reg);
885     }
886    
887     static inline bool ru_read_p(const regusage *ru, int reg)
888     {
889     return ru_get(&ru->rmask, reg);
890     }
891    
892     static inline bool ru_write_p(const regusage *ru, int reg)
893     {
894     return ru_get(&ru->wmask, reg);
895     }
896    
897     static void ru_fill_ea(regusage *ru, int reg, amodes mode,
898     wordsizes size, int write_mode)
899     {
900     switch (mode) {
901     case Areg:
902     reg += 8;
903     /* fall through */
904     case Dreg:
905     ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
906     break;
907     case Ad16:
908     /* skip displacement */
909     m68k_pc_offset += 2;	/* fall through */
910     case Aind:
911     case Aipi:
912     case Apdi:
913     ru_set_read(ru, reg+8);
914     break;
915     case Ad8r:
916     ru_set_read(ru, reg+8);
917     /* fall through */
918     case PC8r: {
919     uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
920     reg = (dp >> 12) & 15;
921     ru_set_read(ru, reg);
922     if (dp & 0x100)
923     m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
924     break;
925     }
926     case PC16:
927     case absw:
928     case imm0:
929     case imm1:
930     m68k_pc_offset += 2;
931     break;
932     case absl:
933     case imm2:
934     m68k_pc_offset += 4;
935     break;
936     case immi:
937     m68k_pc_offset += (size == sz_long) ? 4 : 2;
938     break;
939     }
940     }
941    
942     /* TODO: split into a static initialization part and a dynamic one
943     (instructions depending on extension words) */
944     static void ru_fill(regusage *ru, uae_u32 opcode)
945     {
946     m68k_pc_offset += 2;
947    
948     /* Default: no register is used or written to */
949     ru->rmask = 0;
950     ru->wmask = 0;
951    
952     uae_u32 real_opcode = cft_map(opcode);
953     struct instr *dp = &table68k[real_opcode];
954    
955     bool rw_dest = true;
956     bool handled = false;
957    
958     /* Handle some instructions specifically */
959     uae_u16 reg, ext;
960     switch (dp->mnemo) {
961     case i_BFCHG:
962     case i_BFCLR:
963     case i_BFEXTS:
964     case i_BFEXTU:
965     case i_BFFFO:
966     case i_BFINS:
967     case i_BFSET:
968     case i_BFTST:
969     ext = comp_get_iword((m68k_pc_offset+=2)-2);
970     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
971     if (ext & 0x020) ru_set_read(ru, ext & 7);
972     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
973     if (dp->dmode == Dreg)
974     ru_set_read(ru, dp->dreg);
975     switch (dp->mnemo) {
976     case i_BFEXTS:
977     case i_BFEXTU:
978     case i_BFFFO:
979     ru_set_write(ru, (ext >> 12) & 7);
980     break;
981     case i_BFINS:
982     ru_set_read(ru, (ext >> 12) & 7);
983     /* fall through */
984     case i_BFCHG:
985     case i_BFCLR:
986     case i_BFSET:	/* was i_BSET, which the outer switch never admits */
987     if (dp->dmode == Dreg)
988     ru_set_write(ru, dp->dreg);
989     break;
990     }
991     handled = true;
992     rw_dest = false;
993     break;
994    
995     case i_BTST:
996     rw_dest = false;
997     break;
998    
999     case i_CAS:
1000     {
1001     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1002     int Du = ext & 7;
1003     ru_set_read(ru, Du);
1004     int Dc = (ext >> 6) & 7;
1005     ru_set_read(ru, Dc);
1006     ru_set_write(ru, Dc);
1007     break;
1008     }
1009     case i_CAS2:
1010     {
1011     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
1012     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1013     Rn1 = (ext >> 12) & 15;
1014     Du1 = (ext >> 6) & 7;
1015     Dc1 = ext & 7;
1016     ru_set_read(ru, Rn1);
1017     ru_set_read(ru, Du1);
1018     ru_set_read(ru, Dc1);
1019     ru_set_write(ru, Dc1);
1020     ext = comp_get_iword((m68k_pc_offset+=2)-2);
1021     Rn2 = (ext >> 12) & 15;
1022     Du2 = (ext >> 6) & 7;
1023     Dc2 = ext & 7;
1024     ru_set_read(ru, Rn2);
1025     ru_set_read(ru, Du2);
1026     ru_set_write(ru, Dc2);
1027     break;
1028     }
1029     case i_DIVL: case i_MULL:
1030     m68k_pc_offset += 2;
1031     break;
1032     case i_LEA:
1033     case i_MOVE: case i_MOVEA: case i_MOVE16:
1034     rw_dest = false;
1035     break;
1036     case i_PACK: case i_UNPK:
1037     rw_dest = false;
1038     m68k_pc_offset += 2;
1039     break;
1040     case i_TRAPcc:
1041     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1042     break;
1043     case i_RTR:
1044     /* do nothing, just for coverage debugging */
1045     break;
1046     /* TODO: handle EXG instruction */
1047     }
1048    
1049     /* Handle A-Traps better */
1050     if ((real_opcode & 0xf000) == 0xa000) {
1051     handled = true;
1052     }
1053    
1054     /* Handle EmulOps better */
1055     if ((real_opcode & 0xff00) == 0x7100) {
1056     handled = true;
1057     ru->rmask = 0xffff;
1058     ru->wmask = 0;
1059     }
1060    
1061     if (dp->suse && !handled)
1062     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1063    
1064     if (dp->duse && !handled)
1065     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1066    
1067     if (rw_dest)
1068     ru->rmask |= ru->wmask;
1069    
1070     handled = handled || dp->suse || dp->duse;
1071    
1072     /* Mark all registers as used/written if the instruction may trap */
1073     if (may_trap(opcode)) {
1074     handled = true;
1075     ru->rmask = 0xffff;
1076     ru->wmask = 0xffff;
1077     }
1078    
1079     if (!handled) {
1080     write_log("ru_fill: %04x = { %04x, %04x }\n",
1081     real_opcode, ru->rmask, ru->wmask);
1082     abort();
1083     }
1084     }
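
/* A minimal usage sketch (illustration only; hypothetical helper): ask
 * which 68k registers an opcode touches so an optimized call need only
 * flush those.  Note that ru_fill() advances the global m68k_pc_offset
 * past any extension words it decodes. */
#if 0
static void example_query_regusage(uae_u32 opcode)
{
	regusage ru;
	ru_fill(&ru, opcode);
	for (int reg = 0; reg < 16; reg++) {
		if (ru_write_p(&ru, reg))
			; /* register may be clobbered across the call */
	}
}
#endif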
1085    
1086     /********************************************************************
1087 gbeauche 1.1 * register allocation per block logging *
1088     ********************************************************************/
1089    
1090     static uae_s8 vstate[VREGS];
1091     static uae_s8 vwritten[VREGS];
1092     static uae_s8 nstate[N_REGS];
1093    
1094     #define L_UNKNOWN -127
1095     #define L_UNAVAIL -1
1096     #define L_NEEDED -2
1097     #define L_UNNEEDED -3
1098    
1099     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1100     {
1101     int i;
1102    
1103     for (i = 0; i < VREGS; i++)
1104     s->virt[i] = vstate[i];
1105     for (i = 0; i < N_REGS; i++)
1106     s->nat[i] = nstate[i];
1107     }
1108    
1109     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1110     {
1111     int i;
1112     int reverse = 0;
1113    
1114     for (i = 0; i < VREGS; i++) {
1115     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1116     return 1;
1117     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1118     reverse++;
1119     }
1120     for (i = 0; i < N_REGS; i++) {
1121     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1122     return 1;
1123     if (nstate[i] < 0 && s->nat[i] >= 0)
1124     reverse++;
1125     }
1126     if (reverse >= 2 && USE_MATCH)
1127     return 1; /* In this case, it might be worth recompiling the
1128     * callers */
1129     return 0;
1130     }
1131    
1132     static __inline__ void log_startblock(void)
1133     {
1134     int i;
1135    
1136     for (i = 0; i < VREGS; i++) {
1137     vstate[i] = L_UNKNOWN;
1138     vwritten[i] = 0;
1139     }
1140     for (i = 0; i < N_REGS; i++)
1141     nstate[i] = L_UNKNOWN;
1142     }
1143    
1144     /* Using an n-reg for a temp variable */
1145     static __inline__ void log_isused(int n)
1146     {
1147     if (nstate[n] == L_UNKNOWN)
1148     nstate[n] = L_UNAVAIL;
1149     }
1150    
1151     static __inline__ void log_visused(int r)
1152     {
1153     if (vstate[r] == L_UNKNOWN)
1154     vstate[r] = L_NEEDED;
1155     }
1156    
1157     static __inline__ void do_load_reg(int n, int r)
1158     {
1159     if (r == FLAGTMP)
1160     raw_load_flagreg(n, r);
1161     else if (r == FLAGX)
1162     raw_load_flagx(n, r);
1163     else
1164 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1165 gbeauche 1.1 }
1166    
1167     static __inline__ void check_load_reg(int n, int r)
1168     {
1169 gbeauche 1.24 raw_mov_l_rm(n, (uintptr) live.state[r].mem);
1170 gbeauche 1.1 }
1171    
1172     static __inline__ void log_vwrite(int r)
1173     {
1174     vwritten[r] = 1;
1175     }
1176    
1177     /* Using an n-reg to hold a v-reg */
1178     static __inline__ void log_isreg(int n, int r)
1179     {
1180     static int count = 0;
1181    
1182     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1183     nstate[n] = r;
1184     else {
1185     do_load_reg(n, r);
1186     if (nstate[n] == L_UNKNOWN)
1187     nstate[n] = L_UNAVAIL;
1188     }
1189     if (vstate[r] == L_UNKNOWN)
1190     vstate[r] = L_NEEDED;
1191     }
1192    
1193     static __inline__ void log_clobberreg(int r)
1194     {
1195     if (vstate[r] == L_UNKNOWN)
1196     vstate[r] = L_UNNEEDED;
1197     }
1198    
1199     /* This ends all possibility of clever register allocation */
1200    
1201     static __inline__ void log_flush(void)
1202     {
1203     int i;
1204    
1205     for (i = 0; i < VREGS; i++)
1206     if (vstate[i] == L_UNKNOWN)
1207     vstate[i] = L_NEEDED;
1208     for (i = 0; i < N_REGS; i++)
1209     if (nstate[i] == L_UNKNOWN)
1210     nstate[i] = L_UNAVAIL;
1211     }
1212    
1213     static __inline__ void log_dump(void)
1214     {
1215     int i;
1216    
1217     return;	/* dump disabled; remove this to log the state below */
1218    
1219     write_log("----------------------\n");
1220     for (i = 0; i < N_REGS; i++) {
1221     switch (nstate[i]) {
1222     case L_UNKNOWN:
1223     write_log("Nat %d : UNKNOWN\n", i);
1224     break;
1225     case L_UNAVAIL:
1226     write_log("Nat %d : UNAVAIL\n", i);
1227     break;
1228     default:
1229     write_log("Nat %d : %d\n", i, nstate[i]);
1230     break;
1231     }
1232     }
1233     for (i = 0; i < VREGS; i++) {
1234     if (vstate[i] == L_UNNEEDED)
1235     write_log("Virt %d: UNNEEDED\n", i);
1236     }
1237     }
1238    
1239     /********************************************************************
1240     * register status handling. EMIT TIME! *
1241     ********************************************************************/
1242    
1243     static __inline__ void set_status(int r, int status)
1244     {
1245     if (status == ISCONST)
1246     log_clobberreg(r);
1247     live.state[r].status=status;
1248     }
1249    
1250     static __inline__ int isinreg(int r)
1251     {
1252     return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1253     }
1254    
1255     static __inline__ void adjust_nreg(int r, uae_u32 val)
1256     {
1257     if (!val)
1258     return;
1259     raw_lea_l_brr(r,r,val);
1260     }
1261    
1262     static void tomem(int r)
1263     {
1264     int rr=live.state[r].realreg;
1265    
1266     if (isinreg(r)) {
1267     if (live.state[r].val && live.nat[rr].nholds==1
1268     && !live.nat[rr].locked) {
1269     // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1270     // live.state[r].val,r,rr,target);
1271     adjust_nreg(rr,live.state[r].val);
1272     live.state[r].val=0;
1273     live.state[r].dirtysize=4;
1274     set_status(r,DIRTY);
1275     }
1276     }
1277    
1278     if (live.state[r].status==DIRTY) {
1279     switch (live.state[r].dirtysize) {
1280 gbeauche 1.24 case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
1281     case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
1282     case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
1283 gbeauche 1.1 default: abort();
1284     }
1285     log_vwrite(r);
1286     set_status(r,CLEAN);
1287     live.state[r].dirtysize=0;
1288     }
1289     }
1290    
1291     static __inline__ int isconst(int r)
1292     {
1293     return live.state[r].status==ISCONST;
1294     }
1295    
1296     int is_const(int r)
1297     {
1298     return isconst(r);
1299     }
1300    
1301     static __inline__ void writeback_const(int r)
1302     {
1303     if (!isconst(r))
1304     return;
1305     Dif (live.state[r].needflush==NF_HANDLER) {
1306     write_log("Trying to write back constant NF_HANDLER!\n");
1307     abort();
1308     }
1309    
1310 gbeauche 1.24 raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
1311 gbeauche 1.1 log_vwrite(r);
1312     live.state[r].val=0;
1313     set_status(r,INMEM);
1314     }
1315    
1316     static __inline__ void tomem_c(int r)
1317     {
1318     if (isconst(r)) {
1319     writeback_const(r);
1320     }
1321     else
1322     tomem(r);
1323     }
1324    
1325     static void evict(int r)
1326     {
1327     int rr;
1328    
1329     if (!isinreg(r))
1330     return;
1331     tomem(r);
1332     rr=live.state[r].realreg;
1333    
1334     Dif (live.nat[rr].locked &&
1335     live.nat[rr].nholds==1) {
1336     write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1337     abort();
1338     }
1339    
1340     live.nat[rr].nholds--;
1341     if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1342     int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1343     int thisind=live.state[r].realind;
1344    
1345     live.nat[rr].holds[thisind]=topreg;
1346     live.state[topreg].realind=thisind;
1347     }
1348     live.state[r].realreg=-1;
1349     set_status(r,INMEM);
1350     }
1351    
1352     static __inline__ void free_nreg(int r)
1353     {
1354     int i=live.nat[r].nholds;
1355    
1356     while (i) {
1357     int vr;
1358    
1359     --i;
1360     vr=live.nat[r].holds[i];
1361     evict(vr);
1362     }
1363     Dif (live.nat[r].nholds!=0) {
1364     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1365     abort();
1366     }
1367     }
1368    
1369     /* Use with care! */
1370     static __inline__ void isclean(int r)
1371     {
1372     if (!isinreg(r))
1373     return;
1374     live.state[r].validsize=4;
1375     live.state[r].dirtysize=0;
1376     live.state[r].val=0;
1377     set_status(r,CLEAN);
1378     }
1379    
1380     static __inline__ void disassociate(int r)
1381     {
1382     isclean(r);
1383     evict(r);
1384     }
1385    
1386     static __inline__ void set_const(int r, uae_u32 val)
1387     {
1388     disassociate(r);
1389     live.state[r].val=val;
1390     set_status(r,ISCONST);
1391     }
1392    
1393     static __inline__ uae_u32 get_offset(int r)
1394     {
1395     return live.state[r].val;
1396     }
1397    
1398     static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1399     {
1400     int bestreg;
1401     uae_s32 when;
1402     int i;
1403     uae_s32 badness=0; /* to shut up gcc */
1404     bestreg=-1;
1405     when=2000000000;
1406    
1407     for (i=N_REGS;i--;) {
1408     badness=live.nat[i].touched;
1409     if (live.nat[i].nholds==0)
1410     badness=0;
1411     if (i==hint)
1412     badness-=200000000;
1413     if (!live.nat[i].locked && badness<when) {
1414     if ((size==1 && live.nat[i].canbyte) ||
1415     (size==2 && live.nat[i].canword) ||
1416     (size==4)) {
1417     bestreg=i;
1418     when=badness;
1419     if (live.nat[i].nholds==0 && hint<0)
1420     break;
1421     if (i==hint)
1422     break;
1423     }
1424     }
1425     }
1426     Dif (bestreg==-1)
1427     abort();
1428    
1429     if (live.nat[bestreg].nholds>0) {
1430     free_nreg(bestreg);
1431     }
1432     if (isinreg(r)) {
1433     int rr=live.state[r].realreg;
1434     /* This will happen if we read a partially dirty register at a
1435     bigger size */
1436     Dif (willclobber || live.state[r].validsize>=size)
1437     abort();
1438     Dif (live.nat[rr].nholds!=1)
1439     abort();
1440     if (size==4 && live.state[r].validsize==2) {
1441     log_isused(bestreg);
1442     log_visused(r);
1443 gbeauche 1.24 raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
1444 gbeauche 1.1 raw_bswap_32(bestreg);
1445     raw_zero_extend_16_rr(rr,rr);
1446     raw_zero_extend_16_rr(bestreg,bestreg);
1447     raw_bswap_32(bestreg);
1448     raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1449     live.state[r].validsize=4;
1450     live.nat[rr].touched=touchcnt++;
1451     return rr;
1452     }
1453     if (live.state[r].validsize==1) {
1454     /* Nothing yet */
1455     }
1456     evict(r);
1457     }
1458    
1459     if (!willclobber) {
1460     if (live.state[r].status!=UNDEF) {
1461     if (isconst(r)) {
1462     raw_mov_l_ri(bestreg,live.state[r].val);
1463     live.state[r].val=0;
1464     live.state[r].dirtysize=4;
1465     set_status(r,DIRTY);
1466     log_isused(bestreg);
1467     }
1468     else {
1469     log_isreg(bestreg, r); /* This will also load it! */
1470     live.state[r].dirtysize=0;
1471     set_status(r,CLEAN);
1472     }
1473     }
1474     else {
1475     live.state[r].val=0;
1476     live.state[r].dirtysize=0;
1477     set_status(r,CLEAN);
1478     log_isused(bestreg);
1479     }
1480     live.state[r].validsize=4;
1481     }
1482     else { /* this is the easiest way, but not optimal. FIXME! */
1483     /* Now it's trickier, but hopefully still OK */
1484     if (!isconst(r) || size==4) {
1485     live.state[r].validsize=size;
1486     live.state[r].dirtysize=size;
1487     live.state[r].val=0;
1488     set_status(r,DIRTY);
1489     if (size == 4) {
1490     log_clobberreg(r);
1491     log_isused(bestreg);
1492     }
1493     else {
1494     log_visused(r);
1495     log_isused(bestreg);
1496     }
1497     }
1498     else {
1499     if (live.state[r].status!=UNDEF)
1500     raw_mov_l_ri(bestreg,live.state[r].val);
1501     live.state[r].val=0;
1502     live.state[r].validsize=4;
1503     live.state[r].dirtysize=4;
1504     set_status(r,DIRTY);
1505     log_isused(bestreg);
1506     }
1507     }
1508     live.state[r].realreg=bestreg;
1509     live.state[r].realind=live.nat[bestreg].nholds;
1510     live.nat[bestreg].touched=touchcnt++;
1511     live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1512     live.nat[bestreg].nholds++;
1513    
1514     return bestreg;
1515     }
1516    
1517     static int alloc_reg(int r, int size, int willclobber)
1518     {
1519     return alloc_reg_hinted(r,size,willclobber,-1);
1520     }
1521    
1522     static void unlock2(int r)
1523     {
1524     Dif (!live.nat[r].locked)
1525     abort();
1526     live.nat[r].locked--;
1527     }
1528    
1529     static void setlock(int r)
1530     {
1531     live.nat[r].locked++;
1532     }
1533    
1534    
1535     static void mov_nregs(int d, int s)
1536     {
1537     int ns=live.nat[s].nholds;
1538     int nd=live.nat[d].nholds;
1539     int i;
1540    
1541     if (s==d)
1542     return;
1543    
1544     if (nd>0)
1545     free_nreg(d);
1546    
1547     log_isused(d);
1548     raw_mov_l_rr(d,s);
1549    
1550     for (i=0;i<live.nat[s].nholds;i++) {
1551     int vs=live.nat[s].holds[i];
1552    
1553     live.state[vs].realreg=d;
1554     live.state[vs].realind=i;
1555     live.nat[d].holds[i]=vs;
1556     }
1557     live.nat[d].nholds=live.nat[s].nholds;
1558    
1559     live.nat[s].nholds=0;
1560     }
1561    
1562    
1563     static __inline__ void make_exclusive(int r, int size, int spec)
1564     {
1565     int clobber;
1566     reg_status oldstate;
1567     int rr=live.state[r].realreg;
1568     int nr;
1569     int nind;
1570     int ndirt=0;
1571     int i;
1572    
1573     if (!isinreg(r))
1574     return;
1575     if (live.nat[rr].nholds==1)
1576     return;
1577     for (i=0;i<live.nat[rr].nholds;i++) {
1578     int vr=live.nat[rr].holds[i];
1579     if (vr!=r &&
1580     (live.state[vr].status==DIRTY || live.state[vr].val))
1581     ndirt++;
1582     }
1583     if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1584     /* Everything else is clean, so let's keep this register */
1585     for (i=0;i<live.nat[rr].nholds;i++) {
1586     int vr=live.nat[rr].holds[i];
1587     if (vr!=r) {
1588     evict(vr);
1589     i--; /* Try that index again! */
1590     }
1591     }
1592     Dif (live.nat[rr].nholds!=1) {
1593     write_log("natreg %d holds %d vregs, %d not exclusive\n",
1594     rr,live.nat[rr].nholds,r);
1595     abort();
1596     }
1597     return;
1598     }
1599    
1600     /* We have to split the register */
1601     oldstate=live.state[r];
1602    
1603     setlock(rr); /* Make sure this doesn't go away */
1604     /* Forget about r being in the register rr */
1605     disassociate(r);
1606     /* Get a new register, that we will clobber completely */
1607     if (oldstate.status==DIRTY) {
1608     /* If dirtysize is <4, we need a register that can handle the
1609     eventual smaller memory store! Thanks to Quake68k for exposing
1610     this detail ;-) */
1611     nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1612     }
1613     else {
1614     nr=alloc_reg_hinted(r,4,1,spec);
1615     }
1616     nind=live.state[r].realind;
1617     live.state[r]=oldstate; /* Keep all the old state info */
1618     live.state[r].realreg=nr;
1619     live.state[r].realind=nind;
1620    
1621     if (size<live.state[r].validsize) {
1622     if (live.state[r].val) {
1623     /* Might as well compensate for the offset now */
1624     raw_lea_l_brr(nr,rr,oldstate.val);
1625     live.state[r].val=0;
1626     live.state[r].dirtysize=4;
1627     set_status(r,DIRTY);
1628     }
1629     else
1630     raw_mov_l_rr(nr,rr); /* Make another copy */
1631     }
1632     unlock2(rr);
1633     }
1634    
1635     static __inline__ void add_offset(int r, uae_u32 off)
1636     {
1637     live.state[r].val+=off;
1638     }
1639    
1640     static __inline__ void remove_offset(int r, int spec)
1641     {
1642     reg_status oldstate;
1643     int rr;
1644    
1645     if (isconst(r))
1646     return;
1647     if (live.state[r].val==0)
1648     return;
1649     if (isinreg(r) && live.state[r].validsize<4)
1650     evict(r);
1651    
1652     if (!isinreg(r))
1653     alloc_reg_hinted(r,4,0,spec);
1654    
1655     Dif (live.state[r].validsize!=4) {
1656     write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1657     abort();
1658     }
1659     make_exclusive(r,0,-1);
1660     /* make_exclusive might have done the job already */
1661     if (live.state[r].val==0)
1662     return;
1663    
1664     rr=live.state[r].realreg;
1665    
1666     if (live.nat[rr].nholds==1) {
1667     //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1668     // live.state[r].val,r,rr,target);
1669     adjust_nreg(rr,live.state[r].val);
1670     live.state[r].dirtysize=4;
1671     live.state[r].val=0;
1672     set_status(r,DIRTY);
1673     return;
1674     }
1675     write_log("Failed in remove_offset\n");
1676     abort();
1677     }
1678    
1679     static __inline__ void remove_all_offsets(void)
1680     {
1681     int i;
1682    
1683     for (i=0;i<VREGS;i++)
1684     remove_offset(i,-1);
1685     }
1686    
1687 gbeauche 1.28 static inline void flush_reg_count(void)
1688     {
1689     #if RECORD_REGISTER_USAGE
1690     for (int r = 0; r < 16; r++)
1691     if (reg_count_local[r])
1692     ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1693     #endif
1694     }
1695    
1696     static inline void record_register(int r)
1697     {
1698     #if RECORD_REGISTER_USAGE
1699     if (r < 16)
1700     reg_count_local[r]++;
1701     #endif
1702     }
1703    
1704 gbeauche 1.1 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1705     {
1706     int n;
1707     int answer=-1;
1708    
1709 gbeauche 1.28 record_register(r);
1710 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1711     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1712     }
1713     if (!can_offset)
1714     remove_offset(r,spec);
1715    
1716     if (isinreg(r) && live.state[r].validsize>=size) {
1717     n=live.state[r].realreg;
1718     switch(size) {
1719     case 1:
1720     if (live.nat[n].canbyte || spec>=0) {
1721     answer=n;
1722     }
1723     break;
1724     case 2:
1725     if (live.nat[n].canword || spec>=0) {
1726     answer=n;
1727     }
1728     break;
1729     case 4:
1730     answer=n;
1731     break;
1732     default: abort();
1733     }
1734     if (answer<0)
1735     evict(r);
1736     }
1737     /* either the value was in memory to start with, or it was evicted and
1738     is in memory now */
1739     if (answer<0) {
1740     answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1741     }
1742    
1743     if (spec>=0 && spec!=answer) {
1744     /* Too bad */
1745     mov_nregs(spec,answer);
1746     answer=spec;
1747     }
1748     live.nat[answer].locked++;
1749     live.nat[answer].touched=touchcnt++;
1750     return answer;
1751     }
1752    
1753    
1754    
1755     static int readreg(int r, int size)
1756     {
1757     return readreg_general(r,size,-1,0);
1758     }
1759    
1760     static int readreg_specific(int r, int size, int spec)
1761     {
1762     return readreg_general(r,size,spec,0);
1763     }
1764    
1765     static int readreg_offset(int r, int size)
1766     {
1767     return readreg_general(r,size,-1,1);
1768     }
1769    
1770     /* writereg_general(r, size, spec)
1771     *
1772     * INPUT
1773     * - r : mid-layer register
1774     * - size : requested size (1/2/4)
1775     * - spec : -1 if find or make a register free, otherwise specifies
1776     * the physical register to use in any case
1777     *
1778     * OUTPUT
1779     * - hard (physical, x86 here) register allocated to virtual register r
1780     */
1781     static __inline__ int writereg_general(int r, int size, int spec)
1782     {
1783     int n;
1784     int answer=-1;
1785    
1786 gbeauche 1.28 record_register(r);
1787 gbeauche 1.1 if (size<4) {
1788     remove_offset(r,spec);
1789     }
1790    
1791     make_exclusive(r,size,spec);
1792     if (isinreg(r)) {
1793     int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1794     int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1795     n=live.state[r].realreg;
1796    
1797     Dif (live.nat[n].nholds!=1)
1798     abort();
1799     switch(size) {
1800     case 1:
1801     if (live.nat[n].canbyte || spec>=0) {
1802     live.state[r].dirtysize=ndsize;
1803     live.state[r].validsize=nvsize;
1804     answer=n;
1805     }
1806     break;
1807     case 2:
1808     if (live.nat[n].canword || spec>=0) {
1809     live.state[r].dirtysize=ndsize;
1810     live.state[r].validsize=nvsize;
1811     answer=n;
1812     }
1813     break;
1814     case 4:
1815     live.state[r].dirtysize=ndsize;
1816     live.state[r].validsize=nvsize;
1817     answer=n;
1818     break;
1819     default: abort();
1820     }
1821     if (answer<0)
1822     evict(r);
1823     }
1824     /* either the value was in memory to start with, or it was evicted and
1825     is in memory now */
1826     if (answer<0) {
1827     answer=alloc_reg_hinted(r,size,1,spec);
1828     }
1829     if (spec>=0 && spec!=answer) {
1830     mov_nregs(spec,answer);
1831     answer=spec;
1832     }
1833     if (live.state[r].status==UNDEF)
1834     live.state[r].validsize=4;
1835     live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1836     live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1837    
1838     live.nat[answer].locked++;
1839     live.nat[answer].touched=touchcnt++;
1840     if (size==4) {
1841     live.state[r].val=0;
1842     }
1843     else {
1844     Dif (live.state[r].val) {
1845     write_log("Problem with val\n");
1846     abort();
1847     }
1848     }
1849     set_status(r,DIRTY);
1850     return answer;
1851     }
1852    
1853     static int writereg(int r, int size)
1854     {
1855     return writereg_general(r,size,-1);
1856     }
1857    
1858     static int writereg_specific(int r, int size, int spec)
1859     {
1860     return writereg_general(r,size,spec);
1861     }
1862    
1863     static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1864     {
1865     int n;
1866     int answer=-1;
1867    
1868 gbeauche 1.28 record_register(r);
1869 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1870     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1871     }
1872     remove_offset(r,spec);
1873     make_exclusive(r,0,spec);
1874    
1875     Dif (wsize<rsize) {
1876     write_log("Cannot handle wsize<rsize in rmw_general()\n");
1877     abort();
1878     }
1879     if (isinreg(r) && live.state[r].validsize>=rsize) {
1880     n=live.state[r].realreg;
1881     Dif (live.nat[n].nholds!=1)
1882     abort();
1883    
1884     switch(rsize) {
1885     case 1:
1886     if (live.nat[n].canbyte || spec>=0) {
1887     answer=n;
1888     }
1889     break;
1890     case 2:
1891     if (live.nat[n].canword || spec>=0) {
1892     answer=n;
1893     }
1894     break;
1895     case 4:
1896     answer=n;
1897     break;
1898     default: abort();
1899     }
1900     if (answer<0)
1901     evict(r);
1902     }
1903     /* either the value was in memory to start with, or it was evicted and
1904     is in memory now */
1905     if (answer<0) {
1906     answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1907     }
1908    
1909     if (spec>=0 && spec!=answer) {
1910     /* Too bad */
1911     mov_nregs(spec,answer);
1912     answer=spec;
1913     }
1914     if (wsize>live.state[r].dirtysize)
1915     live.state[r].dirtysize=wsize;
1916     if (wsize>live.state[r].validsize)
1917     live.state[r].validsize=wsize;
1918     set_status(r,DIRTY);
1919    
1920     live.nat[answer].locked++;
1921     live.nat[answer].touched=touchcnt++;
1922    
1923     Dif (live.state[r].val) {
1924     write_log("Problem with val(rmw)\n");
1925     abort();
1926     }
1927     return answer;
1928     }
1929    
1930     static int rmw(int r, int wsize, int rsize)
1931     {
1932     return rmw_general(r,wsize,rsize,-1);
1933     }
1934    
1935     static int rmw_specific(int r, int wsize, int rsize, int spec)
1936     {
1937     return rmw_general(r,wsize,rsize,spec);
1938     }
1939    
1940    
1941     /* needed for restoring the carry flag on non-P6 cores */
1942     static void bt_l_ri_noclobber(R4 r, IMM i)
1943     {
1944     int size=4;
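    /* a bit index below 16 only examines the low word, so having the
       low 2 bytes of r valid is enough */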
1945     if (i<16)
1946     size=2;
1947     r=readreg(r,size);
1948     raw_bt_l_ri(r,i);
1949     unlock2(r);
1950     }
1951    
1952     /********************************************************************
1953     * FPU register status handling. EMIT TIME! *
1954     ********************************************************************/
1955    
1956     static void f_tomem(int r)
1957     {
1958     if (live.fate[r].status==DIRTY) {
1959     #if USE_LONG_DOUBLE
1960 gbeauche 1.24 raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1961 gbeauche 1.1 #else
1962 gbeauche 1.24 raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
1963 gbeauche 1.1 #endif
1964     live.fate[r].status=CLEAN;
1965     }
1966     }
1967    
1968     static void f_tomem_drop(int r)
1969     {
1970     if (live.fate[r].status==DIRTY) {
1971     #if USE_LONG_DOUBLE
1972 gbeauche 1.24 raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1973 gbeauche 1.1 #else
1974 gbeauche 1.24 raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
1975 gbeauche 1.1 #endif
1976     live.fate[r].status=INMEM;
1977     }
1978     }
1979    
1980    
1981     static __inline__ int f_isinreg(int r)
1982     {
1983     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1984     }
1985    
1986     static void f_evict(int r)
1987     {
1988     int rr;
1989    
1990     if (!f_isinreg(r))
1991     return;
1992     rr=live.fate[r].realreg;
1993     if (live.fat[rr].nholds==1)
1994     f_tomem_drop(r);
1995     else
1996     f_tomem(r);
1997    
1998     Dif (live.fat[rr].locked &&
1999     live.fat[rr].nholds==1) {
2000     write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
2001     abort();
2002     }
2003    
2004     live.fat[rr].nholds--;
2005     if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
2006     int topreg=live.fat[rr].holds[live.fat[rr].nholds];
2007     int thisind=live.fate[r].realind;
2008     live.fat[rr].holds[thisind]=topreg;
2009     live.fate[topreg].realind=thisind;
2010     }
2011     live.fate[r].status=INMEM;
2012     live.fate[r].realreg=-1;
2013     }
2014    
2015     static __inline__ void f_free_nreg(int r)
2016     {
2017     int i=live.fat[r].nholds;
2018    
2019     while (i) {
2020     int vr;
2021    
2022     --i;
2023     vr=live.fat[r].holds[i];
2024     f_evict(vr);
2025     }
2026     Dif (live.fat[r].nholds!=0) {
2027     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
2028     abort();
2029     }
2030     }
2031    
2032    
2033     /* Use with care! Marks r clean without writing it back, so any dirty data is silently dropped. */
2034     static __inline__ void f_isclean(int r)
2035     {
2036     if (!f_isinreg(r))
2037     return;
2038     live.fate[r].status=CLEAN;
2039     }
2040    
2041     static __inline__ void f_disassociate(int r)
2042     {
2043     f_isclean(r);
2044     f_evict(r);
2045     }
2046    
2047    
2048    
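    /* Pick a host FP register for virtual register r: prefer a register
       holding nothing; otherwise evict the least recently touched unlocked
       one (the touched counter serves as an LRU timestamp). Unless
       willclobber is set, the old value is reloaded from memory. */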
2049     static int f_alloc_reg(int r, int willclobber)
2050     {
2051     int bestreg;
2052     uae_s32 when;
2053     int i;
2054     uae_s32 badness;
2055     bestreg=-1;
2056     when=2000000000;
2057     for (i=N_FREGS;i--;) {
2058     badness=live.fat[i].touched;
2059     if (live.fat[i].nholds==0)
2060     badness=0;
2061    
2062     if (!live.fat[i].locked && badness<when) {
2063     bestreg=i;
2064     when=badness;
2065     if (live.fat[i].nholds==0)
2066     break;
2067     }
2068     }
2069     Dif (bestreg==-1)
2070     abort();
2071    
2072     if (live.fat[bestreg].nholds>0) {
2073     f_free_nreg(bestreg);
2074     }
2075     if (f_isinreg(r)) {
2076     f_evict(r);
2077     }
2078    
2079     if (!willclobber) {
2080     if (live.fate[r].status!=UNDEF) {
2081     #if USE_LONG_DOUBLE
2082 gbeauche 1.24 raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
2083 gbeauche 1.1 #else
2084 gbeauche 1.24 raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
2085 gbeauche 1.1 #endif
2086     }
2087     live.fate[r].status=CLEAN;
2088     }
2089     else {
2090     live.fate[r].status=DIRTY;
2091     }
2092     live.fate[r].realreg=bestreg;
2093     live.fate[r].realind=live.fat[bestreg].nholds;
2094     live.fat[bestreg].touched=touchcnt++;
2095     live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2096     live.fat[bestreg].nholds++;
2097    
2098     return bestreg;
2099     }
2100    
2101     static void f_unlock(int r)
2102     {
2103     Dif (!live.fat[r].locked)
2104     abort();
2105     live.fat[r].locked--;
2106     }
2107    
2108     static void f_setlock(int r)
2109     {
2110     live.fat[r].locked++;
2111     }
2112    
2113     static __inline__ int f_readreg(int r)
2114     {
2115     int n;
2116     int answer=-1;
2117    
2118     if (f_isinreg(r)) {
2119     n=live.fate[r].realreg;
2120     answer=n;
2121     }
2122     /* either the value was in memory to start with, or it was evicted and
2123     is in memory now */
2124     if (answer<0)
2125     answer=f_alloc_reg(r,0);
2126    
2127     live.fat[answer].locked++;
2128     live.fat[answer].touched=touchcnt++;
2129     return answer;
2130     }
2131    
2132     static __inline__ void f_make_exclusive(int r, int clobber)
2133     {
2134     freg_status oldstate;
2135     int rr=live.fate[r].realreg;
2136     int nr;
2137     int nind;
2138     int ndirt=0;
2139     int i;
2140    
2141     if (!f_isinreg(r))
2142     return;
2143     if (live.fat[rr].nholds==1)
2144     return;
2145     for (i=0;i<live.fat[rr].nholds;i++) {
2146     int vr=live.fat[rr].holds[i];
2147     if (vr!=r && live.fate[vr].status==DIRTY)
2148     ndirt++;
2149     }
2150     if (!ndirt && !live.fat[rr].locked) {
2151     /* Everything else is clean, so let's keep this register */
2152     for (i=0;i<live.fat[rr].nholds;i++) {
2153     int vr=live.fat[rr].holds[i];
2154     if (vr!=r) {
2155     f_evict(vr);
2156     i--; /* Try that index again! */
2157     }
2158     }
2159     Dif (live.fat[rr].nholds!=1) {
2160     write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2161     for (i=0;i<live.fat[rr].nholds;i++) {
2162     write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2163     live.fate[live.fat[rr].holds[i]].realreg,
2164     live.fate[live.fat[rr].holds[i]].realind);
2165     }
2166     write_log("\n");
2167     abort();
2168     }
2169     return;
2170     }
2171    
2172     /* We have to split the register */
2173     oldstate=live.fate[r];
2174    
2175     f_setlock(rr); /* Make sure this doesn't go away */
2176     /* Forget about r being in the register rr */
2177     f_disassociate(r);
2178     /* Get a new register, that we will clobber completely */
2179     nr=f_alloc_reg(r,1);
2180     nind=live.fate[r].realind;
2181     if (!clobber)
2182     raw_fmov_rr(nr,rr); /* Make another copy */
2183     live.fate[r]=oldstate; /* Keep all the old state info */
2184     live.fate[r].realreg=nr;
2185     live.fate[r].realind=nind;
2186     f_unlock(rr);
2187     }
2188    
2189    
2190     static __inline__ int f_writereg(int r)
2191     {
2192     int n;
2193     int answer=-1;
2194    
2195     f_make_exclusive(r,1);
2196     if (f_isinreg(r)) {
2197     n=live.fate[r].realreg;
2198     answer=n;
2199     }
2200     if (answer<0) {
2201     answer=f_alloc_reg(r,1);
2202     }
2203     live.fate[r].status=DIRTY;
2204     live.fat[answer].locked++;
2205     live.fat[answer].touched=touchcnt++;
2206     return answer;
2207     }
2208    
2209     static int f_rmw(int r)
2210     {
2211     int n;
2212    
2213     f_make_exclusive(r,0);
2214     if (f_isinreg(r)) {
2215     n=live.fate[r].realreg;
2216     }
2217     else
2218     n=f_alloc_reg(r,0);
2219     live.fate[r].status=DIRTY;
2220     live.fat[n].locked++;
2221     live.fat[n].touched=touchcnt++;
2222     return n;
2223     }
2224    
2225     static void fflags_into_flags_internal(uae_u32 tmp)
2226     {
2227     int r;
2228    
2229     clobber_flags();
2230     r=f_readreg(FP_RESULT);
2231     if (FFLAG_NREG_CLOBBER_CONDITION) {
2232     int tmp2=tmp;
2233     tmp=writereg_specific(tmp,4,FFLAG_NREG);
2234     raw_fflags_into_flags(r);
2235     unlock2(tmp);
2236     forget_about(tmp2);
2237     }
2238     else
2239     raw_fflags_into_flags(r);
2240     f_unlock(r);
2241 gbeauche 1.19 live_flags();
2242 gbeauche 1.1 }
2243    
2244    
2245    
2246    
2247     /********************************************************************
2248     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2249     ********************************************************************/
2250    
2251     /*
2252     * RULES FOR HANDLING REGISTERS:
2253     *
2254     * * In the function headers, order the parameters
2255     * - 1st registers written to
2256     * - 2nd read/modify/write registers
2257     * - 3rd registers read from
2258     * * Before calling raw_*, you must call readreg, writereg or rmw for
2259     * each register
2260     * * The order for this is
2261     * - 1st call remove_offset for all registers written to with size<4
2262     * - 2nd call readreg for all registers read without offset
2263     * - 3rd call rmw for all rmw registers
2264     * - 4th call readreg_offset for all registers that can handle offsets
2265     * - 5th call get_offset for all the registers from the previous step
2266     * - 6th call writereg for all written-to registers
2267     * - 7th call raw_*
2268     * - 8th unlock2 all registers that were locked
2269     */
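    /* A minimal sketch of these rules for a hypothetical two-operand
     * opcode (every real emitter below follows the same shape; the
     * CLOBBER_* macro declares what the raw opcode trashes):
     *
     * MIDFUNC(2,example_op,(RW4 d, R4 s))
     * {
     *     CLOBBER_EXAMPLE;
     *     s=readreg(s,4);      // 2nd: plain reads
     *     d=rmw(d,4,4);        // 3rd: read/modify/write registers
     *     raw_example_op(d,s); // 7th: emit the raw opcode
     *     unlock2(d);          // 8th: unlock everything locked above
     *     unlock2(s);
     * }
     * MENDFUNC(2,example_op,(RW4 d, R4 s))
     */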
2270    
2271     MIDFUNC(0,live_flags,(void))
2272     {
2273     live.flags_on_stack=TRASH;
2274     live.flags_in_flags=VALID;
2275     live.flags_are_important=1;
2276     }
2277     MENDFUNC(0,live_flags,(void))
2278    
2279     MIDFUNC(0,dont_care_flags,(void))
2280     {
2281     live.flags_are_important=0;
2282     }
2283     MENDFUNC(0,dont_care_flags,(void))
2284    
2285    
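    /* Copy the host carry into the emulated X flag: setcc_m with x86
       condition code 2 ("b", i.e. carry set) stores a byte into FLAGX's
       backing memory. */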
2286     MIDFUNC(0,duplicate_carry,(void))
2287     {
2288     evict(FLAGX);
2289     make_flags_live_internal();
2290 gbeauche 1.24 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2291 gbeauche 1.1 log_vwrite(FLAGX);
2292     }
2293     MENDFUNC(0,duplicate_carry,(void))
2294    
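    /* Move the saved X flag back into the host carry. On P6-family cores
       the bt would hit a partial-register (RAT) stall, so a rotate of the
       flag byte is used instead; on older cores the plain bt is cheaper,
       hence the run-time have_rat_stall test below. */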
2295     MIDFUNC(0,restore_carry,(void))
2296     {
2297     if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2298     bt_l_ri_noclobber(FLAGX,0);
2299     }
2300     else { /* Avoid the stall the above creates.
2301     This is slow on non-P6, though.
2302     */
2303     COMPCALL(rol_b_ri)(FLAGX,8);
2304     isclean(FLAGX);
2305     }
2306     }
2307     MENDFUNC(0,restore_carry,(void))
2308    
2309     MIDFUNC(0,start_needflags,(void))
2310     {
2311     needflags=1;
2312     }
2313     MENDFUNC(0,start_needflags,(void))
2314    
2315     MIDFUNC(0,end_needflags,(void))
2316     {
2317     needflags=0;
2318     }
2319     MENDFUNC(0,end_needflags,(void))
2320    
2321     MIDFUNC(0,make_flags_live,(void))
2322     {
2323     make_flags_live_internal();
2324     }
2325     MENDFUNC(0,make_flags_live,(void))
2326    
2327     MIDFUNC(1,fflags_into_flags,(W2 tmp))
2328     {
2329     clobber_flags();
2330     fflags_into_flags_internal(tmp);
2331     }
2332     MENDFUNC(1,fflags_into_flags,(W2 tmp))
2333    
2334    
2335     MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2336     {
2337     int size=4;
2338     if (i<16)
2339     size=2;
2340     CLOBBER_BT;
2341     r=readreg(r,size);
2342     raw_bt_l_ri(r,i);
2343     unlock2(r);
2344     }
2345     MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2346    
2347     MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2348     {
2349     CLOBBER_BT;
2350     r=readreg(r,4);
2351     b=readreg(b,4);
2352     raw_bt_l_rr(r,b);
2353     unlock2(r);
2354     unlock2(b);
2355     }
2356     MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2357    
2358     MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2359     {
2360     int size=4;
2361     if (i<16)
2362     size=2;
2363     CLOBBER_BT;
2364     r=rmw(r,size,size);
2365     raw_btc_l_ri(r,i);
2366     unlock2(r);
2367     }
2368     MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2369    
2370     MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2371     {
2372     CLOBBER_BT;
2373     b=readreg(b,4);
2374     r=rmw(r,4,4);
2375     raw_btc_l_rr(r,b);
2376     unlock2(r);
2377     unlock2(b);
2378     }
2379     MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2380    
2381    
2382     MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2383     {
2384     int size=4;
2385     if (i<16)
2386     size=2;
2387     CLOBBER_BT;
2388     r=rmw(r,size,size);
2389     raw_btr_l_ri(r,i);
2390     unlock2(r);
2391     }
2392     MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2393    
2394     MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2395     {
2396     CLOBBER_BT;
2397     b=readreg(b,4);
2398     r=rmw(r,4,4);
2399     raw_btr_l_rr(r,b);
2400     unlock2(r);
2401     unlock2(b);
2402     }
2403     MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2404    
2405    
2406     MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2407     {
2408     int size=4;
2409     if (i<16)
2410     size=2;
2411     CLOBBER_BT;
2412     r=rmw(r,size,size);
2413     raw_bts_l_ri(r,i);
2414     unlock2(r);
2415     }
2416     MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2417    
2418     MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2419     {
2420     CLOBBER_BT;
2421     b=readreg(b,4);
2422     r=rmw(r,4,4);
2423     raw_bts_l_rr(r,b);
2424     unlock2(r);
2425     unlock2(b);
2426     }
2427     MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2428    
2429     MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2430     {
2431     CLOBBER_MOV;
2432     d=writereg(d,4);
2433     raw_mov_l_rm(d,s);
2434     unlock2(d);
2435     }
2436     MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2437    
2438    
2439     MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2440     {
2441     r=readreg(r,4);
2442     raw_call_r(r);
2443     unlock2(r);
2444     }
2445     MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2446    
2447     MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2448     {
2449     CLOBBER_SUB;
2450     raw_sub_l_mi(d,s) ;
2451     }
2452     MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2453    
2454     MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2455     {
2456     CLOBBER_MOV;
2457     raw_mov_l_mi(d,s) ;
2458     }
2459     MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2460    
2461     MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2462     {
2463     CLOBBER_MOV;
2464     raw_mov_w_mi(d,s) ;
2465     }
2466     MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2467    
2468     MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2469     {
2470     CLOBBER_MOV;
2471     raw_mov_b_mi(d,s) ;
2472     }
2473     MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2474    
2475     MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2476     {
2477     if (!i && !needflags)
2478     return;
2479     CLOBBER_ROL;
2480     r=rmw(r,1,1);
2481     raw_rol_b_ri(r,i);
2482     unlock2(r);
2483     }
2484     MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2485    
2486     MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2487     {
2488     if (!i && !needflags)
2489     return;
2490     CLOBBER_ROL;
2491     r=rmw(r,2,2);
2492     raw_rol_w_ri(r,i);
2493     unlock2(r);
2494     }
2495     MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2496    
2497     MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2498     {
2499     if (!i && !needflags)
2500     return;
2501     CLOBBER_ROL;
2502     r=rmw(r,4,4);
2503     raw_rol_l_ri(r,i);
2504     unlock2(r);
2505     }
2506     MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2507    
2508     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2509     {
2510     if (isconst(r)) {
2511     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2512     return;
2513     }
2514     CLOBBER_ROL;
2515     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2516     d=rmw(d,4,4);
2517     Dif (r!=1) {
2518     write_log("Illegal register %d in raw_rol_l\n",r);
2519     abort();
2520     }
2521     raw_rol_l_rr(d,r) ;
2522     unlock2(r);
2523     unlock2(d);
2524     }
2525     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2526    
2527     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2528     { /* Can only do this with r==1, i.e. cl */
2529    
2530     if (isconst(r)) {
2531     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2532     return;
2533     }
2534     CLOBBER_ROL;
2535     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2536     d=rmw(d,2,2);
2537     Dif (r!=1) {
2538     write_log("Illegal register %d in raw_rol_w\n",r);
2539     abort();
2540     }
2541     raw_rol_w_rr(d,r) ;
2542     unlock2(r);
2543     unlock2(d);
2544     }
2545     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2546    
2547     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2548     { /* Can only do this with r==1, i.e. cl */
2549    
2550     if (isconst(r)) {
2551     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2552     return;
2553     }
2554    
2555     CLOBBER_ROL;
2556     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2557     d=rmw(d,1,1);
2558     Dif (r!=1) {
2559     write_log("Illegal register %d in raw_rol_b\n",r);
2560     abort();
2561     }
2562     raw_rol_b_rr(d,r) ;
2563     unlock2(r);
2564     unlock2(d);
2565     }
2566     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2567    
2568    
2569     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2570     {
2571     if (isconst(r)) {
2572     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2573     return;
2574     }
2575     CLOBBER_SHLL;
2576     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2577     d=rmw(d,4,4);
2578     Dif (r!=1) {
2579     write_log("Illegal register %d in raw_shll_l\n",r);
2580     abort();
2581     }
2582     raw_shll_l_rr(d,r) ;
2583     unlock2(r);
2584     unlock2(d);
2585     }
2586     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2587    
2588     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2589     { /* Can only do this with r==1, i.e. cl */
2590    
2591     if (isconst(r)) {
2592     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2593     return;
2594     }
2595     CLOBBER_SHLL;
2596     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2597     d=rmw(d,2,2);
2598     Dif (r!=1) {
2599     write_log("Illegal register %d in raw_shll_w\n",r);
2600     abort();
2601     }
2602     raw_shll_w_rr(d,r) ;
2603     unlock2(r);
2604     unlock2(d);
2605     }
2606     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2607    
2608     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2609     { /* Can only do this with r==1, i.e. cl */
2610    
2611     if (isconst(r)) {
2612     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2613     return;
2614     }
2615    
2616     CLOBBER_SHLL;
2617     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2618     d=rmw(d,1,1);
2619     Dif (r!=1) {
2620     write_log("Illegal register %d in raw_shll_b\n",r);
2621     abort();
2622     }
2623     raw_shll_b_rr(d,r) ;
2624     unlock2(r);
2625     unlock2(d);
2626     }
2627     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2628    
2629    
2630     MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2631     {
2632     if (!i && !needflags)
2633     return;
2634     CLOBBER_ROR;
2635     r=rmw(r,1,1);
2636     raw_ror_b_ri(r,i);
2637     unlock2(r);
2638     }
2639     MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2640    
2641     MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2642     {
2643     if (!i && !needflags)
2644     return;
2645     CLOBBER_ROR;
2646     r=rmw(r,2,2);
2647     raw_ror_w_ri(r,i);
2648     unlock2(r);
2649     }
2650     MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2651    
2652     MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2653     {
2654     if (!i && !needflags)
2655     return;
2656     CLOBBER_ROR;
2657     r=rmw(r,4,4);
2658     raw_ror_l_ri(r,i);
2659     unlock2(r);
2660     }
2661     MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2662    
2663     MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2664     {
2665     if (isconst(r)) {
2666     COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2667     return;
2668     }
2669     CLOBBER_ROR;
2670     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2671     d=rmw(d,4,4);
2672     raw_ror_l_rr(d,r) ;
2673     unlock2(r);
2674     unlock2(d);
2675     }
2676     MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2677    
2678     MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2679     {
2680     if (isconst(r)) {
2681     COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2682     return;
2683     }
2684     CLOBBER_ROR;
2685     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2686     d=rmw(d,2,2);
2687     raw_ror_w_rr(d,r) ;
2688     unlock2(r);
2689     unlock2(d);
2690     }
2691     MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2692    
2693     MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2694     {
2695     if (isconst(r)) {
2696     COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2697     return;
2698     }
2699    
2700     CLOBBER_ROR;
2701     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2702     d=rmw(d,1,1);
2703     raw_ror_b_rr(d,r) ;
2704     unlock2(r);
2705     unlock2(d);
2706     }
2707     MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2708    
2709     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2710     {
2711     if (isconst(r)) {
2712     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2713     return;
2714     }
2715     CLOBBER_SHRL;
2716     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2717     d=rmw(d,4,4);
2718     Dif (r!=1) {
2719     write_log("Illegal register %d in raw_shrl_l\n",r);
2720     abort();
2721     }
2722     raw_shrl_l_rr(d,r) ;
2723     unlock2(r);
2724     unlock2(d);
2725     }
2726     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2727    
2728     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2729     { /* Can only do this with r==1, i.e. cl */
2730    
2731     if (isconst(r)) {
2732     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2733     return;
2734     }
2735     CLOBBER_SHRL;
2736     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2737     d=rmw(d,2,2);
2738     Dif (r!=1) {
2739     write_log("Illegal register %d in raw_shrl_w\n",r);
2740     abort();
2741     }
2742     raw_shrl_w_rr(d,r) ;
2743     unlock2(r);
2744     unlock2(d);
2745     }
2746     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2747    
2748     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2749     { /* Can only do this with r==1, i.e. cl */
2750    
2751     if (isconst(r)) {
2752     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2753     return;
2754     }
2755    
2756     CLOBBER_SHRL;
2757     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2758     d=rmw(d,1,1);
2759     Dif (r!=1) {
2760     write_log("Illegal register %d in raw_shrl_b\n",r);
2761     abort();
2762     }
2763     raw_shrl_b_rr(d,r) ;
2764     unlock2(r);
2765     unlock2(d);
2766     }
2767     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2768    
2769    
2770    
2771     MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2772     {
2773     if (!i && !needflags)
2774     return;
2775     if (isconst(r) && !needflags) {
2776     live.state[r].val<<=i;
2777     return;
2778     }
2779     CLOBBER_SHLL;
2780     r=rmw(r,4,4);
2781     raw_shll_l_ri(r,i);
2782     unlock2(r);
2783     }
2784     MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2785    
2786     MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2787     {
2788     if (!i && !needflags)
2789     return;
2790     CLOBBER_SHLL;
2791     r=rmw(r,2,2);
2792     raw_shll_w_ri(r,i);
2793     unlock2(r);
2794     }
2795     MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2796    
2797     MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2798     {
2799     if (!i && !needflags)
2800     return;
2801     CLOBBER_SHLL;
2802     r=rmw(r,1,1);
2803     raw_shll_b_ri(r,i);
2804     unlock2(r);
2805     }
2806     MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2807    
2808     MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2809     {
2810     if (!i && !needflags)
2811     return;
2812     if (isconst(r) && !needflags) {
2813     live.state[r].val>>=i;
2814     return;
2815     }
2816     CLOBBER_SHRL;
2817     r=rmw(r,4,4);
2818     raw_shrl_l_ri(r,i);
2819     unlock2(r);
2820     }
2821     MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2822    
2823     MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2824     {
2825     if (!i && !needflags)
2826     return;
2827     CLOBBER_SHRL;
2828     r=rmw(r,2,2);
2829     raw_shrl_w_ri(r,i);
2830     unlock2(r);
2831     }
2832     MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2833    
2834     MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2835     {
2836     if (!i && !needflags)
2837     return;
2838     CLOBBER_SHRL;
2839     r=rmw(r,1,1);
2840     raw_shrl_b_ri(r,i);
2841     unlock2(r);
2842     }
2843     MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2844    
2845     MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2846     {
2847     if (!i && !needflags)
2848     return;
2849     CLOBBER_SHRA;
2850     r=rmw(r,4,4);
2851     raw_shra_l_ri(r,i);
2852     unlock2(r);
2853     }
2854     MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2855    
2856     MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2857     {
2858     if (!i && !needflags)
2859     return;
2860     CLOBBER_SHRA;
2861     r=rmw(r,2,2);
2862     raw_shra_w_ri(r,i);
2863     unlock2(r);
2864     }
2865     MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2866    
2867     MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2868     {
2869     if (!i && !needflags)
2870     return;
2871     CLOBBER_SHRA;
2872     r=rmw(r,1,1);
2873     raw_shra_b_ri(r,i);
2874     unlock2(r);
2875     }
2876     MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2877    
2878     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2879     {
2880     if (isconst(r)) {
2881     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2882     return;
2883     }
2884     CLOBBER_SHRA;
2885     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2886     d=rmw(d,4,4);
2887     Dif (r!=1) {
2888     write_log("Illegal register %d in raw_shra_l\n",r);
2889     abort();
2890     }
2891     raw_shra_l_rr(d,r) ;
2892     unlock2(r);
2893     unlock2(d);
2894     }
2895     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2896    
2897     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2898     { /* Can only do this with r==1, i.e. cl */
2899    
2900     if (isconst(r)) {
2901     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2902     return;
2903     }
2904     CLOBBER_SHRA;
2905     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2906     d=rmw(d,2,2);
2907     Dif (r!=1) {
2908     write_log("Illegal register %d in raw_shra_w\n",r);
2909     abort();
2910     }
2911     raw_shra_w_rr(d,r) ;
2912     unlock2(r);
2913     unlock2(d);
2914     }
2915     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2916    
2917     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2918     { /* Can only do this with r==1, i.e. cl */
2919    
2920     if (isconst(r)) {
2921     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2922     return;
2923     }
2924    
2925     CLOBBER_SHRA;
2926     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2927     d=rmw(d,1,1);
2928     Dif (r!=1) {
2929     write_log("Illegal register %d in raw_shra_b\n",r);
2930     abort();
2931     }
2932     raw_shra_b_rr(d,r) ;
2933     unlock2(r);
2934     unlock2(d);
2935     }
2936     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2937    
2938    
2939     MIDFUNC(2,setcc,(W1 d, IMM cc))
2940     {
2941     CLOBBER_SETCC;
2942     d=writereg(d,1);
2943     raw_setcc(d,cc);
2944     unlock2(d);
2945     }
2946     MENDFUNC(2,setcc,(W1 d, IMM cc))
2947    
2948     MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2949     {
2950     CLOBBER_SETCC;
2951     raw_setcc_m(d,cc);
2952     }
2953     MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2954    
2955     MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2956     {
2957     if (d==s)
2958     return;
2959     CLOBBER_CMOV;
2960     s=readreg(s,4);
2961     d=rmw(d,4,4);
2962     raw_cmov_l_rr(d,s,cc);
2963     unlock2(s);
2964     unlock2(d);
2965     }
2966     MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2967    
2968     MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2969     {
2970     CLOBBER_CMOV;
2971     d=rmw(d,4,4);
2972     raw_cmov_l_rm(d,s,cc);
2973     unlock2(d);
2974     }
2975     MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2976    
2977 gbeauche 1.26 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2978     {
2979     CLOBBER_BSF;
2980     s = readreg(s, 4);
2981     d = writereg(d, 4);
2982     raw_bsf_l_rr(d, s);
2983     unlock2(s);
2984     unlock2(d);
2985     }
2986     MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2987    
2988     /* Set the Z flag depending on the value in s. Note that the
2989     value has to be 0 or -1 (or, more precisely, for non-zero
2990     values, bit 14 must be set)! */
2991     MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
2992 gbeauche 1.1 {
2993 gbeauche 1.26 CLOBBER_BSF;
2994     s=rmw_specific(s,4,4,FLAG_NREG3);
2995     tmp=writereg(tmp,4);
2996     raw_flags_set_zero(s, tmp);
2997     unlock2(tmp);
2998     unlock2(s);
2999 gbeauche 1.1 }
3000 gbeauche 1.26 MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3001 gbeauche 1.1
3002     MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3003     {
3004     CLOBBER_MUL;
3005     s=readreg(s,4);
3006     d=rmw(d,4,4);
3007     raw_imul_32_32(d,s);
3008     unlock2(s);
3009     unlock2(d);
3010     }
3011     MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3012    
3013     MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3014     {
3015     CLOBBER_MUL;
3016     s=rmw_specific(s,4,4,MUL_NREG2);
3017     d=rmw_specific(d,4,4,MUL_NREG1);
3018     raw_imul_64_32(d,s);
3019     unlock2(s);
3020     unlock2(d);
3021     }
3022     MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3023    
3024     MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3025     {
3026     CLOBBER_MUL;
3027     s=rmw_specific(s,4,4,MUL_NREG2);
3028     d=rmw_specific(d,4,4,MUL_NREG1);
3029     raw_mul_64_32(d,s);
3030     unlock2(s);
3031     unlock2(d);
3032     }
3033     MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3034    
3035     MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3036     {
3037     CLOBBER_MUL;
3038     s=readreg(s,4);
3039     d=rmw(d,4,4);
3040     raw_mul_32_32(d,s);
3041     unlock2(s);
3042     unlock2(d);
3043     }
3044     MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3045    
3046 gbeauche 1.24 #if SIZEOF_VOID_P == 8
3047     MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3048     {
3049     int isrmw;
3050    
3051     if (isconst(s)) {
3052     set_const(d,(uae_s32)live.state[s].val);
3053     return;
3054     }
3055    
3056     CLOBBER_SE32;
3057     isrmw=(s==d);
3058     if (!isrmw) {
3059     s=readreg(s,4);
3060     d=writereg(d,4);
3061     }
3062     else { /* If we try to lock this twice, with different sizes, we
3063     are in trouble! */
3064     s=d=rmw(s,4,4);
3065     }
3066     raw_sign_extend_32_rr(d,s);
3067     if (!isrmw) {
3068     unlock2(d);
3069     unlock2(s);
3070     }
3071     else {
3072     unlock2(s);
3073     }
3074     }
3075     MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3076     #endif
3077    
3078 gbeauche 1.1 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3079     {
3080     int isrmw;
3081    
3082     if (isconst(s)) {
3083     set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3084     return;
3085     }
3086    
3087     CLOBBER_SE16;
3088     isrmw=(s==d);
3089     if (!isrmw) {
3090     s=readreg(s,2);
3091     d=writereg(d,4);
3092     }
3093     else { /* If we try to lock this twice, with different sizes, we
3094     are in trouble! */
3095     s=d=rmw(s,4,2);
3096     }
3097     raw_sign_extend_16_rr(d,s);
3098     if (!isrmw) {
3099     unlock2(d);
3100     unlock2(s);
3101     }
3102     else {
3103     unlock2(s);
3104     }
3105     }
3106     MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3107    
3108     MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3109     {
3110     int isrmw;
3111    
3112     if (isconst(s)) {
3113     set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3114     return;
3115     }
3116    
3117     isrmw=(s==d);
3118     CLOBBER_SE8;
3119     if (!isrmw) {
3120     s=readreg(s,1);
3121     d=writereg(d,4);
3122     }
3123     else { /* If we try to lock this twice, with different sizes, we
3124     are in trouble! */
3125     s=d=rmw(s,4,1);
3126     }
3127    
3128     raw_sign_extend_8_rr(d,s);
3129    
3130     if (!isrmw) {
3131     unlock2(d);
3132     unlock2(s);
3133     }
3134     else {
3135     unlock2(s);
3136     }
3137     }
3138     MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3139    
3140    
3141     MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3142     {
3143     int isrmw;
3144    
3145     if (isconst(s)) {
3146     set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3147     return;
3148     }
3149    
3150     isrmw=(s==d);
3151     CLOBBER_ZE16;
3152     if (!isrmw) {
3153     s=readreg(s,2);
3154     d=writereg(d,4);
3155     }
3156     else { /* If we try to lock this twice, with different sizes, we
3157     are in trouble! */
3158     s=d=rmw(s,4,2);
3159     }
3160     raw_zero_extend_16_rr(d,s);
3161     if (!isrmw) {
3162     unlock2(d);
3163     unlock2(s);
3164     }
3165     else {
3166     unlock2(s);
3167     }
3168     }
3169     MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3170    
3171     MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3172     {
3173     int isrmw;
3174     if (isconst(s)) {
3175     set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3176     return;
3177     }
3178    
3179     isrmw=(s==d);
3180     CLOBBER_ZE8;
3181     if (!isrmw) {
3182     s=readreg(s,1);
3183     d=writereg(d,4);
3184     }
3185     else { /* If we try to lock this twice, with different sizes, we
3186     are in trouble! */
3187     s=d=rmw(s,4,1);
3188     }
3189    
3190     raw_zero_extend_8_rr(d,s);
3191    
3192     if (!isrmw) {
3193     unlock2(d);
3194     unlock2(s);
3195     }
3196     else {
3197     unlock2(s);
3198     }
3199     }
3200     MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3201    
3202     MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3203     {
3204     if (d==s)
3205     return;
3206     if (isconst(s)) {
3207     COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3208     return;
3209     }
3210    
3211     CLOBBER_MOV;
3212     s=readreg(s,1);
3213     d=writereg(d,1);
3214     raw_mov_b_rr(d,s);
3215     unlock2(d);
3216     unlock2(s);
3217     }
3218     MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3219    
3220     MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3221     {
3222     if (d==s)
3223     return;
3224     if (isconst(s)) {
3225     COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3226     return;
3227     }
3228    
3229     CLOBBER_MOV;
3230     s=readreg(s,2);
3231     d=writereg(d,2);
3232     raw_mov_w_rr(d,s);
3233     unlock2(d);
3234     unlock2(s);
3235     }
3236     MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3237    
3238    
3239     MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3240     {
3241     CLOBBER_MOV;
3242     baser=readreg(baser,4);
3243     index=readreg(index,4);
3244     d=writereg(d,4);
3245    
3246     raw_mov_l_rrm_indexed(d,baser,index,factor);
3247     unlock2(d);
3248     unlock2(baser);
3249     unlock2(index);
3250     }
3251     MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3252    
3253     MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3254     {
3255     CLOBBER_MOV;
3256     baser=readreg(baser,4);
3257     index=readreg(index,4);
3258     d=writereg(d,2);
3259    
3260     raw_mov_w_rrm_indexed(d,baser,index,factor);
3261     unlock2(d);
3262     unlock2(baser);
3263     unlock2(index);
3264     }
3265     MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3266    
3267     MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3268     {
3269     CLOBBER_MOV;
3270     baser=readreg(baser,4);
3271     index=readreg(index,4);
3272     d=writereg(d,1);
3273    
3274     raw_mov_b_rrm_indexed(d,baser,index,factor);
3275    
3276     unlock2(d);
3277     unlock2(baser);
3278     unlock2(index);
3279     }
3280     MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3281    
3282    
3283     MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3284     {
3285     CLOBBER_MOV;
3286     baser=readreg(baser,4);
3287     index=readreg(index,4);
3288     s=readreg(s,4);
3289    
3290     Dif (baser==s || index==s)
3291     abort();
3292    
3293    
3294     raw_mov_l_mrr_indexed(baser,index,factor,s);
3295     unlock2(s);
3296     unlock2(baser);
3297     unlock2(index);
3298     }
3299     MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3300    
3301     MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3302     {
3303     CLOBBER_MOV;
3304     baser=readreg(baser,4);
3305     index=readreg(index,4);
3306     s=readreg(s,2);
3307    
3308     raw_mov_w_mrr_indexed(baser,index,factor,s);
3309     unlock2(s);
3310     unlock2(baser);
3311     unlock2(index);
3312     }
3313     MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3314    
3315     MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3316     {
3317     CLOBBER_MOV;
3318     s=readreg(s,1);
3319     baser=readreg(baser,4);
3320     index=readreg(index,4);
3321    
3322     raw_mov_b_mrr_indexed(baser,index,factor,s);
3323     unlock2(s);
3324     unlock2(baser);
3325     unlock2(index);
3326     }
3327     MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3328    
3329    
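    /* The following b*rr_indexed forms take an extra constant base; any
       pending offsets on the base and index virtual registers (see
       readreg_offset/get_offset) are folded into that constant before
       the raw opcode is emitted. */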
3330     MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3331     {
3332     int basereg=baser;
3333     int indexreg=index;
3334    
3335     CLOBBER_MOV;
3336     s=readreg(s,4);
3337     baser=readreg_offset(baser,4);
3338     index=readreg_offset(index,4);
3339    
3340     base+=get_offset(basereg);
3341     base+=factor*get_offset(indexreg);
3342    
3343     raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3344     unlock2(s);
3345     unlock2(baser);
3346     unlock2(index);
3347     }
3348     MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3349    
3350     MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3351     {
3352     int basereg=baser;
3353     int indexreg=index;
3354    
3355     CLOBBER_MOV;
3356     s=readreg(s,2);
3357     baser=readreg_offset(baser,4);
3358     index=readreg_offset(index,4);
3359    
3360     base+=get_offset(basereg);
3361     base+=factor*get_offset(indexreg);
3362    
3363     raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3364     unlock2(s);
3365     unlock2(baser);
3366     unlock2(index);
3367     }
3368     MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3369    
3370     MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3371     {
3372     int basereg=baser;
3373     int indexreg=index;
3374    
3375     CLOBBER_MOV;
3376     s=readreg(s,1);
3377     baser=readreg_offset(baser,4);
3378     index=readreg_offset(index,4);
3379    
3380     base+=get_offset(basereg);
3381     base+=factor*get_offset(indexreg);
3382    
3383     raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3384     unlock2(s);
3385     unlock2(baser);
3386     unlock2(index);
3387     }
3388     MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3389    
3390    
3391    
3392     /* Read a long from base+baser+factor*index */
3393     MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3394     {
3395     int basereg=baser;
3396     int indexreg=index;
3397    
3398     CLOBBER_MOV;
3399     baser=readreg_offset(baser,4);
3400     index=readreg_offset(index,4);
3401     base+=get_offset(basereg);
3402     base+=factor*get_offset(indexreg);
3403     d=writereg(d,4);
3404     raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3405     unlock2(d);
3406     unlock2(baser);
3407     unlock2(index);
3408     }
3409     MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3410    
3411    
3412     MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3413     {
3414     int basereg=baser;
3415     int indexreg=index;
3416    
3417     CLOBBER_MOV;
3418     remove_offset(d,-1);
3419     baser=readreg_offset(baser,4);
3420     index=readreg_offset(index,4);
3421     base+=get_offset(basereg);
3422     base+=factor*get_offset(indexreg);
3423     d=writereg(d,2);
3424     raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3425     unlock2(d);
3426     unlock2(baser);
3427     unlock2(index);
3428     }
3429     MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3430    
3431    
3432     MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3433     {
3434     int basereg=baser;
3435     int indexreg=index;
3436    
3437     CLOBBER_MOV;
3438     remove_offset(d,-1);
3439     baser=readreg_offset(baser,4);
3440     index=readreg_offset(index,4);
3441     base+=get_offset(basereg);
3442     base+=factor*get_offset(indexreg);
3443     d=writereg(d,1);
3444     raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3445     unlock2(d);
3446     unlock2(baser);
3447     unlock2(index);
3448     }
3449     MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3450    
3451     /* Read a long from base+factor*index */
3452     MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3453     {
3454     int indexreg=index;
3455    
3456     if (isconst(index)) {
3457     COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3458     return;
3459     }
3460    
3461     CLOBBER_MOV;
3462     index=readreg_offset(index,4);
3463     base+=get_offset(indexreg)*factor;
3464     d=writereg(d,4);
3465    
3466     raw_mov_l_rm_indexed(d,base,index,factor);
3467     unlock2(index);
3468     unlock2(d);
3469     }
3470     MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3471    
3472    
3473     /* read the long at the address contained in s+offset and store in d */
3474     MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3475     {
3476     if (isconst(s)) {
3477     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3478     return;
3479     }
3480     CLOBBER_MOV;
3481     s=readreg(s,4);
3482     d=writereg(d,4);
3483    
3484     raw_mov_l_rR(d,s,offset);
3485     unlock2(d);
3486     unlock2(s);
3487     }
3488     MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3489    
3490     /* read the word at the address contained in s+offset and store in d */
3491     MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3492     {
3493     if (isconst(s)) {
3494     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3495     return;
3496     }
3497     CLOBBER_MOV;
3498     s=readreg(s,4);
3499     d=writereg(d,2);
3500    
3501     raw_mov_w_rR(d,s,offset);
3502     unlock2(d);
3503     unlock2(s);
3504     }
3505     MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3506    
3507     /* read the byte at the address contained in s+offset and store in d */
3508     MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3509     {
3510     if (isconst(s)) {
3511     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3512     return;
3513     }
3514     CLOBBER_MOV;
3515     s=readreg(s,4);
3516     d=writereg(d,1);
3517    
3518     raw_mov_b_rR(d,s,offset);
3519     unlock2(d);
3520     unlock2(s);
3521     }
3522     MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3523    
3524     /* read the long at the address contained in s+offset and store in d */
3525     MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3526     {
3527     int sreg=s;
3528     if (isconst(s)) {
3529     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3530     return;
3531     }
3532     CLOBBER_MOV;
3533     s=readreg_offset(s,4);
3534     offset+=get_offset(sreg);
3535     d=writereg(d,4);
3536    
3537     raw_mov_l_brR(d,s,offset);
3538     unlock2(d);
3539     unlock2(s);
3540     }
3541     MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3542    
3543     /* read the word at the address contained in s+offset and store in d */
3544     MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3545     {
3546     int sreg=s;
3547     if (isconst(s)) {
3548     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3549     return;
3550     }
3551     CLOBBER_MOV;
3552     remove_offset(d,-1);
3553     s=readreg_offset(s,4);
3554     offset+=get_offset(sreg);
3555     d=writereg(d,2);
3556    
3557     raw_mov_w_brR(d,s,offset);
3558     unlock2(d);
3559     unlock2(s);
3560     }
3561     MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3562    
3563     /* read the byte at the address contained in s+offset and store in d */
3564     MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3565     {
3566     int sreg=s;
3567     if (isconst(s)) {
3568     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3569     return;
3570     }
3571     CLOBBER_MOV;
3572     remove_offset(d,-1);
3573     s=readreg_offset(s,4);
3574     offset+=get_offset(sreg);
3575     d=writereg(d,1);
3576    
3577     raw_mov_b_brR(d,s,offset);
3578     unlock2(d);
3579     unlock2(s);
3580     }
3581     MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3582    
3583     MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3584     {
3585     int dreg=d;
3586     if (isconst(d)) {
3587     COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3588     return;
3589     }
3590    
3591     CLOBBER_MOV;
3592     d=readreg_offset(d,4);
3593     offset+=get_offset(dreg);
3594     raw_mov_l_Ri(d,i,offset);
3595     unlock2(d);
3596     }
3597     MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3598    
3599     MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3600     {
3601     int dreg=d;
3602     if (isconst(d)) {
3603     COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3604     return;
3605     }
3606    
3607     CLOBBER_MOV;
3608     d=readreg_offset(d,4);
3609     offset+=get_offset(dreg);
3610     raw_mov_w_Ri(d,i,offset);
3611     unlock2(d);
3612     }
3613     MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3614    
3615     MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3616     {
3617     int dreg=d;
3618     if (isconst(d)) {
3619     COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3620     return;
3621     }
3622    
3623     CLOBBER_MOV;
3624     d=readreg_offset(d,4);
3625     offset+=get_offset(dreg);
3626     raw_mov_b_Ri(d,i,offset);
3627     unlock2(d);
3628     }
3629     MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3630    
3631     /* Warning! OFFSET is byte sized only! */
3632     MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3633     {
3634     if (isconst(d)) {
3635     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3636     return;
3637     }
3638     if (isconst(s)) {
3639     COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3640     return;
3641     }
3642    
3643     CLOBBER_MOV;
3644     s=readreg(s,4);
3645     d=readreg(d,4);
3646    
3647     raw_mov_l_Rr(d,s,offset);
3648     unlock2(d);
3649     unlock2(s);
3650     }
3651     MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3652    
3653     MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3654     {
3655     if (isconst(d)) {
3656     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3657     return;
3658     }
3659     if (isconst(s)) {
3660     COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3661     return;
3662     }
3663    
3664     CLOBBER_MOV;
3665     s=readreg(s,2);
3666     d=readreg(d,4);
3667     raw_mov_w_Rr(d,s,offset);
3668     unlock2(d);
3669     unlock2(s);
3670     }
3671     MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3672    
3673     MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3674     {
3675     if (isconst(d)) {
3676     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3677     return;
3678     }
3679     if (isconst(s)) {
3680     COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3681     return;
3682     }
3683    
3684     CLOBBER_MOV;
3685     s=readreg(s,1);
3686     d=readreg(d,4);
3687     raw_mov_b_Rr(d,s,offset);
3688     unlock2(d);
3689     unlock2(s);
3690     }
3691     MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3692    
3693     MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3694     {
3695     if (isconst(s)) {
3696     COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3697     return;
3698     }
3699     #if USE_OFFSET
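    /* lea d,[d+offset] need not be emitted at all: just record the offset
       against d and let a later access fold it in (see add_offset). */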
3700     if (d==s) {
3701     add_offset(d,offset);
3702     return;
3703     }
3704     #endif
3705     CLOBBER_LEA;
3706     s=readreg(s,4);
3707     d=writereg(d,4);
3708     raw_lea_l_brr(d,s,offset);
3709     unlock2(d);
3710     unlock2(s);
3711     }
3712     MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3713    
3714     MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3715     {
3716     if (!offset) {
3717     COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3718     return;
3719     }
3720     CLOBBER_LEA;
3721     s=readreg(s,4);
3722     index=readreg(index,4);
3723     d=writereg(d,4);
3724    
3725     raw_lea_l_brr_indexed(d,s,index,factor,offset);
3726     unlock2(d);
3727     unlock2(index);
3728     unlock2(s);
3729     }
3730     MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3731    
3732     MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3733     {
3734     CLOBBER_LEA;
3735     s=readreg(s,4);
3736     index=readreg(index,4);
3737     d=writereg(d,4);
3738    
3739     raw_lea_l_rr_indexed(d,s,index,factor);
3740     unlock2(d);
3741     unlock2(index);
3742     unlock2(s);
3743     }
3744     MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3745    
3746     /* write the long in s to the address contained in d+offset */
3747     MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3748     {
3749     int dreg=d;
3750     if (isconst(d)) {
3751     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3752     return;
3753     }
3754    
3755     CLOBBER_MOV;
3756     s=readreg(s,4);
3757     d=readreg_offset(d,4);
3758     offset+=get_offset(dreg);
3759    
3760     raw_mov_l_bRr(d,s,offset);
3761     unlock2(d);
3762     unlock2(s);
3763     }
3764     MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3765    
3766     /* write the word in s to the address contained in d+offset */
3767     MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3768     {
3769     int dreg=d;
3770    
3771     if (isconst(d)) {
3772     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3773     return;
3774     }
3775    
3776     CLOBBER_MOV;
3777     s=readreg(s,2);
3778     d=readreg_offset(d,4);
3779     offset+=get_offset(dreg);
3780     raw_mov_w_bRr(d,s,offset);
3781     unlock2(d);
3782     unlock2(s);
3783     }
3784     MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3785    
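    /* write the byte in s to the address contained in d+offset */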
3786     MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3787     {
3788     int dreg=d;
3789     if (isconst(d)) {
3790     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3791     return;
3792     }
3793    
3794     CLOBBER_MOV;
3795     s=readreg(s,1);
3796     d=readreg_offset(d,4);
3797     offset+=get_offset(dreg);
3798     raw_mov_b_bRr(d,s,offset);
3799     unlock2(d);
3800     unlock2(s);
3801     }
3802     MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3803    
3804     MIDFUNC(1,bswap_32,(RW4 r))
3805     {
3806     int reg=r;
3807    
3808     if (isconst(r)) {
3809     uae_u32 oldv=live.state[r].val;
3810     live.state[r].val=reverse32(oldv);
3811     return;
3812     }
3813    
3814     CLOBBER_SW32;
3815     r=rmw(r,4,4);
3816     raw_bswap_32(r);
3817     unlock2(r);
3818     }
3819     MENDFUNC(1,bswap_32,(RW4 r))
3820    
3821     MIDFUNC(1,bswap_16,(RW2 r))
3822     {
3823     if (isconst(r)) {
3824     uae_u32 oldv=live.state[r].val;
3825     live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3826     (oldv&0xffff0000);
3827     return;
3828     }
3829    
3830     CLOBBER_SW16;
3831     r=rmw(r,2,2);
3832    
3833     raw_bswap_16(r);
3834     unlock2(r);
3835     }
3836     MENDFUNC(1,bswap_16,(RW2 r))
3837    
3838    
3839    
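    /* A register-to-register move emits no code: d is simply aliased onto
       the host register already holding s (note the nholds bookkeeping
       below) and gets written back lazily. */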
3840     MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3841     {
3842     int olds;
3843    
3844     if (d==s) { /* How pointless! */
3845     return;
3846     }
3847     if (isconst(s)) {
3848     COMPCALL(mov_l_ri)(d,live.state[s].val);
3849     return;
3850     }
3851     olds=s;
3852     disassociate(d);
3853     s=readreg_offset(s,4);
3854     live.state[d].realreg=s;
3855     live.state[d].realind=live.nat[s].nholds;
3856     live.state[d].val=live.state[olds].val;
3857     live.state[d].validsize=4;
3858     live.state[d].dirtysize=4;
3859     set_status(d,DIRTY);
3860    
3861     live.nat[s].holds[live.nat[s].nholds]=d;
3862     live.nat[s].nholds++;
3863     log_clobberreg(d);
3864     /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3865     d,s,live.state[d].realind,live.nat[s].nholds); */
3866     unlock2(s);
3867     }
3868     MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3869    
3870     MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3871     {
3872     if (isconst(s)) {
3873     COMPCALL(mov_l_mi)(d,live.state[s].val);
3874     return;
3875     }
3876     CLOBBER_MOV;
3877     s=readreg(s,4);
3878    
3879     raw_mov_l_mr(d,s);
3880     unlock2(s);
3881     }
3882     MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3883    
3884    
3885     MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3886     {
3887     if (isconst(s)) {
3888     COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3889     return;
3890     }
3891     CLOBBER_MOV;
3892     s=readreg(s,2);
3893    
3894     raw_mov_w_mr(d,s);
3895     unlock2(s);
3896     }
3897     MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3898    
3899     MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3900     {
3901     CLOBBER_MOV;
3902     d=writereg(d,2);
3903    
3904     raw_mov_w_rm(d,s);
3905     unlock2(d);
3906     }
3907     MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3908    
3909     MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3910     {
3911     if (isconst(s)) {
3912     COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3913     return;
3914     }
3915    
3916     CLOBBER_MOV;
3917     s=readreg(s,1);
3918    
3919     raw_mov_b_mr(d,s);
3920     unlock2(s);
3921     }
3922     MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3923    
3924     MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3925     {
3926     CLOBBER_MOV;
3927     d=writereg(d,1);
3928    
3929     raw_mov_b_rm(d,s);
3930     unlock2(d);
3931     }
3932     MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3933    
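    /* Loading an immediate emits no code either: the constant is recorded
       via set_const and propagated by the isconst shortcuts until a real
       use forces it into a host register. */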
3934     MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3935     {
3936     set_const(d,s);
3937     return;
3938     }
3939     MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3940    
3941     MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3942     {
3943     CLOBBER_MOV;
3944     d=writereg(d,2);
3945    
3946     raw_mov_w_ri(d,s);
3947     unlock2(d);
3948     }
3949     MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3950    
3951     MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3952     {
3953     CLOBBER_MOV;
3954     d=writereg(d,1);
3955    
3956     raw_mov_b_ri(d,s);
3957     unlock2(d);
3958     }
3959     MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3960    
3961    
3962     MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3963     {
3964     CLOBBER_ADD;
3965     raw_add_l_mi(d,s) ;
3966     }
3967     MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3968    
3969     MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3970     {
3971     CLOBBER_ADD;
3972     raw_add_w_mi(d,s) ;
3973     }
3974     MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3975    
3976     MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3977     {
3978     CLOBBER_ADD;
3979     raw_add_b_mi(d,s);
3980     }
3981     MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3982    
3983    
3984     MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3985     {
3986     CLOBBER_TEST;
3987     d=readreg(d,4);
3988    
3989     raw_test_l_ri(d,i);
3990     unlock2(d);
3991     }
3992     MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3993    
3994     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3995     {
3996     CLOBBER_TEST;
3997     d=readreg(d,4);
3998     s=readreg(s,4);
3999    
4000     raw_test_l_rr(d,s);
4001     unlock2(d);
4002     unlock2(s);
4003     }
4004     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4005    
4006     MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4007     {
4008     CLOBBER_TEST;
4009     d=readreg(d,2);
4010     s=readreg(s,2);
4011    
4012     raw_test_w_rr(d,s);
4013     unlock2(d);
4014     unlock2(s);
4015     }
4016     MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4017    
4018     MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4019     {
4020     CLOBBER_TEST;
4021     d=readreg(d,1);
4022     s=readreg(s,1);
4023    
4024     raw_test_b_rr(d,s);
4025     unlock2(d);
4026     unlock2(s);
4027     }
4028     MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4029    
4030    
4031     MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
4032     {
4033     if (isconst(d) && !needflags) {
4034     live.state[d].val &= i;
4035     return;
4036     }
4037    
4038     CLOBBER_AND;
4039     d=rmw(d,4,4);
4040    
4041     raw_and_l_ri(d,i);
4042     unlock2(d);
4043     }
4044     MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
4045    
4046     MIDFUNC(2,and_l,(RW4 d, R4 s))
4047     {
4048     CLOBBER_AND;
4049     s=readreg(s,4);
4050     d=rmw(d,4,4);
4051    
4052     raw_and_l(d,s);
4053     unlock2(d);
4054     unlock2(s);
4055     }
4056     MENDFUNC(2,and_l,(RW4 d, R4 s))
4057    
4058     MIDFUNC(2,and_w,(RW2 d, R2 s))
4059     {
4060     CLOBBER_AND;
4061     s=readreg(s,2);
4062     d=rmw(d,2,2);
4063    
4064     raw_and_w(d,s);
4065     unlock2(d);
4066     unlock2(s);
4067     }
4068     MENDFUNC(2,and_w,(RW2 d, R2 s))
4069    
4070     MIDFUNC(2,and_b,(RW1 d, R1 s))
4071     {
4072     CLOBBER_AND;
4073     s=readreg(s,1);
4074     d=rmw(d,1,1);
4075    
4076     raw_and_b(d,s);
4077     unlock2(d);
4078     unlock2(s);
4079     }
4080     MENDFUNC(2,and_b,(RW1 d, R1 s))
4081    
4082     // gb-- used for making an fpcr value in compemu_fpp.cpp
4083     MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
4084     {
4085     CLOBBER_OR;
4086     d=rmw(d,4,4);
4087    
4088     raw_or_l_rm(d,s);
4089     unlock2(d);
4090     }
4091     MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4092    
4093     MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4094     {
4095     if (isconst(d) && !needflags) {
4096     live.state[d].val|=i;
4097     return;
4098     }
4099     CLOBBER_OR;
4100     d=rmw(d,4,4);
4101    
4102     raw_or_l_ri(d,i);
4103     unlock2(d);
4104     }
4105     MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4106    
4107     MIDFUNC(2,or_l,(RW4 d, R4 s))
4108     {
4109     if (isconst(d) && isconst(s) && !needflags) {
4110     live.state[d].val|=live.state[s].val;
4111     return;
4112     }
4113     CLOBBER_OR;
4114     s=readreg(s,4);
4115     d=rmw(d,4,4);
4116    
4117     raw_or_l(d,s);
4118     unlock2(d);
4119     unlock2(s);
4120     }
4121     MENDFUNC(2,or_l,(RW4 d, R4 s))
4122    
4123     MIDFUNC(2,or_w,(RW2 d, R2 s))
4124     {
4125     CLOBBER_OR;
4126     s=readreg(s,2);
4127     d=rmw(d,2,2);
4128    
4129     raw_or_w(d,s);
4130     unlock2(d);
4131     unlock2(s);
4132     }
4133     MENDFUNC(2,or_w,(RW2 d, R2 s))
4134    
4135     MIDFUNC(2,or_b,(RW1 d, R1 s))
4136     {
4137     CLOBBER_OR;
4138     s=readreg(s,1);
4139     d=rmw(d,1,1);
4140    
4141     raw_or_b(d,s);
4142     unlock2(d);
4143     unlock2(s);
4144     }
4145     MENDFUNC(2,or_b,(RW1 d, R1 s))
4146    
4147     MIDFUNC(2,adc_l,(RW4 d, R4 s))
4148     {
4149     CLOBBER_ADC;
4150     s=readreg(s,4);
4151     d=rmw(d,4,4);
4152    
4153     raw_adc_l(d,s);
4154    
4155     unlock2(d);
4156     unlock2(s);
4157     }
4158     MENDFUNC(2,adc_l,(RW4 d, R4 s))
4159    
4160     MIDFUNC(2,adc_w,(RW2 d, R2 s))
4161     {
4162     CLOBBER_ADC;
4163     s=readreg(s,2);
4164     d=rmw(d,2,2);
4165    
4166     raw_adc_w(d,s);
4167     unlock2(d);
4168     unlock2(s);
4169     }
4170     MENDFUNC(2,adc_w,(RW2 d, R2 s))
4171    
4172     MIDFUNC(2,adc_b,(RW1 d, R1 s))
4173     {
4174     CLOBBER_ADC;
4175     s=readreg(s,1);
4176     d=rmw(d,1,1);
4177    
4178     raw_adc_b(d,s);
4179     unlock2(d);
4180     unlock2(s);
4181     }
4182     MENDFUNC(2,adc_b,(RW1 d, R1 s))
4183    
4184     MIDFUNC(2,add_l,(RW4 d, R4 s))
4185     {
4186     if (isconst(s)) {
4187     COMPCALL(add_l_ri)(d,live.state[s].val);
4188     return;
4189     }
4190    
4191     CLOBBER_ADD;
4192     s=readreg(s,4);
4193     d=rmw(d,4,4);
4194    
4195     raw_add_l(d,s);
4196    
4197     unlock2(d);
4198     unlock2(s);
4199     }
4200     MENDFUNC(2,add_l,(RW4 d, R4 s))
4201    
4202     MIDFUNC(2,add_w,(RW2 d, R2 s))
4203     {
4204     if (isconst(s)) {
4205     COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4206     return;
4207     }
4208    
4209     CLOBBER_ADD;
4210     s=readreg(s,2);
4211     d=rmw(d,2,2);
4212    
4213     raw_add_w(d,s);
4214     unlock2(d);
4215     unlock2(s);
4216     }
4217     MENDFUNC(2,add_w,(RW2 d, R2 s))
4218    
4219     MIDFUNC(2,add_b,(RW1 d, R1 s))
4220     {
4221     if (isconst(s)) {
4222     COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4223     return;
4224     }
4225    
4226     CLOBBER_ADD;
4227     s=readreg(s,1);
4228     d=rmw(d,1,1);
4229    
4230     raw_add_b(d,s);
4231     unlock2(d);
4232     unlock2(s);
4233     }
4234     MENDFUNC(2,add_b,(RW1 d, R1 s))
4235    
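/* Lazy constant offsetting (USE_OFFSET), as used by sub_l_ri/add_l_ri
   below: when the 68k flags are not needed, an immediate add/sub is
   merely recorded as a pending offset on the vreg via add_offset(),
   and only folded into a real host instruction when the register is
   eventually flushed or used (remove_offset). Runs of post-increments
   and pre-decrements thus collapse into a single adjustment. */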
4236     MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4237     {
4238     if (!i && !needflags)
4239     return;
4240     if (isconst(d) && !needflags) {
4241     live.state[d].val-=i;
4242     return;
4243     }
4244     #if USE_OFFSET
4245     if (!needflags) {
4246     add_offset(d,-i);
4247     return;
4248     }
4249     #endif
4250    
4251     CLOBBER_SUB;
4252     d=rmw(d,4,4);
4253    
4254     raw_sub_l_ri(d,i);
4255     unlock2(d);
4256     }
4257     MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4258    
4259     MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4260     {
4261     if (!i && !needflags)
4262     return;
4263    
4264     CLOBBER_SUB;
4265     d=rmw(d,2,2);
4266    
4267     raw_sub_w_ri(d,i);
4268     unlock2(d);
4269     }
4270     MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4271    
4272     MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4273     {
4274     if (!i && !needflags)
4275     return;
4276    
4277     CLOBBER_SUB;
4278     d=rmw(d,1,1);
4279    
4280     raw_sub_b_ri(d,i);
4281    
4282     unlock2(d);
4283     }
4284     MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4285    
4286     MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4287     {
4288     if (!i && !needflags)
4289     return;
4290     if (isconst(d) && !needflags) {
4291     live.state[d].val+=i;
4292     return;
4293     }
4294     #if USE_OFFSET
4295     if (!needflags) {
4296     add_offset(d,i);
4297     return;
4298     }
4299     #endif
4300     CLOBBER_ADD;
4301     d=rmw(d,4,4);
4302     raw_add_l_ri(d,i);
4303     unlock2(d);
4304     }
4305     MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
4306    
4307     MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4308     {
4309     if (!i && !needflags)
4310     return;
4311    
4312     CLOBBER_ADD;
4313     d=rmw(d,2,2);
4314    
4315     raw_add_w_ri(d,i);
4316     unlock2(d);
4317     }
4318     MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4319    
4320     MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4321     {
4322     if (!i && !needflags)
4323     return;
4324    
4325     CLOBBER_ADD;
4326     d=rmw(d,1,1);
4327    
4328     raw_add_b_ri(d,i);
4329    
4330     unlock2(d);
4331     }
4332     MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4333    
4334     MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4335     {
4336     CLOBBER_SBB;
4337     s=readreg(s,4);
4338     d=rmw(d,4,4);
4339    
4340     raw_sbb_l(d,s);
4341     unlock2(d);
4342     unlock2(s);
4343     }
4344     MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4345    
4346     MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4347     {
4348     CLOBBER_SBB;
4349     s=readreg(s,2);
4350     d=rmw(d,2,2);
4351    
4352     raw_sbb_w(d,s);
4353     unlock2(d);
4354     unlock2(s);
4355     }
4356     MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4357    
4358     MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4359     {
4360     CLOBBER_SBB;
4361     s=readreg(s,1);
4362     d=rmw(d,1,1);
4363    
4364     raw_sbb_b(d,s);
4365     unlock2(d);
4366     unlock2(s);
4367     }
4368     MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4369    
4370     MIDFUNC(2,sub_l,(RW4 d, R4 s))
4371     {
4372     if (isconst(s)) {
4373     COMPCALL(sub_l_ri)(d,live.state[s].val);
4374     return;
4375     }
4376    
4377     CLOBBER_SUB;
4378     s=readreg(s,4);
4379     d=rmw(d,4,4);
4380    
4381     raw_sub_l(d,s);
4382     unlock2(d);
4383     unlock2(s);
4384     }
4385     MENDFUNC(2,sub_l,(RW4 d, R4 s))
4386    
4387     MIDFUNC(2,sub_w,(RW2 d, R2 s))
4388     {
4389     if (isconst(s)) {
4390     COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4391     return;
4392     }
4393    
4394     CLOBBER_SUB;
4395     s=readreg(s,2);
4396     d=rmw(d,2,2);
4397    
4398     raw_sub_w(d,s);
4399     unlock2(d);
4400     unlock2(s);
4401     }
4402     MENDFUNC(2,sub_w,(RW2 d, R2 s))
4403    
4404     MIDFUNC(2,sub_b,(RW1 d, R1 s))
4405     {
4406     if (isconst(s)) {
4407     COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4408     return;
4409     }
4410    
4411     CLOBBER_SUB;
4412     s=readreg(s,1);
4413     d=rmw(d,1,1);
4414    
4415     raw_sub_b(d,s);
4416     unlock2(d);
4417     unlock2(s);
4418     }
4419     MENDFUNC(2,sub_b,(RW1 d, R1 s))
4420    
4421     MIDFUNC(2,cmp_l,(R4 d, R4 s))
4422     {
4423     CLOBBER_CMP;
4424     s=readreg(s,4);
4425     d=readreg(d,4);
4426    
4427     raw_cmp_l(d,s);
4428     unlock2(d);
4429     unlock2(s);
4430     }
4431     MENDFUNC(2,cmp_l,(R4 d, R4 s))
4432    
4433     MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4434     {
4435     CLOBBER_CMP;
4436     r=readreg(r,4);
4437    
4438     raw_cmp_l_ri(r,i);
4439     unlock2(r);
4440     }
4441     MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4442    
4443     MIDFUNC(2,cmp_w,(R2 d, R2 s))
4444     {
4445     CLOBBER_CMP;
4446     s=readreg(s,2);
4447     d=readreg(d,2);
4448    
4449     raw_cmp_w(d,s);
4450     unlock2(d);
4451     unlock2(s);
4452     }
4453     MENDFUNC(2,cmp_w,(R2 d, R2 s))
4454    
4455     MIDFUNC(2,cmp_b,(R1 d, R1 s))
4456     {
4457     CLOBBER_CMP;
4458     s=readreg(s,1);
4459     d=readreg(d,1);
4460    
4461     raw_cmp_b(d,s);
4462     unlock2(d);
4463     unlock2(s);
4464     }
4465     MENDFUNC(2,cmp_b,(R1 d, R1 s))
4466    
4467    
4468     MIDFUNC(2,xor_l,(RW4 d, R4 s))
4469     {
4470     CLOBBER_XOR;
4471     s=readreg(s,4);
4472     d=rmw(d,4,4);
4473    
4474     raw_xor_l(d,s);
4475     unlock2(d);
4476     unlock2(s);
4477     }
4478     MENDFUNC(2,xor_l,(RW4 d, R4 s))
4479    
4480     MIDFUNC(2,xor_w,(RW2 d, R2 s))
4481     {
4482     CLOBBER_XOR;
4483     s=readreg(s,2);
4484     d=rmw(d,2,2);
4485    
4486     raw_xor_w(d,s);
4487     unlock2(d);
4488     unlock2(s);
4489     }
4490     MENDFUNC(2,xor_w,(RW2 d, R2 s))
4491    
4492     MIDFUNC(2,xor_b,(RW1 d, R1 s))
4493     {
4494     CLOBBER_XOR;
4495     s=readreg(s,1);
4496     d=rmw(d,1,1);
4497    
4498     raw_xor_b(d,s);
4499     unlock2(d);
4500     unlock2(s);
4501     }
4502     MENDFUNC(2,xor_b,(RW1 d, R1 s))
4503    
4504     MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4505     {
4506     clobber_flags();
4507     remove_all_offsets();
4508     if (osize==4) {
4509     if (out1!=in1 && out1!=r) {
4510     COMPCALL(forget_about)(out1);
4511     }
4512     }
4513     else {
4514     tomem_c(out1);
4515     }
4516    
4517     in1=readreg_specific(in1,isize,REG_PAR1);
4518     r=readreg(r,4);
4519     prepare_for_call_1(); /* This should ensure that there won't be
4520     any need for swapping nregs in prepare_for_call_2
4521     */
4522     #if USE_NORMAL_CALLING_CONVENTION
4523     raw_push_l_r(in1);
4524     #endif
4525     unlock2(in1);
4526     unlock2(r);
4527    
4528     prepare_for_call_2();
4529     raw_call_r(r);
4530    
4531     #if USE_NORMAL_CALLING_CONVENTION
4532     raw_inc_sp(4);
4533     #endif
4534    
4535    
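/* No "mov" is emitted for the return value; the allocator is simply
   told that REG_RESULT (the native register the host calling
   convention returns results in) now holds vreg out1, marked DIRTY so
   it is written back on the next flush. */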
4536     live.nat[REG_RESULT].holds[0]=out1;
4537     live.nat[REG_RESULT].nholds=1;
4538     live.nat[REG_RESULT].touched=touchcnt++;
4539    
4540     live.state[out1].realreg=REG_RESULT;
4541     live.state[out1].realind=0;
4542     live.state[out1].val=0;
4543     live.state[out1].validsize=osize;
4544     live.state[out1].dirtysize=osize;
4545     set_status(out1,DIRTY);
4546     }
4547     MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4548    
4549     MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4550     {
4551     clobber_flags();
4552     remove_all_offsets();
4553     in1=readreg_specific(in1,isize1,REG_PAR1);
4554     in2=readreg_specific(in2,isize2,REG_PAR2);
4555     r=readreg(r,4);
4556     prepare_for_call_1(); /* This should ensure that there won't be
4557     any need for swapping nregs in prepare_for_call_2
4558     */
4559     #if USE_NORMAL_CALLING_CONVENTION
4560     raw_push_l_r(in2);
4561     raw_push_l_r(in1);
4562     #endif
4563     unlock2(r);
4564     unlock2(in1);
4565     unlock2(in2);
4566     prepare_for_call_2();
4567     raw_call_r(r);
4568     #if USE_NORMAL_CALLING_CONVENTION
4569     raw_inc_sp(8);
4570     #endif
4571     }
4572     MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4573    
4574     /* forget_about() takes a mid-layer register */
4575     MIDFUNC(1,forget_about,(W4 r))
4576     {
4577     if (isinreg(r))
4578     disassociate(r);
4579     live.state[r].val=0;
4580     set_status(r,UNDEF);
4581     }
4582     MENDFUNC(1,forget_about,(W4 r))
4583    
4584     MIDFUNC(0,nop,(void))
4585     {
4586     raw_nop();
4587     }
4588     MENDFUNC(0,nop,(void))
4589    
4590    
4591     MIDFUNC(1,f_forget_about,(FW r))
4592     {
4593     if (f_isinreg(r))
4594     f_disassociate(r);
4595     live.fate[r].status=UNDEF;
4596     }
4597     MENDFUNC(1,f_forget_about,(FW r))
4598    
4599     MIDFUNC(1,fmov_pi,(FW r))
4600     {
4601     r=f_writereg(r);
4602     raw_fmov_pi(r);
4603     f_unlock(r);
4604     }
4605     MENDFUNC(1,fmov_pi,(FW r))
4606    
4607     MIDFUNC(1,fmov_log10_2,(FW r))
4608     {
4609     r=f_writereg(r);
4610     raw_fmov_log10_2(r);
4611     f_unlock(r);
4612     }
4613     MENDFUNC(1,fmov_log10_2,(FW r))
4614    
4615     MIDFUNC(1,fmov_log2_e,(FW r))
4616     {
4617     r=f_writereg(r);
4618     raw_fmov_log2_e(r);
4619     f_unlock(r);
4620     }
4621     MENDFUNC(1,fmov_log2_e,(FW r))
4622    
4623     MIDFUNC(1,fmov_loge_2,(FW r))
4624     {
4625     r=f_writereg(r);
4626     raw_fmov_loge_2(r);
4627     f_unlock(r);
4628     }
4629     MENDFUNC(1,fmov_loge_2,(FW r))
4630    
4631     MIDFUNC(1,fmov_1,(FW r))
4632     {
4633     r=f_writereg(r);
4634     raw_fmov_1(r);
4635     f_unlock(r);
4636     }
4637     MENDFUNC(1,fmov_1,(FW r))
4638    
4639     MIDFUNC(1,fmov_0,(FW r))
4640     {
4641     r=f_writereg(r);
4642     raw_fmov_0(r);
4643     f_unlock(r);
4644     }
4645     MENDFUNC(1,fmov_0,(FW r))
4646    
4647     MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4648     {
4649     r=f_writereg(r);
4650     raw_fmov_rm(r,m);
4651     f_unlock(r);
4652     }
4653     MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4654    
4655     MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4656     {
4657     r=f_writereg(r);
4658     raw_fmovi_rm(r,m);
4659     f_unlock(r);
4660     }
4661     MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4662    
4663     MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4664     {
4665     r=f_readreg(r);
4666     raw_fmovi_mr(m,r);
4667     f_unlock(r);
4668     }
4669     MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4670    
4671     MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4672     {
4673     r=f_writereg(r);
4674     raw_fmovs_rm(r,m);
4675     f_unlock(r);
4676     }
4677     MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4678    
4679     MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4680     {
4681     r=f_readreg(r);
4682     raw_fmovs_mr(m,r);
4683     f_unlock(r);
4684     }
4685     MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4686    
4687     MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4688     {
4689     r=f_readreg(r);
4690     raw_fmov_ext_mr(m,r);
4691     f_unlock(r);
4692     }
4693     MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4694    
4695     MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4696     {
4697     r=f_readreg(r);
4698     raw_fmov_mr(m,r);
4699     f_unlock(r);
4700     }
4701     MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4702    
4703     MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4704     {
4705     r=f_writereg(r);
4706     raw_fmov_ext_rm(r,m);
4707     f_unlock(r);
4708     }
4709     MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4710    
4711     MIDFUNC(2,fmov_rr,(FW d, FR s))
4712     {
4713     if (d==s) { /* How pointless! */
4714     return;
4715     }
4716     #if USE_F_ALIAS
4717     f_disassociate(d);
4718     s=f_readreg(s);
4719     live.fate[d].realreg=s;
4720     live.fate[d].realind=live.fat[s].nholds;
4721     live.fate[d].status=DIRTY;
4722     live.fat[s].holds[live.fat[s].nholds]=d;
4723     live.fat[s].nholds++;
4724     f_unlock(s);
4725     #else
4726     s=f_readreg(s);
4727     d=f_writereg(d);
4728     raw_fmov_rr(d,s);
4729     f_unlock(s);
4730     f_unlock(d);
4731     #endif
4732     }
4733     MENDFUNC(2,fmov_rr,(FW d, FR s))
4734    
4735     MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4736     {
4737     index=readreg(index,4);
4738    
4739     raw_fldcw_m_indexed(index,base);
4740     unlock2(index);
4741     }
4742     MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4743    
4744     MIDFUNC(1,ftst_r,(FR r))
4745     {
4746     r=f_readreg(r);
4747     raw_ftst_r(r);
4748     f_unlock(r);
4749     }
4750     MENDFUNC(1,ftst_r,(FR r))
4751    
4752     MIDFUNC(0,dont_care_fflags,(void))
4753     {
4754     f_disassociate(FP_RESULT);
4755     }
4756     MENDFUNC(0,dont_care_fflags,(void))
4757    
4758     MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4759     {
4760     s=f_readreg(s);
4761     d=f_writereg(d);
4762     raw_fsqrt_rr(d,s);
4763     f_unlock(s);
4764     f_unlock(d);
4765     }
4766     MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4767    
4768     MIDFUNC(2,fabs_rr,(FW d, FR s))
4769     {
4770     s=f_readreg(s);
4771     d=f_writereg(d);
4772     raw_fabs_rr(d,s);
4773     f_unlock(s);
4774     f_unlock(d);
4775     }
4776     MENDFUNC(2,fabs_rr,(FW d, FR s))
4777    
4778     MIDFUNC(2,fsin_rr,(FW d, FR s))
4779     {
4780     s=f_readreg(s);
4781     d=f_writereg(d);
4782     raw_fsin_rr(d,s);
4783     f_unlock(s);
4784     f_unlock(d);
4785     }
4786     MENDFUNC(2,fsin_rr,(FW d, FR s))
4787    
4788     MIDFUNC(2,fcos_rr,(FW d, FR s))
4789     {
4790     s=f_readreg(s);
4791     d=f_writereg(d);
4792     raw_fcos_rr(d,s);
4793     f_unlock(s);
4794     f_unlock(d);
4795     }
4796     MENDFUNC(2,fcos_rr,(FW d, FR s))
4797    
4798     MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4799     {
4800     s=f_readreg(s);
4801     d=f_writereg(d);
4802     raw_ftwotox_rr(d,s);
4803     f_unlock(s);
4804     f_unlock(d);
4805     }
4806     MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4807    
4808     MIDFUNC(2,fetox_rr,(FW d, FR s))
4809     {
4810     s=f_readreg(s);
4811     d=f_writereg(d);
4812     raw_fetox_rr(d,s);
4813     f_unlock(s);
4814     f_unlock(d);
4815     }
4816     MENDFUNC(2,fetox_rr,(FW d, FR s))
4817    
4818     MIDFUNC(2,frndint_rr,(FW d, FR s))
4819     {
4820     s=f_readreg(s);
4821     d=f_writereg(d);
4822     raw_frndint_rr(d,s);
4823     f_unlock(s);
4824     f_unlock(d);
4825     }
4826     MENDFUNC(2,frndint_rr,(FW d, FR s))
4827    
4828     MIDFUNC(2,flog2_rr,(FW d, FR s))
4829     {
4830     s=f_readreg(s);
4831     d=f_writereg(d);
4832     raw_flog2_rr(d,s);
4833     f_unlock(s);
4834     f_unlock(d);
4835     }
4836     MENDFUNC(2,flog2_rr,(FW d, FR s))
4837    
4838     MIDFUNC(2,fneg_rr,(FW d, FR s))
4839     {
4840     s=f_readreg(s);
4841     d=f_writereg(d);
4842     raw_fneg_rr(d,s);
4843     f_unlock(s);
4844     f_unlock(d);
4845     }
4846     MENDFUNC(2,fneg_rr,(FW d, FR s))
4847    
4848     MIDFUNC(2,fadd_rr,(FRW d, FR s))
4849     {
4850     s=f_readreg(s);
4851     d=f_rmw(d);
4852     raw_fadd_rr(d,s);
4853     f_unlock(s);
4854     f_unlock(d);
4855     }
4856     MENDFUNC(2,fadd_rr,(FRW d, FR s))
4857    
4858     MIDFUNC(2,fsub_rr,(FRW d, FR s))
4859     {
4860     s=f_readreg(s);
4861     d=f_rmw(d);
4862     raw_fsub_rr(d,s);
4863     f_unlock(s);
4864     f_unlock(d);
4865     }
4866     MENDFUNC(2,fsub_rr,(FRW d, FR s))
4867    
4868     MIDFUNC(2,fcmp_rr,(FR d, FR s))
4869     {
4870     d=f_readreg(d);
4871     s=f_readreg(s);
4872     raw_fcmp_rr(d,s);
4873     f_unlock(s);
4874     f_unlock(d);
4875     }
4876     MENDFUNC(2,fcmp_rr,(FR d, FR s))
4877    
4878     MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4879     {
4880     s=f_readreg(s);
4881     d=f_rmw(d);
4882     raw_fdiv_rr(d,s);
4883     f_unlock(s);
4884     f_unlock(d);
4885     }
4886     MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4887    
4888     MIDFUNC(2,frem_rr,(FRW d, FR s))
4889     {
4890     s=f_readreg(s);
4891     d=f_rmw(d);
4892     raw_frem_rr(d,s);
4893     f_unlock(s);
4894     f_unlock(d);
4895     }
4896     MENDFUNC(2,frem_rr,(FRW d, FR s))
4897    
4898     MIDFUNC(2,frem1_rr,(FRW d, FR s))
4899     {
4900     s=f_readreg(s);
4901     d=f_rmw(d);
4902     raw_frem1_rr(d,s);
4903     f_unlock(s);
4904     f_unlock(d);
4905     }
4906     MENDFUNC(2,frem1_rr,(FRW d, FR s))
4907    
4908     MIDFUNC(2,fmul_rr,(FRW d, FR s))
4909     {
4910     s=f_readreg(s);
4911     d=f_rmw(d);
4912     raw_fmul_rr(d,s);
4913     f_unlock(s);
4914     f_unlock(d);
4915     }
4916     MENDFUNC(2,fmul_rr,(FRW d, FR s))
4917    
4918     /********************************************************************
4919     * Support functions exposed to gencomp. CREATE time *
4920     ********************************************************************/
4921    
4922 gbeauche 1.26 void set_zero(int r, int tmp)
4923     {
4924     if (setzflg_uses_bsf)
4925     bsf_l_rr(r,r);
4926     else
4927     simulate_bsf(tmp,r);
4928     }
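
/* set_zero() exploits the fact that x86 BSF sets ZF exactly when its
   source operand is zero, so the 68k Z flag lands in the native flags
   without a separate TEST. Whether the host's BSF is usable for this
   is probed at startup (target_check_bsf); otherwise simulate_bsf()
   provides an equivalent instruction sequence. */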
4929    
4930 gbeauche 1.1 int kill_rodent(int r)
4931     {
4932     return KILLTHERAT &&
4933     have_rat_stall &&
4934     (live.state[r].status==INMEM ||
4935     live.state[r].status==CLEAN ||
4936     live.state[r].status==ISCONST ||
4937     live.state[r].dirtysize==4);
4938     }
4939    
4940     uae_u32 get_const(int r)
4941     {
4942     Dif (!isconst(r)) {
4943     write_log("Register %d should be constant, but isn't\n",r);
4944     abort();
4945     }
4946     return live.state[r].val;
4947     }
4948    
4949     void sync_m68k_pc(void)
4950     {
4951     if (m68k_pc_offset) {
4952     add_l_ri(PC_P,m68k_pc_offset);
4953     comp_pc_p+=m68k_pc_offset;
4954     m68k_pc_offset=0;
4955     }
4956     }
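
/* m68k_pc_offset accumulates the byte length of the 68k instructions
   translated so far; sync_m68k_pc() folds it into PC_P with a single
   add_l_ri whenever an exact emulated PC is required. */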
4957    
4958     /********************************************************************
4959     * Scratch registers management *
4960     ********************************************************************/
4961    
4962     struct scratch_t {
4963     uae_u32 regs[VREGS];
4964     fpu_register fregs[VFREGS];
4965     };
4966    
4967     static scratch_t scratch;
4968    
4969     /********************************************************************
4970     * Support functions exposed to newcpu *
4971     ********************************************************************/
4972    
4973     static inline const char *str_on_off(bool b)
4974     {
4975     return b ? "on" : "off";
4976     }
4977    
4978     void compiler_init(void)
4979     {
4980     static bool initialized = false;
4981     if (initialized)
4982     return;
4983 gbeauche 1.24
4984 gbeauche 1.1 #if JIT_DEBUG
4985     // JIT debug mode ?
4986     JITDebug = PrefsFindBool("jitdebug");
4987     #endif
4988     write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4989    
4990     #ifdef USE_JIT_FPU
4991     // Use JIT compiler for FPU instructions ?
4992     avoid_fpu = !PrefsFindBool("jitfpu");
4993     #else
4994     // JIT FPU is always disabled
4995     avoid_fpu = true;
4996     #endif
4997     write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4998    
4999     // Get size of the translation cache (in KB)
5000     cache_size = PrefsFindInt32("jitcachesize");
5001     write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
5002    
5003     // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
5004     raw_init_cpu();
5005 gbeauche 1.15 setzflg_uses_bsf = target_check_bsf();
5006 gbeauche 1.1 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
5007     write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
5008 gbeauche 1.5 write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
5009 gbeauche 1.1
5010     // Translation cache flush mechanism
5011     lazy_flush = PrefsFindBool("jitlazyflush");
5012     write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
5013     flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
5014    
5015     // Compiler features
5016     write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
5017     write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
5018     write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
5019 gbeauche 1.8 write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
5020 gbeauche 1.1 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
5021    
5022     // Build compiler tables
5023     build_comp();
5024    
5025     initialized = true;
5026    
5027 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
5028     write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
5029     #endif
5030    
5031 gbeauche 1.1 #if PROFILE_COMPILE_TIME
5032     write_log("<JIT compiler> : gather statistics on translation time\n");
5033     emul_start_time = clock();
5034     #endif
5035     }
5036    
5037     void compiler_exit(void)
5038     {
5039     #if PROFILE_COMPILE_TIME
5040     emul_end_time = clock();
5041     #endif
5042    
5043     // Deallocate translation cache
5044     if (compiled_code) {
5045 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5046 gbeauche 1.1 compiled_code = 0;
5047     }
5048 gbeauche 1.24
5049     // Deallocate popallspace
5050     if (popallspace) {
5051     vm_release(popallspace, POPALLSPACE_SIZE);
5052     popallspace = 0;
5053     }
5054 gbeauche 1.1
5055     #if PROFILE_COMPILE_TIME
5056     write_log("### Compile Block statistics\n");
5057     write_log("Number of calls to compile_block : %d\n", compile_count);
5058     uae_u32 emul_time = emul_end_time - emul_start_time;
5059     write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
5060     write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
5061     100.0*double(compile_time)/double(emul_time));
5062     write_log("\n");
5063     #endif
5064 gbeauche 1.9
5065     #if PROFILE_UNTRANSLATED_INSNS
5066     uae_u64 untranslated_count = 0;
5067     for (int i = 0; i < 65536; i++) {
5068     opcode_nums[i] = i;
5069     untranslated_count += raw_cputbl_count[i];
5070     }
5071     write_log("Sorting untranslated instruction counts...\n");
5072     qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
5073     write_log("\nRank Opc Count Name\n");
5074     for (int i = 0; i < untranslated_top_ten; i++) {
5075     uae_u32 count = raw_cputbl_count[opcode_nums[i]];
5076     struct instr *dp;
5077     struct mnemolookup *lookup;
5078     if (!count)
5079     break;
5080     dp = table68k + opcode_nums[i];
5081     for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
5082     ;
5083     write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], (unsigned long)count, lookup->name);
5084     }
5085     #endif
5086 gbeauche 1.28
5087     #if RECORD_REGISTER_USAGE
5088     int reg_count_ids[16];
5089     uint64 tot_reg_count = 0;
5090     for (int i = 0; i < 16; i++) {
5091     reg_count_ids[i] = i;
5092     tot_reg_count += reg_count[i];
5093     }
5094     qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
5095     uint64 cum_reg_count = 0;
5096     for (int i = 0; i < 16; i++) {
5097     int r = reg_count_ids[i];
5098     cum_reg_count += reg_count[r];
5099     printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
5100     reg_count[r],
5101     100.0*double(reg_count[r])/double(tot_reg_count),
5102     100.0*double(cum_reg_count)/double(tot_reg_count));
5103     }
5104     #endif
5105 gbeauche 1.1 }
5106    
5107     bool compiler_use_jit(void)
5108     {
5109     // Check for the "jit" prefs item
5110     if (!PrefsFindBool("jit"))
5111     return false;
5112    
5113     // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5114     if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5115     write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5116     return false;
5117     }
5118    
5119     // FIXME: there are currently problems with JIT compilation and anything below a 68040
5120     if (CPUType < 4) {
5121     write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
5122     return false;
5123     }
5124    
5125     return true;
5126     }
5127    
5128     void init_comp(void)
5129     {
5130     int i;
5131     uae_s8* cb=can_byte;
5132     uae_s8* cw=can_word;
5133     uae_s8* au=always_used;
5134    
5135 gbeauche 1.28 #if RECORD_REGISTER_USAGE
5136     for (i=0;i<16;i++)
5137     reg_count_local[i] = 0;
5138     #endif
5139    
5140 gbeauche 1.1 for (i=0;i<VREGS;i++) {
5141     live.state[i].realreg=-1;
5142     live.state[i].needflush=NF_SCRATCH;
5143     live.state[i].val=0;
5144     set_status(i,UNDEF);
5145     }
5146    
5147     for (i=0;i<VFREGS;i++) {
5148     live.fate[i].status=UNDEF;
5149     live.fate[i].realreg=-1;
5150     live.fate[i].needflush=NF_SCRATCH;
5151     }
5152    
5153     for (i=0;i<VREGS;i++) {
5154     if (i<16) { /* First 16 registers map to 68k registers */
5155     live.state[i].mem=((uae_u32*)&regs)+i;
5156     live.state[i].needflush=NF_TOMEM;
5157     set_status(i,INMEM);
5158     }
5159     else
5160     live.state[i].mem=scratch.regs+i;
5161     }
5162     live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5163     live.state[PC_P].needflush=NF_TOMEM;
5164 gbeauche 1.24 set_const(PC_P,(uintptr)comp_pc_p);
5165 gbeauche 1.1
5166 gbeauche 1.24 live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
5167 gbeauche 1.1 live.state[FLAGX].needflush=NF_TOMEM;
5168     set_status(FLAGX,INMEM);
5169    
5170 gbeauche 1.24 live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
5171 gbeauche 1.1 live.state[FLAGTMP].needflush=NF_TOMEM;
5172     set_status(FLAGTMP,INMEM);
5173    
5174     live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5175     set_status(NEXT_HANDLER,UNDEF);
5176    
5177     for (i=0;i<VFREGS;i++) {
5178     if (i<8) { /* First 8 registers map to 68k FPU registers */
5179     live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5180     live.fate[i].needflush=NF_TOMEM;
5181     live.fate[i].status=INMEM;
5182     }
5183     else if (i==FP_RESULT) {
5184     live.fate[i].mem=(uae_u32*)(&fpu.result);
5185     live.fate[i].needflush=NF_TOMEM;
5186     live.fate[i].status=INMEM;
5187     }
5188     else
5189 gbeauche 1.25 live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
5190 gbeauche 1.1 }
5191    
5192    
5193     for (i=0;i<N_REGS;i++) {
5194     live.nat[i].touched=0;
5195     live.nat[i].nholds=0;
5196     live.nat[i].locked=0;
5197     if (*cb==i) {
5198     live.nat[i].canbyte=1; cb++;
5199     } else live.nat[i].canbyte=0;
5200     if (*cw==i) {
5201     live.nat[i].canword=1; cw++;
5202     } else live.nat[i].canword=0;
5203     if (*au==i) {
5204     live.nat[i].locked=1; au++;
5205     }
5206     }
5207    
5208     for (i=0;i<N_FREGS;i++) {
5209     live.fat[i].touched=0;
5210     live.fat[i].nholds=0;
5211     live.fat[i].locked=0;
5212     }
5213    
5214     touchcnt=1;
5215     m68k_pc_offset=0;
5216     live.flags_in_flags=TRASH;
5217     live.flags_on_stack=VALID;
5218     live.flags_are_important=1;
5219    
5220     raw_fp_init();
5221     }
5222    
5223     /* Only do this if you really mean it! The next call should be to init! */
5224     void flush(int save_regs)
5225     {
5226     int i;
5227    
5228     log_flush();
5229     flush_flags(); /* low level */
5230     sync_m68k_pc(); /* mid level */
5231    
5232     if (save_regs) {
5233     for (i=0;i<VFREGS;i++) {
5234     if (live.fate[i].needflush==NF_SCRATCH ||
5235     live.fate[i].status==CLEAN) {
5236     f_disassociate(i);
5237     }
5238     }
5239     for (i=0;i<VREGS;i++) {
5240     if (live.state[i].needflush==NF_TOMEM) {
5241     switch(live.state[i].status) {
5242     case INMEM:
5243     if (live.state[i].val) {
5244 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5245 gbeauche 1.1 log_vwrite(i);
5246     live.state[i].val=0;
5247     }
5248     break;
5249     case CLEAN:
5250     case DIRTY:
5251     remove_offset(i,-1); tomem(i); break;
5252     case ISCONST:
5253     if (i!=PC_P)
5254     writeback_const(i);
5255     break;
5256     default: break;
5257     }
5258     Dif (live.state[i].val && i!=PC_P) {
5259     write_log("Register %d still has val %x\n",
5260     i,live.state[i].val);
5261     }
5262     }
5263     }
5264     for (i=0;i<VFREGS;i++) {
5265     if (live.fate[i].needflush==NF_TOMEM &&
5266     live.fate[i].status==DIRTY) {
5267     f_evict(i);
5268     }
5269     }
5270     raw_fp_cleanup_drop();
5271     }
5272     if (needflags) {
5273     write_log("Warning! flush with needflags=1!\n");
5274     }
5275     }
5276    
5277     static void flush_keepflags(void)
5278     {
5279     int i;
5280    
5281     for (i=0;i<VFREGS;i++) {
5282     if (live.fate[i].needflush==NF_SCRATCH ||
5283     live.fate[i].status==CLEAN) {
5284     f_disassociate(i);
5285     }
5286     }
5287     for (i=0;i<VREGS;i++) {
5288     if (live.state[i].needflush==NF_TOMEM) {
5289     switch(live.state[i].status) {
5290     case INMEM:
5291     /* Can't adjust the offset here --- that needs "add" */
5292     break;
5293     case CLEAN:
5294     case DIRTY:
5295     remove_offset(i,-1); tomem(i); break;
5296     case ISCONST:
5297     if (i!=PC_P)
5298     writeback_const(i);
5299     break;
5300     default: break;
5301     }
5302     }
5303     }
5304     for (i=0;i<VFREGS;i++) {
5305     if (live.fate[i].needflush==NF_TOMEM &&
5306     live.fate[i].status==DIRTY) {
5307     f_evict(i);
5308     }
5309     }
5310     raw_fp_cleanup_drop();
5311     }
5312    
5313     void freescratch(void)
5314     {
5315     int i;
5316     for (i=0;i<N_REGS;i++)
5317     if (live.nat[i].locked && i!=4)
5318     write_log("Warning! %d is locked\n",i);
5319    
5320     for (i=0;i<VREGS;i++)
5321     if (live.state[i].needflush==NF_SCRATCH) {
5322     forget_about(i);
5323     }
5324    
5325     for (i=0;i<VFREGS;i++)
5326     if (live.fate[i].needflush==NF_SCRATCH) {
5327     f_forget_about(i);
5328     }
5329     }
5330    
5331     /********************************************************************
5332     * Support functions, internal *
5333     ********************************************************************/
5334    
5335    
5336     static void align_target(uae_u32 a)
5337     {
5338 gbeauche 1.14 if (!a)
5339     return;
5340    
5341 gbeauche 1.12 if (tune_nop_fillers)
5342 gbeauche 1.24 raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
5343 gbeauche 1.12 else {
5344     /* Fill with NOPs --- makes debugging with gdb easier */
5345 gbeauche 1.24 while ((uintptr)target&(a-1))
5346 gbeauche 1.12 *target++=0x90;
5347     }
5348 gbeauche 1.1 }
5349    
5350     static __inline__ int isinrom(uintptr addr)
5351     {
5352     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5353     }
5354    
5355     static void flush_all(void)
5356     {
5357     int i;
5358    
5359     log_flush();
5360     for (i=0;i<VREGS;i++)
5361     if (live.state[i].status==DIRTY) {
5362     if (!call_saved[live.state[i].realreg]) {
5363     tomem(i);
5364     }
5365     }
5366     for (i=0;i<VFREGS;i++)
5367     if (f_isinreg(i))
5368     f_evict(i);
5369     raw_fp_cleanup_drop();
5370     }
5371    
5372     /* Make sure all registers that will get clobbered by a call are
5373     safe and sound in memory */
5374     static void prepare_for_call_1(void)
5375     {
5376     flush_all(); /* If there are registers that don't get clobbered,
5377     * we should be a bit more selective here */
5378     }
5379    
5380     /* We will call a C routine in a moment. That will clobber all registers,
5381     so we need to disassociate everything */
5382     static void prepare_for_call_2(void)
5383     {
5384     int i;
5385     for (i=0;i<N_REGS;i++)
5386     if (!call_saved[i] && live.nat[i].nholds>0)
5387     free_nreg(i);
5388    
5389     for (i=0;i<N_FREGS;i++)
5390     if (live.fat[i].nholds>0)
5391     f_free_nreg(i);
5392    
5393     live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5394     flags at the very start of the call_r
5395     functions! */
5396     }
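
/* The two-phase split is deliberate: prepare_for_call_1 may still
   store values to memory while the argument vregs are locked, whereas
   prepare_for_call_2 runs after the arguments are in their final
   registers and must therefore only drop nreg contents (free_nreg /
   f_free_nreg), never shuffle them -- hence the "no swapping" remark
   at the call sites. */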
5397    
5398     /********************************************************************
5399     * Memory access and related functions, CREATE time *
5400     ********************************************************************/
5401    
5402     void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5403     {
5404     next_pc_p=not_taken;
5405     taken_pc_p=taken;
5406     branch_cc=cond;
5407     }
5408    
5409    
5410     static uae_u32 get_handler_address(uae_u32 addr)
5411     {
5413 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5414     return (uintptr)&(bi->direct_handler_to_use);
5415 gbeauche 1.1 }
5416    
5417     static uae_u32 get_handler(uae_u32 addr)
5418     {
5420 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5421     return (uintptr)bi->direct_handler_to_use;
5422 gbeauche 1.1 }
5423    
5424     static void load_handler(int reg, uae_u32 addr)
5425     {
5426     mov_l_rm(reg,get_handler_address(addr));
5427     }
5428    
5429     /* This version assumes that it is writing *real* memory, and *will* fail
5430     * if that assumption is wrong! No branches, no second chances, just
5431     * straight go-for-it attitude */
5432    
5433 gbeauche 1.24 static void writemem_real(int address, int source, int size, int tmp, int clobber)
5434 gbeauche 1.1 {
5435     int f=tmp;
5436    
5437     if (clobber)
5438     f=source;
5439 gbeauche 1.24
5440     #if SIZEOF_VOID_P == 8
5441 gbeauche 1.26 if (!ThirtyThreeBitAddressing)
5442     sign_extend_32_rr(address, address);
5443 gbeauche 1.24 #endif
5444    
5445 gbeauche 1.1 switch(size) {
5446     case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5447     case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5448     case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5449     }
5450     forget_about(tmp);
5451     forget_about(f);
5452     }
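
/* Illustrative only -- for a long store with direct addressing the
   sequence above boils down to roughly this host code, where
   MEMBaseDiff is the host/68k base address delta:

       mov  f, source                    ; keep source intact
       bswap f                           ; 68k big-endian -> x86 LE
       mov  [address + MEMBaseDiff], f

   Byte stores need no swap; word stores use the 16-bit variant. */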
5453    
5454     void writebyte(int address, int source, int tmp)
5455     {
5456 gbeauche 1.24 writemem_real(address,source,1,tmp,0);
5457 gbeauche 1.1 }
5458    
5459     static __inline__ void writeword_general(int address, int source, int tmp,
5460     int clobber)
5461     {
5462 gbeauche 1.24 writemem_real(address,source,2,tmp,clobber);
5463 gbeauche 1.1 }
5464    
5465     void writeword_clobber(int address, int source, int tmp)
5466     {
5467     writeword_general(address,source,tmp,1);
5468     }
5469    
5470     void writeword(int address, int source, int tmp)
5471     {
5472     writeword_general(address,source,tmp,0);
5473     }
5474    
5475     static __inline__ void writelong_general(int address, int source, int tmp,
5476     int clobber)
5477     {
5478 gbeauche 1.24 writemem_real(address,source,4,tmp,clobber);
5479 gbeauche 1.1 }
5480    
5481     void writelong_clobber(int address, int source, int tmp)
5482     {
5483     writelong_general(address,source,tmp,1);
5484     }
5485    
5486     void writelong(int address, int source, int tmp)
5487     {
5488     writelong_general(address,source,tmp,0);
5489     }
5490    
5491    
5492    
5493     /* This version assumes that it is reading *real* memory, and *will* fail
5494     * if that assumption is wrong! No branches, no second chances, just
5495     * straight go-for-it attitude */
5496    
5497 gbeauche 1.24 static void readmem_real(int address, int dest, int size, int tmp)
5498 gbeauche 1.1 {
5499     int f=tmp;
5500    
5501     if (size==4 && address!=dest)
5502     f=dest;
5503    
5504 gbeauche 1.24 #if SIZEOF_VOID_P == 8
5505 gbeauche 1.26 if (!ThirtyThreeBitAddressing)
5506     sign_extend_32_rr(address, address);
5507 gbeauche 1.24 #endif
5508    
5509 gbeauche 1.1 switch(size) {
5510     case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5511     case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5512     case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5513     }
5514     forget_about(tmp);
5515     }
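
/* The read path is the mirror image: load from [address + MEMBaseDiff]
   first, then byte-swap the destination in place (bswap_16/bswap_32),
   so the swap never touches guest memory. */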
5516    
5517     void readbyte(int address, int dest, int tmp)
5518     {
5519 gbeauche 1.24 readmem_real(address,dest,1,tmp);
5520 gbeauche 1.1 }
5521    
5522     void readword(int address, int dest, int tmp)
5523     {
5524 gbeauche 1.24 readmem_real(address,dest,2,tmp);
5525 gbeauche 1.1 }
5526    
5527     void readlong(int address, int dest, int tmp)
5528     {
5529 gbeauche 1.24 readmem_real(address,dest,4,tmp);
5530 gbeauche 1.1 }
5531    
5532     void get_n_addr(int address, int dest, int tmp)
5533     {
5534     // a is the register containing the virtual address
5535     // after the offset has been fetched
5536     int a=tmp;
5537    
5538     // f is the register that will contain the offset
5539     int f=tmp;
5540    
5541     // a == f == tmp if (address == dest)
5542     if (address!=dest) {
5543     a=address;
5544     f=dest;
5545     }
5546    
5547     #if REAL_ADDRESSING
5548     mov_l_rr(dest, address);
5549     #elif DIRECT_ADDRESSING
5550     lea_l_brr(dest,address,MEMBaseDiff);
5551     #endif
5552     forget_about(tmp);
5553     }
5554    
5555     void get_n_addr_jmp(int address, int dest, int tmp)
5556     {
5557     /* For this, we need to get the same address as the rest of UAE
5558     would --- otherwise we end up translating everything twice */
5559     get_n_addr(address,dest,tmp);
5560     }
5561    
5562    
5563     /* base is a register, but dp is an actual value.
5564     target is a register, as is tmp */
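/* Sketch of the 68020 extension word bits decoded below:
     bits 15-12  index register number
     bit  11     index size (0 = sign-extended word, 1 = long)
     bits 10-9   index scale factor (shift count, i.e. *1/2/4/8)
     bit  8      full extension word (memory indirect modes)
   and, in the full format only:
     bit 7 / bit 6   suppress base / suppress index
     bits 5-4        base displacement size
     bit  2          post-indexed rather than pre-indexed
     bits 1-0        outer displacement size */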
5565     void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5566     {
5567     int reg = (dp >> 12) & 15;
5568     int regd_shift=(dp >> 9) & 3;
5569    
5570     if (dp & 0x100) {
5571     int ignorebase=(dp&0x80);
5572     int ignorereg=(dp&0x40);
5573     int addbase=0;
5574     int outer=0;
5575    
5576     if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5577     if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5578    
5579     if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5580     if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5581    
5582     if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5583     if (!ignorereg) {
5584     if ((dp & 0x800) == 0)
5585     sign_extend_16_rr(target,reg);
5586     else
5587     mov_l_rr(target,reg);
5588     shll_l_ri(target,regd_shift);
5589     }
5590     else
5591     mov_l_ri(target,0);
5592    
5593     /* target is now regd */
5594     if (!ignorebase)
5595     add_l(target,base);
5596     add_l_ri(target,addbase);
5597     if (dp&0x03) readlong(target,target,tmp);
5598     } else { /* do the getlong first, then add regd */
5599     if (!ignorebase) {
5600     mov_l_rr(target,base);
5601     add_l_ri(target,addbase);
5602     }
5603     else
5604     mov_l_ri(target,addbase);
5605     if (dp&0x03) readlong(target,target,tmp);
5606    
5607     if (!ignorereg) {
5608     if ((dp & 0x800) == 0)
5609     sign_extend_16_rr(tmp,reg);
5610     else
5611     mov_l_rr(tmp,reg);
5612     shll_l_ri(tmp,regd_shift);
5613     /* tmp is now regd */
5614     add_l(target,tmp);
5615     }
5616     }
5617     add_l_ri(target,outer);
5618     }
5619     else { /* 68000 version */
5620     if ((dp & 0x800) == 0) { /* Sign extend */
5621     sign_extend_16_rr(target,reg);
5622     lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5623     }
5624     else {
5625     lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5626     }
5627     }
5628     forget_about(tmp);
5629     }
5630    
5631    
5632    
5633    
5634    
5635     void set_cache_state(int enabled)
5636     {
5637     if (enabled!=letit)
5638     flush_icache_hard(77);
5639     letit=enabled;
5640     }
5641    
5642     int get_cache_state(void)
5643     {
5644     return letit;
5645     }
5646    
5647     uae_u32 get_jitted_size(void)
5648     {
5649     if (compiled_code)
5650     return current_compile_p-compiled_code;
5651     return 0;
5652     }
5653    
5654 gbeauche 1.20 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5655     const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5656    
5657     static uint8 *do_alloc_code(uint32 size, int depth)
5658     {
5659     #if defined(__linux__) && 0
5660     /*
5661     This is a really awful hack that is known to work on Linux at
5662     least.
5663    
5664     The trick here is to make sure the allocated cache is nearby
5665     The trick here is to make sure the allocated cache is near the
5666     code segment, and more precisely in the positive half of the
5667     32-bit address space, i.e. addr < 0x80000000. Actually, it
5668     turned out that a 32-bit binary run on AMD64 yields a cache
5669     allocated around 0xa0000000, thus causing some trouble when
5670     */
5671     static uint8 * code_base = NULL;
5672     if (code_base == NULL) {
5673     uintptr page_size = getpagesize();
5674     uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5675     if (boundaries < page_size)
5676     boundaries = page_size;
5677     code_base = (uint8 *)sbrk(0);
5678     for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5679     if (vm_acquire_fixed(code_base, size) == 0) {
5680     uint8 *code = code_base;
5681     code_base += size;
5682     return code;
5683     }
5684     code_base += boundaries;
5685     }
5686     return NULL;
5687     }
5688    
5689     if (vm_acquire_fixed(code_base, size) == 0) {
5690     uint8 *code = code_base;
5691     code_base += size;
5692     return code;
5693     }
5694    
5695     if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5696     return NULL;
5697    
5698     return do_alloc_code(size, depth + 1);
5699     #else
5700     uint8 *code = (uint8 *)vm_acquire(size);
5701     return code == VM_MAP_FAILED ? NULL : code;
5702     #endif
5703     }
5704    
5705     static inline uint8 *alloc_code(uint32 size)
5706     {
5707     return do_alloc_code(size, 0);
5708     }
5709    
5710 gbeauche 1.1 void alloc_cache(void)
5711     {
5712     if (compiled_code) {
5713     flush_icache_hard(6);
5714 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5715 gbeauche 1.1 compiled_code = 0;
5716     }
5717    
5718     if (cache_size == 0)
5719     return;
5720    
5721     while (!compiled_code && cache_size) {
5722 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5723 gbeauche 1.1 compiled_code = 0;
5724     cache_size /= 2;
5725     }
5726     }
5727 gbeauche 1.25 if (compiled_code)
         vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5728 gbeauche 1.1
5729     if (compiled_code) {
5730     write_log("<JIT compiler> : actual translation cache size : %d KB at %p\n", cache_size, compiled_code);
5731     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5732     current_compile_p = compiled_code;
5733     current_cache_size = 0;
5734     }
5735     }
5736    
5737    
5738    
5739 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5740 gbeauche 1.1
5741 gbeauche 1.8 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5742 gbeauche 1.1 {
5743 gbeauche 1.8 uae_u32 k1 = 0;
5744     uae_u32 k2 = 0;
5745    
5746     #if USE_CHECKSUM_INFO
5747     checksum_info *csi = bi->csi;
5748     Dif(!csi) abort();
5749     while (csi) {
5750     uae_s32 len = csi->length;
5751 gbeauche 1.24 uintptr tmp = (uintptr)csi->start_p;
5752 gbeauche 1.8 #else
5753     uae_s32 len = bi->len;
5754 gbeauche 1.24 uintptr tmp = (uintptr)bi->min_pcp;
5755 gbeauche 1.8 #endif
5756     uae_u32*pos;
5757 gbeauche 1.1
5758 gbeauche 1.8 len += (tmp & 3);
5759 gbeauche 1.24 tmp &= ~((uintptr)3);
5760 gbeauche 1.8 pos = (uae_u32 *)tmp;
5761    
5762     if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5763     while (len > 0) {
5764     k1 += *pos;
5765     k2 ^= *pos;
5766     pos++;
5767     len -= 4;
5768     }
5769     }
5770 gbeauche 1.1
5771 gbeauche 1.8 #if USE_CHECKSUM_INFO
5772     csi = csi->next;
5773 gbeauche 1.1 }
5774 gbeauche 1.8 #endif
5775    
5776     *c1 = k1;
5777     *c2 = k2;
5778 gbeauche 1.1 }
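
/* The checksum is intentionally cheap: a running 32-bit sum (k1) plus
   a running xor (k2) over the words covering the translated 68k code,
   i.e. in essence:

       for (each word w in range) { k1 += w; k2 ^= w; }

   Two independent accumulators make an accidental match after a real
   code change unlikely, while keeping revalidation fast. */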
5779    
5780 gbeauche 1.8 #if 0
5781 gbeauche 1.7 static void show_checksum(CSI_TYPE* csi)
5782 gbeauche 1.1 {
5783     uae_u32 k1=0;
5784     uae_u32 k2=0;
5785 gbeauche 1.7 uae_s32 len=CSI_LENGTH(csi);
5786 gbeauche 1.24 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5787 gbeauche 1.1 uae_u32* pos;
5788    
5789     len+=(tmp&3);
5790     tmp&=(~3);
5791     pos=(uae_u32*)tmp;
5792    
5793     if (len<0 || len>MAX_CHECKSUM_LEN) {
5794     return;
5795     }
5796     else {
5797     while (len>0) {
5798     write_log("%08x ",*pos);
5799     pos++;
5800     len-=4;
5801     }
5802     write_log(" bla\n");
5803     }
5804     }
5805 gbeauche 1.8 #endif
5806 gbeauche 1.1
5807    
5808     int check_for_cache_miss(void)
5809     {
5810     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5811    
5812     if (bi) {
5813     int cl=cacheline(regs.pc_p);
5814     if (bi!=cache_tags[cl+1].bi) {
5815     raise_in_cl_list(bi);
5816     return 1;
5817     }
5818     }
5819     return 0;
5820     }
5821    
5822    
5823     static void recompile_block(void)
5824     {
5825     /* An existing block's countdown code has expired. We need to make
5826     sure that execute_normal doesn't refuse to recompile due to a
5827     perceived cache miss... */
5828     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5829    
5830     Dif (!bi)
5831     abort();
5832     raise_in_cl_list(bi);
5833     execute_normal();
5834     return;
5835     }
5836     static void cache_miss(void)
5837     {
5838     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5839     uae_u32 cl=cacheline(regs.pc_p);
5840     blockinfo* bi2=get_blockinfo(cl);
5841    
5842     if (!bi) {
5843     execute_normal(); /* Compile this block now */
5844     return;
5845     }
5846     Dif (!bi2 || bi==bi2) {
5847     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5848     abort();
5849     }
5850     raise_in_cl_list(bi);
5851     return;
5852     }
5853    
5854     static int called_check_checksum(blockinfo* bi);
5855    
5856     static inline int block_check_checksum(blockinfo* bi)
5857     {
5858     uae_u32 c1,c2;
5859 gbeauche 1.7 bool isgood;
5860 gbeauche 1.1
5861     if (bi->status!=BI_NEED_CHECK)
5862     return 1; /* This block is in a checked state */
5863    
5864     checksum_count++;
5865 gbeauche 1.7
5866 gbeauche 1.1 if (bi->c1 || bi->c2)
5867     calc_checksum(bi,&c1,&c2);
5868     else {
5869     c1=c2=1; /* Make sure it doesn't match */
5870 gbeauche 1.7 }
5871 gbeauche 1.1
5872     isgood=(c1==bi->c1 && c2==bi->c2);
5873 gbeauche 1.7
5874 gbeauche 1.1 if (isgood) {
5875     /* This block is still OK. So we reactivate. Of course, that
5876     means we have to move it into the needs-to-be-flushed list */
5877     bi->handler_to_use=bi->handler;
5878     set_dhtu(bi,bi->direct_handler);
5879     bi->status=BI_CHECKING;
5880     isgood=called_check_checksum(bi);
5881     }
5882     if (isgood) {
5883     /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5884     c1,c2,bi->c1,bi->c2);*/
5885     remove_from_list(bi);
5886     add_to_active(bi);
5887     raise_in_cl_list(bi);
5888     bi->status=BI_ACTIVE;
5889     }
5890     else {
5891     /* This block actually changed. We need to invalidate it,
5892     and set it up to be recompiled */
5893     /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5894     c1,c2,bi->c1,bi->c2); */
5895     invalidate_block(bi);
5896     raise_in_cl_list(bi);
5897     }
5898     return isgood;
5899     }
5900    
5901     static int called_check_checksum(blockinfo* bi)
5902     {
5904     int isgood=1;
5905     int i;
5906    
5907     for (i=0;i<2 && isgood;i++) {
5908     if (bi->dep[i].jmp_off) {
5909     isgood=block_check_checksum(bi->dep[i].target);
5910     }
5911     }
5912     return isgood;
5913     }
5914    
5915     static void check_checksum(void)
5916     {
5917     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5918     uae_u32 cl=cacheline(regs.pc_p);
5919     blockinfo* bi2=get_blockinfo(cl);
5920    
5921     /* These are not the droids you are looking for... */
5922     if (!bi) {
5923     /* Whoever is the primary target is in a dormant state, but
5924     calling it was accidental, and we should just compile this
5925     new block */
5926     execute_normal();
5927     return;
5928     }
5929     if (bi!=bi2) {
5930     /* The block was hit accidentally, but it does exist. Cache miss */
5931     cache_miss();
5932     return;
5933     }
5934    
5935     if (!block_check_checksum(bi))
5936     execute_normal();
5937     }
5938    
5939     static __inline__ void match_states(blockinfo* bi)
5940     {
5941     int i;
5942     smallstate* s=&(bi->env);
5943    
5944     if (bi->status==BI_NEED_CHECK) {
5945     block_check_checksum(bi);
5946     }
5947     if (bi->status==BI_ACTIVE ||
5948     bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5949     block makes (about not using
5950     certain vregs) */
5951     for (i=0;i<16;i++) {
5952     if (s->virt[i]==L_UNNEEDED) {
5953     // write_log("unneeded reg %d at %p\n",i,target);
5954     COMPCALL(forget_about)(i); // FIXME
5955     }
5956     }
5957     }
5958     flush(1);
5959    
5960     /* And now deal with the *demands* the block makes */
5961     for (i=0;i<N_REGS;i++) {
5962     int v=s->nat[i];
5963     if (v>=0) {
5964     // printf("Loading reg %d into %d at %p\n",v,i,target);
5965     readreg_specific(v,4,i);
5966     // do_load_reg(i,v);
5967     // setlock(i);
5968     }
5969     }
5970     for (i=0;i<N_REGS;i++) {
5971     int v=s->nat[i];
5972     if (v>=0) {
5973     unlock2(i);
5974     }
5975     }
5976     }
5977    
5978     static __inline__ void create_popalls(void)
5979     {
5980     int i,r;
5981    
5982 gbeauche 1.24 if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
5983     write_log("FATAL: Could not allocate popallspace!\n");
5984     abort();
5985     }
5986     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);
5987    
5988 gbeauche 1.1 current_compile_p=popallspace;
5989     set_target(current_compile_p);
5990     #if USE_PUSH_POP
5991     /* If we can't use gcc inline assembly, we need to pop some
5992     registers before jumping back to the various get-out routines.
5993     This generates the code for it.
5994     */
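/* Each get-out stub emitted below is just (sketch):

       pop  <every register flagged in need_to_preserve>
       jmp  <do_nothing | execute_normal | cache_miss | ...>

   pushall_call_handler further down is the matching prologue: it
   pushes the same registers and jumps through cache_tags[]. */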
5995 gbeauche 1.5 align_target(align_jumps);
5996     popall_do_nothing=get_target();
5997 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
5998     if (need_to_preserve[i])
5999     raw_pop_l_r(i);
6000     }
6001 gbeauche 1.24 raw_jmp((uintptr)do_nothing);
6002 gbeauche 1.1
6003 gbeauche 1.5 align_target(align_jumps);
6004 gbeauche 1.1 popall_execute_normal=get_target();
6005     for (i=0;i<N_REGS;i++) {
6006     if (need_to_preserve[i])
6007     raw_pop_l_r(i);
6008     }
6009 gbeauche 1.24 raw_jmp((uintptr)execute_normal);
6010 gbeauche 1.1
6011 gbeauche 1.5 align_target(align_jumps);
6012 gbeauche 1.1 popall_cache_miss=get_target();
6013     for (i=0;i<N_REGS;i++) {
6014     if (need_to_preserve[i])
6015     raw_pop_l_r(i);
6016     }
6017 gbeauche 1.24 raw_jmp((uintptr)cache_miss);
6018 gbeauche 1.1
6019 gbeauche 1.5 align_target(align_jumps);
6020 gbeauche 1.1 popall_recompile_block=get_target();
6021     for (i=0;i<N_REGS;i++) {
6022     if (need_to_preserve[i])
6023     raw_pop_l_r(i);
6024     }
6025 gbeauche 1.24 raw_jmp((uintptr)recompile_block);
6026 gbeauche 1.5
6027     align_target(align_jumps);
6028 gbeauche 1.1 popall_exec_nostats=get_target();
6029     for (i=0;i<N_REGS;i++) {
6030     if (need_to_preserve[i])
6031     raw_pop_l_r(i);
6032     }
6033 gbeauche 1.24 raw_jmp((uintptr)exec_nostats);
6034 gbeauche 1.5
6035     align_target(align_jumps);
6036 gbeauche 1.1 popall_check_checksum=get_target();
6037     for (i=0;i<N_REGS;i++) {
6038     if (need_to_preserve[i])
6039     raw_pop_l_r(i);
6040     }
6041 gbeauche 1.24 raw_jmp((uintptr)check_checksum);
6042 gbeauche 1.5
6043     align_target(align_jumps);
6044 gbeauche 1.1 current_compile_p=get_target();
6045     #else
6046     popall_exec_nostats=(void *)exec_nostats;
6047     popall_execute_normal=(void *)execute_normal;
6048     popall_cache_miss=(void *)cache_miss;
6049     popall_recompile_block=(void *)recompile_block;
6050     popall_do_nothing=(void *)do_nothing;
6051     popall_check_checksum=(void *)check_checksum;
6052     #endif
6053    
6054     /* And now, the code to do the matching pushes and then jump
6055     into a handler routine */
6056     pushall_call_handler=get_target();
6057     #if USE_PUSH_POP
6058     for (i=N_REGS;i--;) {
6059     if (need_to_preserve[i])
6060     raw_push_l_r(i);
6061     }
6062     #endif
6063     r=REG_PC_TMP;
6064 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6065 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6066 gbeauche 1.24 raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
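         /* In C terms, the stub just emitted is roughly:
                push all callee-saved registers;
                goto cache_tags[cacheline(regs.pc_p)].handler;
            The invoked handler eventually leaves through one of the
            popall_* stubs above, which pop the same registers again. */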
6067 gbeauche 1.6
6068 gbeauche 1.24 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
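         /* m68k_compile_execute, generated below, behaves roughly like
            this sketch (special flags are only polled between blocks):

                push all callee-saved registers;
                for (;;) {
                    cache_tags[cacheline(regs.pc_p)].handler();
                    if (!regs.spcflags) continue;
                    if (!m68k_do_specialties()) continue;
                    if (!quit_program) continue;
                    break;
                }
                pop the registers again and return;
         */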
6069 gbeauche 1.6 align_target(align_jumps);
6070     m68k_compile_execute = (void (*)(void))get_target();
6071     for (i=N_REGS;i--;) {
6072     if (need_to_preserve[i])
6073     raw_push_l_r(i);
6074     }
6075     align_target(align_loops);
6076 gbeauche 1.24 uintptr dispatch_loop = (uintptr)get_target();
6077 gbeauche 1.6 r=REG_PC_TMP;
6078 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6079 gbeauche 1.6 raw_and_l_ri(r,TAGMASK);
6080 gbeauche 1.24 raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
6081     raw_cmp_l_mi((uintptr)&regs.spcflags,0);
6082 gbeauche 1.6 raw_jcc_b_oponly(NATIVE_CC_EQ);
6083 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6084     raw_call((uintptr)m68k_do_specialties);
6085 gbeauche 1.6 raw_test_l_rr(REG_RESULT,REG_RESULT);
6086     raw_jcc_b_oponly(NATIVE_CC_EQ);
6087 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6088     raw_cmp_b_mi((uintptr)&quit_program,0);
6089 gbeauche 1.6 raw_jcc_b_oponly(NATIVE_CC_EQ);
6090 gbeauche 1.24 emit_byte(dispatch_loop-((uintptr)get_target()+1));
6091 gbeauche 1.6 for (i=0;i<N_REGS;i++) {
6092     if (need_to_preserve[i])
6093     raw_pop_l_r(i);
6094     }
6095     raw_ret();
6096     #endif
6097 gbeauche 1.24
6098     // No further writes into popallspace are needed from here on
6099     vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
6100 gbeauche 1.1 }
6101    
6102     static __inline__ void reset_lists(void)
6103     {
6104     int i;
6105    
6106     for (i=0;i<MAX_HOLD_BI;i++)
6107     hold_bi[i]=NULL;
6108     active=NULL;
6109     dormant=NULL;
6110     }
6111    
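         /* Give a fresh blockinfo its two per-block entry stubs:
            direct_pen stores the block's 68k address into regs.pc_p and
            jumps to the execute_normal path, direct_pcc does the same
            for the check_checksum path. */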
6112     static void prepare_block(blockinfo* bi)
6113     {
6114     int i;
6115    
6116     set_target(current_compile_p);
6117 gbeauche 1.5 align_target(align_jumps);
6118 gbeauche 1.1 bi->direct_pen=(cpuop_func *)get_target();
6119 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6120     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6121     raw_jmp((uintptr)popall_execute_normal);
6122 gbeauche 1.1
6123 gbeauche 1.5 align_target(align_jumps);
6124 gbeauche 1.1 bi->direct_pcc=(cpuop_func *)get_target();
6125 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
6126     raw_mov_l_mr((uintptr)&regs.pc_p,0);
6127     raw_jmp((uintptr)popall_check_checksum);
6128 gbeauche 1.1 current_compile_p=get_target();
6129    
6130     bi->deplist=NULL;
6131     for (i=0;i<2;i++) {
6132     bi->dep[i].prev_p=NULL;
6133     bi->dep[i].next=NULL;
6134     }
6135     bi->env=default_ss;
6136     bi->status=BI_INVALID;
6137     bi->havestate=0;
6138     //bi->env=empty_ss;
6139     }
6140    
6141 gbeauche 1.21 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6142     static inline void reset_compop(int opcode)
6143 gbeauche 1.17 {
6144 gbeauche 1.21 compfunctbl[opcode] = NULL;
6145     nfcompfunctbl[opcode] = NULL;
6146     }
6147    
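         /* Parse exactly four hex digits, e.g. read_opcode("f200")
            yields 0xf200; any non-hex character yields -1. */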
6148     static int read_opcode(const char *p)
6149     {
6150     int opcode = 0;
6151     for (int i = 0; i < 4; i++) {
6152     int op = p[i];
6153     switch (op) {
6154     case '0': case '1': case '2': case '3': case '4':
6155     case '5': case '6': case '7': case '8': case '9':
6156     opcode = (opcode << 4) | (op - '0');
6157     break;
6158     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
6159     opcode = (opcode << 4) | ((op - 'a') + 10);
6160     break;
6161     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
6162     opcode = (opcode << 4) | ((op - 'A') + 10);
6163     break;
6164     default:
6165     return -1;
6166     }
6167     }
6168     return opcode;
6169     }
6170    
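         /* The "jitblacklist" preference is a ';'-separated list of
            4-digit hex opcodes or '-' ranges. For example, a (made-up)
            setting of "f200-f3ff;a000" would exclude the whole
            f200-f3ff range plus opcode a000 from compilation. */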
6171     static bool merge_blacklist()
6172     {
6173     const char *blacklist = PrefsFindString("jitblacklist");
6174     if (blacklist) {
6175     const char *p = blacklist;
6176     for (;;) {
6177     if (*p == 0)
6178     return true;
6179    
6180     int opcode1 = read_opcode(p);
6181     if (opcode1 < 0)
6182     return false;
6183     p += 4;
6184    
6185     int opcode2 = opcode1;
6186     if (*p == '-') {
6187     p++;
6188     opcode2 = read_opcode(p);
6189     if (opcode2 < 0)
6190     return false;
6191     p += 4;
6192     }
6193    
6194     if (*p == 0 || *p == ';') {
6195     write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6196     for (int opcode = opcode1; opcode <= opcode2; opcode++)
6197     reset_compop(cft_map(opcode));
6198    
6199     if (*p++ == ';')
6200     continue;
6201    
6202     return true;
6203     }
6204    
6205     return false;
6206     }
6207     }
6208     return true;
6209 gbeauche 1.17 }
6210    
6211 gbeauche 1.1 void build_comp(void)
6212     {
6213     int i;
6214     int jumpcount=0;
6215     unsigned long opcode;
6216     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6217     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6218     int count;
6219     int cpu_level = 0; // 68000 (default)
6220     if (CPUType == 4)
6221     cpu_level = 4; // 68040 with FPU
6222     else {
6223     if (FPUType)
6224     cpu_level = 3; // 68020 with FPU
6225     else if (CPUType >= 2)
6226     cpu_level = 2; // 68020
6227     else if (CPUType == 1)
6228     cpu_level = 1;
6229     }
6230     struct cputbl *nfctbl = (
6231     cpu_level == 4 ? op_smalltbl_0_nf
6232     : cpu_level == 3 ? op_smalltbl_1_nf
6233     : cpu_level == 2 ? op_smalltbl_2_nf
6234     : cpu_level == 1 ? op_smalltbl_3_nf
6235     : op_smalltbl_4_nf);
6236    
6237     write_log ("<JIT compiler> : building compiler function tables\n");
6238    
6239     for (opcode = 0; opcode < 65536; opcode++) {
6240 gbeauche 1.21 reset_compop(opcode);
6241 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6242     prop[opcode].use_flags = 0x1f;
6243     prop[opcode].set_flags = 0x1f;
6244     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6245     }
6246    
6247     for (i = 0; tbl[i].opcode < 65536; i++) {
6248     int cflow = table68k[tbl[i].opcode].cflow;
6249 gbeauche 1.10 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6250     cflow = fl_const_jump;
6251 gbeauche 1.8 else
6252 gbeauche 1.10 cflow &= ~fl_const_jump;
6253     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6254 gbeauche 1.1
6255     int uses_fpu = tbl[i].specific & 32;
6256 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6257 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6258     else
6259     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6260     }
6261 gbeauche 1.8
6262 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6263     int uses_fpu = nftbl[i].specific & 32;
6264 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6265 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6266     else
6267     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6268    
6269     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6270     }
6271    
6272     for (i = 0; nfctbl[i].handler; i++) {
6273     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6274     }
6275    
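         /* Second pass: an opcode whose table68k entry refers to another
            handler opcode inherits that base opcode's compile handlers
            and its cflow/is_addx properties. */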
6276     for (opcode = 0; opcode < 65536; opcode++) {
6277     compop_func *f;
6278     compop_func *nff;
6279     cpuop_func *nfcf;
6280     int isaddx,cflow;
6281    
6282     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6283     continue;
6284    
6285     if (table68k[opcode].handler != -1) {
6286     f = compfunctbl[cft_map(table68k[opcode].handler)];
6287     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6288     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6289     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6290     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6291     prop[cft_map(opcode)].cflow = cflow;
6292     prop[cft_map(opcode)].is_addx = isaddx;
6293     compfunctbl[cft_map(opcode)] = f;
6294     nfcompfunctbl[cft_map(opcode)] = nff;
6295     Dif (nfcf == op_illg_1)
6296     abort();
6297     nfcpufunctbl[cft_map(opcode)] = nfcf;
6298     }
6299     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6300     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6301     }
6302     for (i = 0; nfctbl[i].handler != NULL; i++) {
6303     if (nfctbl[i].specific)
6304     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6305     }
6306 gbeauche 1.21
6307     /* Merge in blacklist */
6308     if (!merge_blacklist())
6309     write_log("<JIT compiler> : blacklist merge failure!\n");
6310 gbeauche 1.1
6311     count=0;
6312     for (opcode = 0; opcode < 65536; opcode++) {
6313     if (compfunctbl[cft_map(opcode)])
6314     count++;
6315     }
6316     write_log("<JIT compiler> : supposedly %d compilable opcodes!\n",count);
6317    
6318     /* Initialise state */
6319     create_popalls();
6320     alloc_cache();
6321     reset_lists();
6322    
6323     for (i=0;i<TAGSIZE;i+=2) {
6324     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6325     cache_tags[i+1].bi=NULL;
6326     }
6327    
6328     #if 0
6329     for (i=0;i<N_REGS;i++) {
6330     empty_ss.nat[i].holds=-1;
6331     empty_ss.nat[i].validsize=0;
6332     empty_ss.nat[i].dirtysize=0;
6333     }
6334     #endif
6335     for (i=0;i<VREGS;i++) {
6336     empty_ss.virt[i]=L_NEEDED;
6337     }
6338     for (i=0;i<N_REGS;i++) {
6339     empty_ss.nat[i]=L_UNKNOWN;
6340     }
6341     default_ss=empty_ss;
6342     }
6343    
6344    
6345     static void flush_icache_none(int n)
6346     {
6347     /* Nothing to do. */
6348     }
6349    
6350     static void flush_icache_hard(int n)
6351     {
6353     blockinfo* bi, *dbi;
6354    
6355     hard_flush_count++;
6356     #if 0
6357     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6358     n,regs.pc,regs.pc_p,current_cache_size/1024);
6359     current_cache_size = 0;
6360     #endif
6361     bi=active;
6362     while(bi) {
6363     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6364     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6365     dbi=bi; bi=bi->next;
6366     free_blockinfo(dbi);
6367     }
6368     bi=dormant;
6369     while(bi) {
6370     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6371     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6372     dbi=bi; bi=bi->next;
6373     free_blockinfo(dbi);
6374     }
6375    
6376     reset_lists();
6377     if (!compiled_code)
6378     return;
6379     current_compile_p=compiled_code;
6380     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6381     }
6382    
6383    
6384     /* "Soft flushing" --- instead of actually throwing everything away,
6385     we simply mark everything as "needs to be checked".
6386     */
6387    
6388     static inline void flush_icache_lazy(int n)
6389     {
6391     blockinfo* bi;
6392     blockinfo* bi2;
6393    
6394     soft_flush_count++;
6395     if (!active)
6396     return;
6397    
6398     bi=active;
6399     while (bi) {
6400     uae_u32 cl=cacheline(bi->pc_p);
6401     if (bi->status==BI_INVALID ||
6402     bi->status==BI_NEED_RECOMP) {
6403     if (bi==cache_tags[cl+1].bi)
6404     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6405     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6406     set_dhtu(bi,bi->direct_pen);
6407     bi->status=BI_INVALID;
6408     }
6409     else {
6410     if (bi==cache_tags[cl+1].bi)
6411     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6412     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6413     set_dhtu(bi,bi->direct_pcc);
6414     bi->status=BI_NEED_CHECK;
6415     }
6416     bi2=bi;
6417     bi=bi->next;
6418     }
6419     /* bi2 is now the last entry in the active list */
6420     bi2->next=dormant;
6421     if (dormant)
6422     dormant->prev_p=&(bi2->next);
6423    
6424     dormant=active;
6425     active->prev_p=&dormant;
6426     active=NULL;
6427 gbeauche 1.22 }
6428    
6429     void flush_icache_range(uae_u32 start, uae_u32 length)
6430     {
6431     if (!active)
6432     return;
6433    
6434     #if LAZY_FLUSH_ICACHE_RANGE
6435     uae_u8 *start_p = get_real_address(start);
6436     blockinfo *bi = active;
6437     while (bi) {
6438     #if USE_CHECKSUM_INFO
6439     bool invalidate = false;
6440     for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
6441     invalidate = (((start_p - csi->start_p) < csi->length) ||
6442     ((csi->start_p - start_p) < length));
6443     #else
6444     // Assume system is consistent and would invalidate the right range
6445     const bool invalidate = (bi->pc_p - start_p) < length;
6446     #endif
6447     if (invalidate) {
6448     uae_u32 cl = cacheline(bi->pc_p);
6449     if (bi == cache_tags[cl + 1].bi)
6450     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
6451     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
6452     set_dhtu(bi, bi->direct_pen);
6453     bi->status = BI_NEED_RECOMP;
6454     }
6455     bi = bi->next;
6456     }
6457     return;
6458     #endif
6459     flush_icache(-1);
6460 gbeauche 1.1 }
6461    
6462     static void catastrophe(void)
6463     {
6464     abort();
6465     }
6466    
6467     int failure;
6468    
6469     #define TARGET_M68K 0
6470     #define TARGET_POWERPC 1
6471     #define TARGET_X86 2
6472 gbeauche 1.24 #define TARGET_X86_64 3
6473 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6474     #define TARGET_NATIVE TARGET_X86
6475     #endif
6476     #if defined(powerpc) || defined(__powerpc__)
6477     #define TARGET_NATIVE TARGET_POWERPC
6478     #endif
6479 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6480     #define TARGET_NATIVE TARGET_X86_64
6481     #endif
6482 gbeauche 1.1
6483     #ifdef ENABLE_MON
6484 gbeauche 1.24 static uae_u32 mon_read_byte_jit(uintptr addr)
6485 gbeauche 1.1 {
6486     uae_u8 *m = (uae_u8 *)addr;
6487 gbeauche 1.24 return (uintptr)(*m);
6488 gbeauche 1.1 }
6489    
6490 gbeauche 1.24 static void mon_write_byte_jit(uintptr addr, uae_u32 b)
6491 gbeauche 1.1 {
6492     uae_u8 *m = (uae_u8 *)addr;
6493     *m = b;
6494     }
6495     #endif
6496    
6497     void disasm_block(int target, uint8 * start, size_t length)
6498     {
6499     if (!JITDebug)
6500     return;
6501    
6502     #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6503     char disasm_str[200];
6504     sprintf(disasm_str, "%s $%x $%x",
6505     target == TARGET_M68K ? "d68" :
6506     target == TARGET_X86 ? "d86" :
6507 gbeauche 1.24 target == TARGET_X86_64 ? "d8664" :
6508 gbeauche 1.1 target == TARGET_POWERPC ? "d" : "x",
6509     start, start + length - 1);
6510    
6511 gbeauche 1.24 uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
6512     void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;
6513 gbeauche 1.1
6514     mon_read_byte = mon_read_byte_jit;
6515     mon_write_byte = mon_write_byte_jit;
6516    
6517     char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6518     mon(4, arg);
6519    
6520     mon_read_byte = old_mon_read_byte;
6521     mon_write_byte = old_mon_write_byte;
6522     #endif
6523     }
6524    
6525 gbeauche 1.24 static void disasm_native_block(uint8 *start, size_t length)
6526 gbeauche 1.1 {
6527     disasm_block(TARGET_NATIVE, start, length);
6528     }
6529    
6530 gbeauche 1.24 static void disasm_m68k_block(uint8 *start, size_t length)
6531 gbeauche 1.1 {
6532     disasm_block(TARGET_M68K, start, length);
6533     }
6534    
6535     #ifdef HAVE_GET_WORD_UNSWAPPED
6536     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6537     #else
6538     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6539     #endif
6540    
6541     #if JIT_DEBUG
6542     static uae_u8 *last_regs_pc_p = 0;
6543     static uae_u8 *last_compiled_block_addr = 0;
6544    
6545     void compiler_dumpstate(void)
6546     {
6547     if (!JITDebug)
6548     return;
6549    
6550     write_log("### Host addresses\n");
6551     write_log("MEM_BASE : %x\n", MEMBaseDiff);
6552     write_log("PC_P : %p\n", &regs.pc_p);
6553     write_log("SPCFLAGS : %p\n", &regs.spcflags);
6554     write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6555     write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6556     write_log("\n");
6557    
6558     write_log("### M68k processor state\n");
6559     m68k_dumpstate(0);
6560     write_log("\n");
6561    
6562     write_log("### Block in Mac address space\n");
6563     write_log("M68K block : %p\n",
6564 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_regs_pc_p));
6565 gbeauche 1.1 write_log("Native block : %p (%d bytes)\n",
6566 gbeauche 1.24 (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
6567 gbeauche 1.1 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6568     write_log("\n");
6569     }
6570     #endif
6571    
6572     static void compile_block(cpu_history* pc_hist, int blocklen)
6573     {
6574     if (letit && compiled_code) {
6575     #if PROFILE_COMPILE_TIME
6576     compile_count++;
6577     clock_t start_time = clock();
6578     #endif
6579     #if JIT_DEBUG
6580     bool disasm_block = false;
6581     #endif
6582    
6583     /* OK, here we need to 'compile' a block */
6584     int i;
6585     int r;
6586     int was_comp=0;
6587     uae_u8 liveflags[MAXRUN+1];
6588 gbeauche 1.8 #if USE_CHECKSUM_INFO
6589     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6590 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
6591     uintptr min_pcp=max_pcp;
6592 gbeauche 1.8 #else
6593 gbeauche 1.24 uintptr max_pcp=(uintptr)pc_hist[0].location;
6594     uintptr min_pcp=max_pcp;
6595 gbeauche 1.8 #endif
6596 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6597     void* specflags=(void*)&regs.spcflags;
6598     blockinfo* bi=NULL;
6599     blockinfo* bi2;
6600     int extra_len=0;
6601    
6602     redo_current_block=0;
6603     if (current_compile_p>=max_compile_start)
6604     flush_icache_hard(7);
6605    
6606     alloc_blockinfos();
6607    
6608     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6609     bi2=get_blockinfo(cl);
6610    
6611     optlev=bi->optlevel;
6612     if (bi->status!=BI_INVALID) {
6613     Dif (bi!=bi2) {
6614     /* I don't think it can happen anymore. Shouldn't, in
6615     any case. So let's make sure... */
6616     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6617     bi->count,bi->optlevel,bi->handler_to_use,
6618     cache_tags[cl].handler);
6619     abort();
6620     }
6621    
6622     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6623     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6624     /* What the heck? We are not supposed to be here! */
6625     abort();
6626     }
6627     }
6628     if (bi->count==-1) {
6629     optlev++;
6630     while (!optcount[optlev])
6631     optlev++;
6632     bi->count=optcount[optlev]-1;
6633     }
6634 gbeauche 1.24 current_block_pc_p=(uintptr)pc_hist[0].location;
6635 gbeauche 1.1
6636     remove_deps(bi); /* We are about to create new code */
6637     bi->optlevel=optlev;
6638     bi->pc_p=(uae_u8*)pc_hist[0].location;
6639 gbeauche 1.8 #if USE_CHECKSUM_INFO
6640     free_checksum_info_chain(bi->csi);
6641     bi->csi = NULL;
6642     #endif
6643 gbeauche 1.1
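         /* Backward liveness pass over the CCR bits: a flag is live
            before an instruction if that instruction reads it, or if it
            is live afterwards and not set here. ADDX gets a special
            case: it only reads Z in order to accumulate it, so when Z
            is dead downstream it need not be kept alive before the
            ADDX either. */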
6644     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6645     i=blocklen;
6646     while (i--) {
6647     uae_u16* currpcp=pc_hist[i].location;
6648     uae_u32 op=DO_GET_OPCODE(currpcp);
6649    
6650 gbeauche 1.8 #if USE_CHECKSUM_INFO
6651     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6652     #if USE_INLINING
6653     if (is_const_jump(op)) {
6654     checksum_info *csi = alloc_checksum_info();
6655     csi->start_p = (uae_u8 *)min_pcp;
6656     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6657     csi->next = bi->csi;
6658     bi->csi = csi;
6659 gbeauche 1.24 max_pcp = (uintptr)currpcp;
6660 gbeauche 1.8 }
6661     #endif
6662 gbeauche 1.24 min_pcp = (uintptr)currpcp;
6663 gbeauche 1.8 #else
6664 gbeauche 1.24 if ((uintptr)currpcp<min_pcp)
6665     min_pcp=(uintptr)currpcp;
6666     if ((uintptr)currpcp>max_pcp)
6667     max_pcp=(uintptr)currpcp;
6668 gbeauche 1.8 #endif
6669 gbeauche 1.1
6670     liveflags[i]=((liveflags[i+1]&
6671     (~prop[op].set_flags))|
6672     prop[op].use_flags);
6673     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6674     liveflags[i]&= ~FLAG_Z;
6675     }
6676    
6677 gbeauche 1.8 #if USE_CHECKSUM_INFO
6678     checksum_info *csi = alloc_checksum_info();
6679     csi->start_p = (uae_u8 *)min_pcp;
6680     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6681     csi->next = bi->csi;
6682     bi->csi = csi;
6683     #endif
6684    
6685 gbeauche 1.1 bi->needed_flags=liveflags[0];
6686    
6687 gbeauche 1.5 align_target(align_loops);
6688 gbeauche 1.1 was_comp=0;
6689    
6690     bi->direct_handler=(cpuop_func *)get_target();
6691     set_dhtu(bi,bi->direct_handler);
6692     bi->status=BI_COMPILING;
6693 gbeauche 1.24 current_block_start_target=(uintptr)get_target();
6694 gbeauche 1.1
6695     log_startblock();
6696    
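         /* When a countdown is active, the code emitted below behaves
            roughly like:
                regs.pc_p = <block start>;
                if (--bi->count < 0) goto popall_recompile_block;
            so once the counter underflows, the block is recompiled at
            the next optimization level. */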
6697     if (bi->count>=0) { /* Need to generate countdown code */
6698 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6699     raw_sub_l_mi((uintptr)&(bi->count),1);
6700     raw_jl((uintptr)popall_recompile_block);
6701 gbeauche 1.1 }
6702     if (optlev==0) { /* No need to actually translate */
6703     /* Execute normally without keeping stats */
6704 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6705     raw_jmp((uintptr)popall_exec_nostats);
6706 gbeauche 1.1 }
6707     else {
6708     reg_alloc_run=0;
6709     next_pc_p=0;
6710     taken_pc_p=0;
6711     branch_cc=0;
6712    
6713     comp_pc_p=(uae_u8*)pc_hist[0].location;
6714     init_comp();
6715     was_comp=1;
6716    
6717     #if JIT_DEBUG
6718     if (JITDebug) {
6719 gbeauche 1.24 raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
6720     raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
6721 gbeauche 1.1 }
6722     #endif
6723    
6724     for (i=0;i<blocklen &&
6725     get_target_noopt()<max_compile_start;i++) {
6726     cpuop_func **cputbl;
6727     compop_func **comptbl;
6728     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6729     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6730     if (!needed_flags) {
6731     cputbl=nfcpufunctbl;
6732     comptbl=nfcompfunctbl;
6733     }
6734     else {
6735     cputbl=cpufunctbl;
6736     comptbl=compfunctbl;
6737     }
6738    
6739     failure = 1; // gb-- defaults to failure state
6740     if (comptbl[opcode] && optlev>1) {
6741     failure=0;
6742     if (!was_comp) {
6743     comp_pc_p=(uae_u8*)pc_hist[i].location;
6744     init_comp();
6745     }
6746 gbeauche 1.18 was_comp=1;
6747 gbeauche 1.1
6748     comptbl[opcode](opcode);
6749     freescratch();
6750     if (!(liveflags[i+1] & FLAG_CZNV)) {
6751     /* We can forget about flags */
6752     dont_care_flags();
6753     }
6754     #if INDIVIDUAL_INST
6755     flush(1);
6756     nop();
6757     flush(1);
6758     was_comp=0;
6759     #endif
6760     }
6761    
6762     if (failure) {
6763     if (was_comp) {
6764     flush(1);
6765     was_comp=0;
6766     }
6767     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6768     #if USE_NORMAL_CALLING_CONVENTION
6769     raw_push_l_r(REG_PAR1);
6770     #endif
6771 gbeauche 1.24 raw_mov_l_mi((uintptr)&regs.pc_p,
6772     (uintptr)pc_hist[i].location);
6773     raw_call((uintptr)cputbl[opcode]);
6774 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6775     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6776 gbeauche 1.24 raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
6777 gbeauche 1.9 #endif
6778 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6779     raw_inc_sp(4);
6780     #endif
6781    
6782     if (i < blocklen - 1) {
6783     uae_s8* branchadd;
6784    
6785 gbeauche 1.24 raw_mov_l_rm(0,(uintptr)specflags);
6786 gbeauche 1.1 raw_test_l_rr(0,0);
6787     raw_jz_b_oponly();
6788     branchadd=(uae_s8 *)get_target();
6789     emit_byte(0);
6790 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6791     *branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
6792 gbeauche 1.1 }
6793     }
6794     }
6795     #if 1 /* This isn't completely kosher yet; it really needs to
6796     be integrated into a general inter-block-dependency scheme */
6797     if (next_pc_p && taken_pc_p &&
6798     was_comp && taken_pc_p==current_block_pc_p) {
6799     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6800     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6801     uae_u8 x=bi1->needed_flags;
6802    
6803     if (x==0xff || 1) { /* To be on the safe side */
6804     uae_u16* next=(uae_u16*)next_pc_p;
6805     uae_u32 op=DO_GET_OPCODE(next);
6806    
6807     x=0x1f;
6808     x&=(~prop[op].set_flags);
6809     x|=prop[op].use_flags;
6810     }
6811    
6812     x|=bi2->needed_flags;
6813     if (!(x & FLAG_CZNV)) {
6814     /* We can forget about flags */
6815     dont_care_flags();
6816     extra_len+=2; /* The next instruction now is part of this
6817     block */
6818     }
6819    
6820     }
6821     #endif
6822     log_flush();
6823    
6824     if (next_pc_p) { /* A branch was registered */
6825 gbeauche 1.24 uintptr t1=next_pc_p;
6826     uintptr t2=taken_pc_p;
6827 gbeauche 1.1 int cc=branch_cc;
6828    
6829     uae_u32* branchadd;
6830     uae_u32* tba;
6831     bigstate tmp;
6832     blockinfo* tbi;
6833    
6834     if (taken_pc_p<next_pc_p) {
6835     /* backward branch. Optimize for the "taken" case ---
6836     which means the raw_jcc should fall through when
6837     the 68k branch is taken. */
6838     t1=taken_pc_p;
6839     t2=next_pc_p;
6840     cc=branch_cc^1;
6841     }
6842    
6843     tmp=live; /* ouch! This is big... */
6844     raw_jcc_l_oponly(cc);
6845     branchadd=(uae_u32*)get_target();
6846     emit_long(0);
6847    
6848     /* predicted outcome */
6849     tbi=get_blockinfo_addr_new((void*)t1,1);
6850     match_states(tbi);
6851 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6852 gbeauche 1.1 raw_jcc_l_oponly(4);
6853     tba=(uae_u32*)get_target();
6854 gbeauche 1.24 emit_long(get_handler(t1)-((uintptr)tba+4));
6855     raw_mov_l_mi((uintptr)&regs.pc_p,t1);
6856 gbeauche 1.28 flush_reg_count();
6857 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6858 gbeauche 1.1 create_jmpdep(bi,0,tba,t1);
6859    
6860 gbeauche 1.5 align_target(align_jumps);
6861 gbeauche 1.1 /* not-predicted outcome */
6862 gbeauche 1.24 *branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
6863 gbeauche 1.1 live=tmp; /* Ouch again */
6864     tbi=get_blockinfo_addr_new((void*)t2,1);
6865     match_states(tbi);
6866    
6867     //flush(1); /* Can only get here if was_comp==1 */
6868 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6869 gbeauche 1.1 raw_jcc_l_oponly(4);
6870     tba=(uae_u32*)get_target();
6871 gbeauche 1.24 emit_long(get_handler(t2)-((uintptr)tba+4));
6872     raw_mov_l_mi((uintptr)&regs.pc_p,t2);
6873 gbeauche 1.28 flush_reg_count();
6874 gbeauche 1.24 raw_jmp((uintptr)popall_do_nothing);
6875 gbeauche 1.1 create_jmpdep(bi,1,tba,t2);
6876     }
6877     else
6878     {
6879     if (was_comp) {
6880     flush(1);
6881     }
6882 gbeauche 1.28 flush_reg_count();
6883 gbeauche 1.1
6884     /* Let's find out where next_handler is... */
6885     if (was_comp && isinreg(PC_P)) {
6886     r=live.state[PC_P].realreg;
6887     raw_and_l_ri(r,TAGMASK);
6888     int r2 = (r==0) ? 1 : 0;
6889 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6890     raw_cmp_l_mi((uintptr)specflags,0);
6891 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6892 gbeauche 1.1 raw_jmp_r(r2);
6893     }
6894     else if (was_comp && isconst(PC_P)) {
6895     uae_u32 v=live.state[PC_P].val;
6896     uae_u32* tba;
6897     blockinfo* tbi;
6898    
6899 gbeauche 1.24 tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
6900 gbeauche 1.1 match_states(tbi);
6901    
6902 gbeauche 1.24 raw_cmp_l_mi((uintptr)specflags,0);
6903 gbeauche 1.1 raw_jcc_l_oponly(4);
6904     tba=(uae_u32*)get_target();
6905 gbeauche 1.24 emit_long(get_handler(v)-((uintptr)tba+4));
6906     raw_mov_l_mi((uintptr)&regs.pc_p,v);
6907     raw_jmp((uintptr)popall_do_nothing);
6908 gbeauche 1.1 create_jmpdep(bi,0,tba,v);
6909     }
6910     else {
6911     r=REG_PC_TMP;
6912 gbeauche 1.24 raw_mov_l_rm(r,(uintptr)&regs.pc_p);
6913 gbeauche 1.1 raw_and_l_ri(r,TAGMASK);
6914     int r2 = (r==0) ? 1 : 0;
6915 gbeauche 1.24 raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
6916     raw_cmp_l_mi((uintptr)specflags,0);
6917 gbeauche 1.27 raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
6918 gbeauche 1.1 raw_jmp_r(r2);
6919     }
6920     }
6921     }
6922    
6923     #if USE_MATCH
6924     if (callers_need_recompile(&live,&(bi->env))) {
6925     mark_callers_recompile(bi);
6926     }
6927    
6928     big_to_small_state(&live,&(bi->env));
6929     #endif
6930    
6931 gbeauche 1.8 #if USE_CHECKSUM_INFO
6932     remove_from_list(bi);
6933     if (trace_in_rom) {
6934     // No need to checksum that block trace on cache invalidation
6935     free_checksum_info_chain(bi->csi);
6936     bi->csi = NULL;
6937     add_to_dormant(bi);
6938     }
6939     else {
6940     calc_checksum(bi,&(bi->c1),&(bi->c2));
6941     add_to_active(bi);
6942     }
6943     #else
6944 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
6945     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6946     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6947     else
6948     max_pcp+=LONGEST_68K_INST;
6949 gbeauche 1.7
6950 gbeauche 1.1 bi->len=max_pcp-min_pcp;
6951     bi->min_pcp=min_pcp;
6952 gbeauche 1.7
6953 gbeauche 1.1 remove_from_list(bi);
6954     if (isinrom(min_pcp) && isinrom(max_pcp)) {
6955     add_to_dormant(bi); /* No need to checksum it on cache flush.
6956     Please don't start changing ROMs in
6957     flight! */
6958     }
6959     else {
6960     calc_checksum(bi,&(bi->c1),&(bi->c2));
6961     add_to_active(bi);
6962     }
6963 gbeauche 1.8 #endif
6964 gbeauche 1.1
6965     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6966    
6967     #if JIT_DEBUG
6968     if (JITDebug)
6969     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6970    
6971     if (JITDebug && disasm_block) {
6972     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6973     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6974     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6975     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6976     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6977     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6978     getchar();
6979     }
6980     #endif
6981    
6982     log_dump();
6983 gbeauche 1.5 align_target(align_jumps);
6984 gbeauche 1.1
6985     /* This is the non-direct handler */
6986     bi->handler=
6987     bi->handler_to_use=(cpuop_func *)get_target();
6988 gbeauche 1.24 raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
6989     raw_jnz((uintptr)popall_cache_miss);
6990 gbeauche 1.1 comp_pc_p=(uae_u8*)pc_hist[0].location;
6991    
6992     bi->status=BI_FINALIZING;
6993     init_comp();
6994     match_states(bi);
6995     flush(1);
6996    
6997 gbeauche 1.24 raw_jmp((uintptr)bi->direct_handler);
6998 gbeauche 1.1
6999     current_compile_p=get_target();
7000     raise_in_cl_list(bi);
7001    
7002     /* We will flush soon, anyway, so let's do it now */
7003     if (current_compile_p>=max_compile_start)
7004     flush_icache_hard(7);
7005    
7006     bi->status=BI_ACTIVE;
7007     if (redo_current_block)
7008     block_need_recompile(bi);
7009    
7010     #if PROFILE_COMPILE_TIME
7011     compile_time += (clock() - start_time);
7012     #endif
7013     }
7014     }
7015    
7016     void do_nothing(void)
7017     {
7018     /* What did you expect this to do? */
7019     }
7020    
7021     void exec_nostats(void)
7022     {
7023     for (;;) {
7024     uae_u32 opcode = GET_OPCODE;
7025     (*cpufunctbl[opcode])(opcode);
7026     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7027     return; /* We will deal with the spcflags in the caller */
7028     }
7029     }
7030     }
7031    
7032     void execute_normal(void)
7033     {
7034     if (!check_for_cache_miss()) {
7035     cpu_history pc_hist[MAXRUN];
7036     int blocklen = 0;
7037     #if REAL_ADDRESSING || DIRECT_ADDRESSING
7038     start_pc_p = regs.pc_p;
7039     start_pc = get_virtual_address(regs.pc_p);
7040     #else
7041     start_pc_p = regs.pc_oldp;
7042     start_pc = regs.pc;
7043     #endif
7044     for (;;) { /* Take note: This is the do-it-normal loop */
7045     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7046     uae_u32 opcode = GET_OPCODE;
7047     #if FLIGHT_RECORDER
7048     m68k_record_step(m68k_getpc());
7049     #endif
7050     (*cpufunctbl[opcode])(opcode);
7051     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7052     compile_block(pc_hist, blocklen);
7053     return; /* We will deal with the spcflags in the caller */
7054     }
7055     /* No need to check regs.spcflags, because if they were set,
7056     we'd have ended up inside that "if" */
7057     }
7058     }
7059     }
7060    
7061     typedef void (*compiled_handler)(void);
7062    
7063 gbeauche 1.24 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
7064 gbeauche 1.6 void (*m68k_compile_execute)(void) = NULL;
7065     #else
7066 gbeauche 1.1 void m68k_do_compile_execute(void)
7067     {
7068     for (;;) {
7069     ((compiled_handler)(pushall_call_handler))();
7070     /* Whenever we return from that, we should check spcflags */
7071     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7072     if (m68k_do_specialties ())
7073     return;
7074     }
7075     }
7076     }
7077 gbeauche 1.6 #endif