ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.34
Committed: 2005-06-11T06:43:24Z (19 years, 1 month ago) by gbeauche
Branch: MAIN
CVS Tags: nigel-build-17
Changes since 1.33: +14 -0 lines
Log Message:
Much improved responsiveness on NetBSD systems.

On those systems, it's really hard to get high resolution timings and the
system often fails to honour a timeout in less than 20 ms. The idea here
is to have an average m68k instruction count (countdown quantum) that
triggers real interrupt checks. The quantum is calibrated every 10 ticks
and has a 1000 Hz resolution on average.

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6 gbeauche 1.29 * Adaptation for Basilisk II and improvements, copyright 2000-2005
7 gbeauche 1.11 * Gwenole Beauchesne
8     *
9 gbeauche 1.29 * Basilisk II (C) 1997-2005 Christian Bauer
10 gbeauche 1.11 *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.24 /* NOTE: support for AMD64 assumes translation cache and other code
35     * buffers are allocated into a 32-bit address space because (i) B2/JIT
36     * code is not 64-bit clean and (ii) it's faster to resolve branches
37     * that way.
38     */
39     #if !defined(__i386__) && !defined(__x86_64__)
40     #error "Only IA-32 and X86-64 targets are supported with the JIT Compiler"
41     #endif
42    
43 gbeauche 1.1 #define USE_MATCH 0
44    
45     /* kludge for Brian, so he can compile under MSVC++ */
46     #define USE_NORMAL_CALLING_CONVENTION 0
47    
48     #ifndef WIN32
49 gbeauche 1.20 #include <unistd.h>
50 gbeauche 1.1 #include <sys/types.h>
51     #include <sys/mman.h>
52     #endif
53    
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
57    
58     #include "sysdeps.h"
59     #include "cpu_emulation.h"
60     #include "main.h"
61     #include "prefs.h"
62     #include "user_strings.h"
63 gbeauche 1.2 #include "vm_alloc.h"
64 gbeauche 1.1
65     #include "m68k.h"
66     #include "memory.h"
67     #include "readcpu.h"
68     #include "newcpu.h"
69     #include "comptbl.h"
70     #include "compiler/compemu.h"
71     #include "fpu/fpu.h"
72     #include "fpu/flags.h"
73    
74     #define DEBUG 1
75     #include "debug.h"
76    
77     #ifdef ENABLE_MON
78     #include "mon.h"
79     #endif
80    
81     #ifndef WIN32
82 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
83     #define PROFILE_UNTRANSLATED_INSNS 1
84 gbeauche 1.1 #endif
85    
86 gbeauche 1.28 #if defined(__x86_64__) && 0
87     #define RECORD_REGISTER_USAGE 1
88     #endif
89    
90 gbeauche 1.1 #ifdef WIN32
91     #undef write_log
92     #define write_log dummy_write_log
93     static void dummy_write_log(const char *, ...) { }
94     #endif
95    
96     #if JIT_DEBUG
97     #undef abort
98     #define abort() do { \
99     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
100     exit(EXIT_FAILURE); \
101     } while (0)
102     #endif
103    
#if RECORD_REGISTER_USAGE
static uint64 reg_count[16];
static int reg_count_local[16];

/* qsort comparator: order register indices by descending use count.
 * The original returned the raw uint64 difference truncated to int,
 * which can produce the wrong sign once counts exceed INT_MAX;
 * compare explicitly instead. */
static int reg_count_compare(const void *ap, const void *bp)
{
	const int a = *((const int *)ap);
	const int b = *((const int *)bp);
	if (reg_count[b] > reg_count[a])
		return 1;
	if (reg_count[b] < reg_count[a])
		return -1;
	return 0;
}
#endif
115    
116 gbeauche 1.1 #if PROFILE_COMPILE_TIME
117     #include <time.h>
118     static uae_u32 compile_count = 0;
119     static clock_t compile_time = 0;
120     static clock_t emul_start_time = 0;
121     static clock_t emul_end_time = 0;
122     #endif
123    
#if PROFILE_UNTRANSLATED_INSNS
const int untranslated_top_ten = 20;
static uae_u32 raw_cputbl_count[65536] = { 0, };
static uae_u16 opcode_nums[65536];

/* qsort comparator: order opcodes by descending interpreter hit count.
 * The original returned the bare result of '<' (only ever 0 or 1, never
 * negative), which violates the qsort comparator contract and yields an
 * unspecified ordering; return a proper three-way result instead. */
static int untranslated_compfn(const void *e1, const void *e2)
{
	const uae_u32 a = raw_cputbl_count[*(const uae_u16 *)e1];
	const uae_u32 b = raw_cputbl_count[*(const uae_u16 *)e2];
	return (a < b) - (a > b);	/* descending */
}
#endif
134    
135 gbeauche 1.24 static compop_func *compfunctbl[65536];
136     static compop_func *nfcompfunctbl[65536];
137     static cpuop_func *nfcpufunctbl[65536];
138 gbeauche 1.1 uae_u8* comp_pc_p;
139    
140 gbeauche 1.26 // From main_unix.cpp
141     extern bool ThirtyThreeBitAddressing;
142    
143 gbeauche 1.6 // From newcpu.cpp
144     extern bool quit_program;
145    
146 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
147     #if JIT_DEBUG
148     static bool JITDebug = false; // Enable runtime disassemblers through mon?
149     #else
150     const bool JITDebug = false; // Don't use JIT debug mode at all
151     #endif
152 gbeauche 1.33 #if USE_INLINING
153     static bool follow_const_jumps = true; // Flag: translation through constant jumps
154     #else
155     const bool follow_const_jumps = false;
156     #endif
157 gbeauche 1.1
158 gbeauche 1.22 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
159 gbeauche 1.1 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
160 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
161 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
162     static bool avoid_fpu = true; // Flag: compile FPU instructions ?
163     static bool have_cmov = false; // target has CMOV instructions ?
164 gbeauche 1.30 static bool have_lahf_lm = true; // target has LAHF supported in long mode ?
165 gbeauche 1.1 static bool have_rat_stall = true; // target has partial register stalls ?
166 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
167     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
168 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
169 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
170     static int align_jumps = 32; // Align the start of jumps
171 gbeauche 1.1 static int optcount[10] = {
172     10, // How often a block has to be executed before it is translated
173     0, // How often to use naive translation
174     0, 0, 0, 0,
175     -1, -1, -1, -1
176     };
177    
178     struct op_properties {
179     uae_u8 use_flags;
180     uae_u8 set_flags;
181     uae_u8 is_addx;
182     uae_u8 cflow;
183     };
184     static op_properties prop[65536];
185    
186     static inline int end_block(uae_u32 opcode)
187     {
188     return (prop[opcode].cflow & fl_end_block);
189     }
190    
191 gbeauche 1.8 static inline bool is_const_jump(uae_u32 opcode)
192     {
193     return (prop[opcode].cflow == fl_const_jump);
194     }
195    
196 gbeauche 1.18 static inline bool may_trap(uae_u32 opcode)
197     {
198     return (prop[opcode].cflow & fl_trap);
199     }
200    
/* Map an opcode to the byte order used by the compile-time tables.
 * When HAVE_GET_WORD_UNSWAPPED is defined, opcode words are stored
 * byte-swapped, so exchange the two bytes; otherwise pass through. */
static inline unsigned int cft_map (unsigned int f)
{
#ifdef HAVE_GET_WORD_UNSWAPPED
    return ((f >> 8) & 0xff) | ((f & 0xff) << 8);
#else
    return f;
#endif
}
209    
210 gbeauche 1.1 uae_u8* start_pc_p;
211     uae_u32 start_pc;
212     uae_u32 current_block_pc_p;
213 gbeauche 1.24 static uintptr current_block_start_target;
214 gbeauche 1.1 uae_u32 needed_flags;
215 gbeauche 1.24 static uintptr next_pc_p;
216     static uintptr taken_pc_p;
217 gbeauche 1.1 static int branch_cc;
218     static int redo_current_block;
219    
220     int segvcount=0;
221     int soft_flush_count=0;
222     int hard_flush_count=0;
223     int checksum_count=0;
224     static uae_u8* current_compile_p=NULL;
225     static uae_u8* max_compile_start;
226     static uae_u8* compiled_code=NULL;
227     static uae_s32 reg_alloc_run;
228 gbeauche 1.24 const int POPALLSPACE_SIZE = 1024; /* That should be enough space */
229     static uae_u8* popallspace=NULL;
230 gbeauche 1.1
231     void* pushall_call_handler=NULL;
232     static void* popall_do_nothing=NULL;
233     static void* popall_exec_nostats=NULL;
234     static void* popall_execute_normal=NULL;
235     static void* popall_cache_miss=NULL;
236     static void* popall_recompile_block=NULL;
237     static void* popall_check_checksum=NULL;
238    
239     /* The 68k only ever executes from even addresses. So right now, we
240     * waste half the entries in this array
241     * UPDATE: We now use those entries to store the start of the linked
242     * lists that we maintain for each hash result.
243     */
244     cacheline cache_tags[TAGSIZE];
245     int letit=0;
246     blockinfo* hold_bi[MAX_HOLD_BI];
247     blockinfo* active;
248     blockinfo* dormant;
249    
250     /* 68040 */
251     extern struct cputbl op_smalltbl_0_nf[];
252     extern struct comptbl op_smalltbl_0_comp_nf[];
253     extern struct comptbl op_smalltbl_0_comp_ff[];
254    
255     /* 68020 + 68881 */
256     extern struct cputbl op_smalltbl_1_nf[];
257    
258     /* 68020 */
259     extern struct cputbl op_smalltbl_2_nf[];
260    
261     /* 68010 */
262     extern struct cputbl op_smalltbl_3_nf[];
263    
264     /* 68000 */
265     extern struct cputbl op_smalltbl_4_nf[];
266    
267     /* 68000 slow but compatible. */
268     extern struct cputbl op_smalltbl_5_nf[];
269    
270     static void flush_icache_hard(int n);
271     static void flush_icache_lazy(int n);
272     static void flush_icache_none(int n);
273     void (*flush_icache)(int n) = flush_icache_none;
274    
275    
276    
277     bigstate live;
278     smallstate empty_ss;
279     smallstate default_ss;
280     static int optlev;
281    
282     static int writereg(int r, int size);
283     static void unlock2(int r);
284     static void setlock(int r);
285     static int readreg_specific(int r, int size, int spec);
286     static int writereg_specific(int r, int size, int spec);
287     static void prepare_for_call_1(void);
288     static void prepare_for_call_2(void);
289     static void align_target(uae_u32 a);
290    
291     static uae_s32 nextused[VREGS];
292    
293     uae_u32 m68k_pc_offset;
294    
295     /* Some arithmetic operations can be optimized away if the operands
296     * are known to be constant. But that's only a good idea when the
297     * side effects they would have on the flags are not important. This
298     * variable indicates whether we need the side effects or not
299     */
300     uae_u32 needflags=0;
301    
302     /* Flag handling is complicated.
303     *
304     * x86 instructions create flags, which quite often are exactly what we
305     * want. So at times, the "68k" flags are actually in the x86 flags.
306     *
307     * Then again, sometimes we do x86 instructions that clobber the x86
308     * flags, but don't represent a corresponding m68k instruction. In that
309     * case, we have to save them.
310     *
311     * We used to save them to the stack, but now store them back directly
312     * into the regflags.cznv of the traditional emulation. Thus some odd
313     * names.
314     *
315     * So flags can be in either of two places (used to be three; boy were
316     * things complicated back then!); And either place can contain either
317     * valid flags or invalid trash (and on the stack, there was also the
318     * option of "nothing at all", now gone). A couple of variables keep
319     * track of the respective states.
320     *
321     * To make things worse, we might or might not be interested in the flags.
322     * by default, we are, but a call to dont_care_flags can change that
323     * until the next call to live_flags. If we are not, pretty much whatever
324     * is in the register and/or the native flags is seen as valid.
325     */
326    
327     static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
328     {
329     return cache_tags[cl+1].bi;
330     }
331    
332     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
333     {
334     blockinfo* bi=get_blockinfo(cacheline(addr));
335    
336     while (bi) {
337     if (bi->pc_p==addr)
338     return bi;
339     bi=bi->next_same_cl;
340     }
341     return NULL;
342     }
343    
344    
345     /*******************************************************************
346     * All sorts of list related functions for all of the lists *
347     *******************************************************************/
348    
/* Unlink bi from its cache-line hash chain and repoint the line's
   dispatch handler at the new chain head (or at the "execute normal"
   stub when the line becomes empty). */
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (bi->prev_same_cl_p)
		*(bi->prev_same_cl_p)=bi->next_same_cl;
	if (bi->next_same_cl)
		bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
	/* cache_tags[cl] holds the handler, cache_tags[cl+1] the chain
	   head -- keep the handler consistent with the new head. */
	if (cache_tags[cl+1].bi)
		cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
	else
		cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}
362    
363     static __inline__ void remove_from_list(blockinfo* bi)
364     {
365     if (bi->prev_p)
366     *(bi->prev_p)=bi->next;
367     if (bi->next)
368     bi->next->prev_p=bi->prev_p;
369     }
370    
371     static __inline__ void remove_from_lists(blockinfo* bi)
372     {
373     remove_from_list(bi);
374     remove_from_cl_list(bi);
375     }
376    
/* Push bi onto the front of its cache-line chain and make it the block
   the line's dispatch handler jumps to. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
	uae_u32 cl=cacheline(bi->pc_p);

	if (cache_tags[cl+1].bi)
		cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
	bi->next_same_cl=cache_tags[cl+1].bi;

	cache_tags[cl+1].bi=bi;
	bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

	/* The new chain head also becomes the line's dispatch handler. */
	cache_tags[cl].handler=bi->handler_to_use;
}
390    
391     static __inline__ void raise_in_cl_list(blockinfo* bi)
392     {
393     remove_from_cl_list(bi);
394     add_to_cl_list(bi);
395     }
396    
397     static __inline__ void add_to_active(blockinfo* bi)
398     {
399     if (active)
400     active->prev_p=&(bi->next);
401     bi->next=active;
402    
403     active=bi;
404     bi->prev_p=&active;
405     }
406    
407     static __inline__ void add_to_dormant(blockinfo* bi)
408     {
409     if (dormant)
410     dormant->prev_p=&(bi->next);
411     bi->next=dormant;
412    
413     dormant=bi;
414     bi->prev_p=&dormant;
415     }
416    
417     static __inline__ void remove_dep(dependency* d)
418     {
419     if (d->prev_p)
420     *(d->prev_p)=d->next;
421     if (d->next)
422     d->next->prev_p=d->prev_p;
423     d->prev_p=NULL;
424     d->next=NULL;
425     }
426    
427     /* This block's code is about to be thrown away, so it no longer
428     depends on anything else */
429     static __inline__ void remove_deps(blockinfo* bi)
430     {
431     remove_dep(&(bi->dep[0]));
432     remove_dep(&(bi->dep[1]));
433     }
434    
/* Re-patch the 32-bit relative displacement stored at jmp_off so the
   recorded native jump lands on handler a.  The displacement is taken
   relative to the end of the 4-byte offset field, hence the +4. */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
	*(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
439    
440     /********************************************************************
441     * Soft flush handling support functions *
442     ********************************************************************/
443    
/* Change the direct handler this block's callers should jump to.
   Every jump dependency recorded against the block is re-patched so
   already-emitted native jumps land on the new handler dh. */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
	//write_log("bi is %p\n",bi);
	if (dh!=bi->direct_handler_to_use) {
		dependency* x=bi->deplist;
		//write_log("bi->deplist=%p\n",bi->deplist);
		while (x) {
			//write_log("x is %p\n",x);
			//write_log("x->next is %p\n",x->next);
			//write_log("x->prev_p is %p\n",x->prev_p);

			if (x->jmp_off) {
				adjust_jmpdep(x,dh);
			}
			x=x->next;
		}
		bi->direct_handler_to_use=dh;
	}
}
463    
/* Reset a blockinfo to the "not translated" state: execution counter
   rearmed, handlers routed back to the interpreter entry stub, and all
   outgoing jump dependencies dropped. */
static __inline__ void invalidate_block(blockinfo* bi)
{
	int i;

	bi->optlevel=0;
	bi->count=optcount[0]-1;	/* executions left before translation */
	bi->handler=NULL;
	bi->handler_to_use=(cpuop_func *)popall_execute_normal;
	bi->direct_handler=NULL;
	set_dhtu(bi,bi->direct_pen);
	bi->needed_flags=0xff;	/* conservatively assume all flags needed */
	bi->status=BI_INVALID;
	for (i=0;i<2;i++) {
		bi->dep[i].jmp_off=NULL;
		bi->dep[i].target=NULL;
	}
	remove_deps(bi);
}
482    
/* Record that block bi's i-th emitted jump (patch location jmpaddr)
   targets the translated block at 68k address target, and link the
   dependency into the target's list so the jump can be re-patched if
   the target's handler changes. */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
	blockinfo* tbi=get_blockinfo_addr((void*)(uintptr)target);

	Dif(!tbi) {
		write_log("Could not create jmpdep!\n");
		abort();
	}
	bi->dep[i].jmp_off=jmpaddr;
	bi->dep[i].source=bi;
	bi->dep[i].target=tbi;
	bi->dep[i].next=tbi->deplist;
	if (bi->dep[i].next)
		bi->dep[i].next->prev_p=&(bi->dep[i].next);
	bi->dep[i].prev_p=&(tbi->deplist);
	tbi->deplist=&(bi->dep[i]);
}
500    
/* Flag a block for recompilation: route its handlers back through the
   interpreter entry stub and, if the block heads its cache line, update
   the line's dispatch handler as well. */
static __inline__ void block_need_recompile(blockinfo * bi)
{
	uae_u32 cl = cacheline(bi->pc_p);

	set_dhtu(bi, bi->direct_pen);
	bi->direct_handler = bi->direct_pen;

	bi->handler_to_use = (cpuop_func *)popall_execute_normal;
	bi->handler = (cpuop_func *)popall_execute_normal;
	if (bi == cache_tags[cl + 1].bi)
		cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
	bi->status = BI_NEED_RECOMP;
}
514    
/* Recursively mark every block that jumps into bi for recompilation,
   since their emitted direct jumps are about to become stale.  A caller
   currently being compiled cannot be re-marked, so request a redo of
   the current block instead. */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
	dependency *x = bi->deplist;

	while (x) {
		dependency *next = x->next;	/* This disappears when we mark for
					 * recompilation and thus remove the
					 * blocks from the lists */
		if (x->jmp_off) {
			blockinfo *cbi = x->source;

			Dif(cbi->status == BI_INVALID) {
				// write_log("invalid block in dependency list\n"); // FIXME?
				// abort();
			}
			if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
				block_need_recompile(cbi);
				mark_callers_recompile(cbi);
			}
			else if (cbi->status == BI_COMPILING) {
				redo_current_block = 1;
			}
			else if (cbi->status == BI_NEED_RECOMP) {
				/* nothing */
			}
			else {
				//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
			}
		}
		x = next;
	}
}
547    
548     static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
549     {
550     blockinfo* bi=get_blockinfo_addr(addr);
551     int i;
552    
553     if (!bi) {
554     for (i=0;i<MAX_HOLD_BI && !bi;i++) {
555     if (hold_bi[i]) {
556     uae_u32 cl=cacheline(addr);
557    
558     bi=hold_bi[i];
559     hold_bi[i]=NULL;
560     bi->pc_p=(uae_u8 *)addr;
561     invalidate_block(bi);
562     add_to_active(bi);
563     add_to_cl_list(bi);
564    
565     }
566     }
567     }
568     if (!bi) {
569     write_log("Looking for blockinfo, can't find free one\n");
570     abort();
571     }
572     return bi;
573     }
574    
575     static void prepare_block(blockinfo* bi);
576    
577     /* Management of blockinfos.
578    
579     A blockinfo struct is allocated whenever a new block has to be
580     compiled. If the list of free blockinfos is empty, we allocate a new
581     pool of blockinfos and link the newly created blockinfos altogether
582     into the list of free blockinfos. Otherwise, we simply pop a structure
583 gbeauche 1.7 off the free list.
584 gbeauche 1.1
585     Blockinfo are lazily deallocated, i.e. chained altogether in the
586     list of free blockinfos whenever a translation cache flush (hard or
587     soft) request occurs.
588     */
589    
590 gbeauche 1.7 template< class T >
591     class LazyBlockAllocator
592     {
593     enum {
594     kPoolSize = 1 + 4096 / sizeof(T)
595     };
596     struct Pool {
597     T chunk[kPoolSize];
598     Pool * next;
599     };
600     Pool * mPools;
601     T * mChunks;
602     public:
603     LazyBlockAllocator() : mPools(0), mChunks(0) { }
604     ~LazyBlockAllocator();
605     T * acquire();
606     void release(T * const);
607 gbeauche 1.1 };
608    
609 gbeauche 1.7 template< class T >
610     LazyBlockAllocator<T>::~LazyBlockAllocator()
611 gbeauche 1.1 {
612 gbeauche 1.7 Pool * currentPool = mPools;
613     while (currentPool) {
614     Pool * deadPool = currentPool;
615     currentPool = currentPool->next;
616     free(deadPool);
617     }
618     }
619    
620     template< class T >
621     T * LazyBlockAllocator<T>::acquire()
622     {
623     if (!mChunks) {
624     // There is no chunk left, allocate a new pool and link the
625     // chunks into the free list
626     Pool * newPool = (Pool *)malloc(sizeof(Pool));
627     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
628     chunk->next = mChunks;
629     mChunks = chunk;
630 gbeauche 1.1 }
631 gbeauche 1.7 newPool->next = mPools;
632     mPools = newPool;
633     }
634     T * chunk = mChunks;
635     mChunks = chunk->next;
636     return chunk;
637     }
638    
639     template< class T >
640     void LazyBlockAllocator<T>::release(T * const chunk)
641     {
642     chunk->next = mChunks;
643     mChunks = chunk;
644     }
645    
/* Trivial allocator that carves structs directly off the translation
   cache's current compile pointer.  Nothing is freed explicitly;
   storage is reclaimed when the cache is invalidated. */
template< class T >
class HardBlockAllocator
{
public:
	T * acquire() {
		T * data = (T *)current_compile_p;
		current_compile_p += sizeof(T);
		return data;
	}

	void release(T * const chunk) {
		// Deallocated on invalidation
	}
};
660    
661     #if USE_SEPARATE_BIA
662     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
663     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
664 gbeauche 1.1 #else
665 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
666     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
667 gbeauche 1.1 #endif
668    
669 gbeauche 1.8 static __inline__ checksum_info *alloc_checksum_info(void)
670     {
671     checksum_info *csi = ChecksumInfoAllocator.acquire();
672     csi->next = NULL;
673     return csi;
674     }
675    
676     static __inline__ void free_checksum_info(checksum_info *csi)
677     {
678     csi->next = NULL;
679     ChecksumInfoAllocator.release(csi);
680     }
681    
682     static __inline__ void free_checksum_info_chain(checksum_info *csi)
683     {
684     while (csi != NULL) {
685     checksum_info *csi2 = csi->next;
686     free_checksum_info(csi);
687     csi = csi2;
688     }
689     }
690 gbeauche 1.7
691     static __inline__ blockinfo *alloc_blockinfo(void)
692 gbeauche 1.1 {
693 gbeauche 1.7 blockinfo *bi = BlockInfoAllocator.acquire();
694     #if USE_CHECKSUM_INFO
695     bi->csi = NULL;
696 gbeauche 1.1 #endif
697 gbeauche 1.7 return bi;
698 gbeauche 1.1 }
699    
700 gbeauche 1.7 static __inline__ void free_blockinfo(blockinfo *bi)
701 gbeauche 1.1 {
702 gbeauche 1.7 #if USE_CHECKSUM_INFO
703 gbeauche 1.8 free_checksum_info_chain(bi->csi);
704     bi->csi = NULL;
705 gbeauche 1.1 #endif
706 gbeauche 1.7 BlockInfoAllocator.release(bi);
707 gbeauche 1.1 }
708    
709     static __inline__ void alloc_blockinfos(void)
710     {
711     int i;
712     blockinfo* bi;
713    
714     for (i=0;i<MAX_HOLD_BI;i++) {
715     if (hold_bi[i])
716     return;
717     bi=hold_bi[i]=alloc_blockinfo();
718     prepare_block(bi);
719     }
720     }
721    
722     /********************************************************************
723     * Functions to emit data into memory, and other general support *
724     ********************************************************************/
725    
/* Current output position of the code emitter in the translation cache. */
static uae_u8* target;

/* One-time emitter setup (currently nothing to do). */
static void emit_init(void)
{
}
731    
732     static __inline__ void emit_byte(uae_u8 x)
733     {
734     *target++=x;
735     }
736    
737     static __inline__ void emit_word(uae_u16 x)
738     {
739     *((uae_u16*)target)=x;
740     target+=2;
741     }
742    
743     static __inline__ void emit_long(uae_u32 x)
744     {
745     *((uae_u32*)target)=x;
746     target+=4;
747     }
748    
749 gbeauche 1.24 static __inline__ void emit_quad(uae_u64 x)
750     {
751     *((uae_u64*)target)=x;
752     target+=8;
753     }
754    
755 gbeauche 1.12 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
756     {
757     memcpy((uae_u8 *)target,block,blocklen);
758     target+=blocklen;
759     }
760    
761 gbeauche 1.1 static __inline__ uae_u32 reverse32(uae_u32 v)
762     {
763     #if 1
764     // gb-- We have specialized byteswapping functions, just use them
765     return do_byteswap_32(v);
766     #else
767     return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
768     #endif
769     }
770    
771     /********************************************************************
772     * Getting the information about the target CPU *
773     ********************************************************************/
774    
775     #include "codegen_x86.cpp"
776    
777     void set_target(uae_u8* t)
778     {
779     target=t;
780     }
781    
782     static __inline__ uae_u8* get_target_noopt(void)
783     {
784     return target;
785     }
786    
787     __inline__ uae_u8* get_target(void)
788     {
789     return get_target_noopt();
790     }
791    
792    
793     /********************************************************************
794     * Flags status handling. EMIT TIME! *
795     ********************************************************************/
796    
797     static void bt_l_ri_noclobber(R4 r, IMM i);
798    
/* Ensure the emulated flags are live in the native flags register.
   If they currently exist only in regflags ("on stack"), reload them
   through the FLAG_NREG2 scratch register; abort if no valid copy
   exists anywhere. */
static void make_flags_live_internal(void)
{
	if (live.flags_in_flags==VALID)
		return;
	Dif (live.flags_on_stack==TRASH) {
		write_log("Want flags, got something on stack, but it is TRASH\n");
		abort();
	}
	if (live.flags_on_stack==VALID) {
		int tmp;
		tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
		raw_reg_to_flags(tmp);
		unlock2(tmp);

		live.flags_in_flags=VALID;
		return;
	}
	write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
		  live.flags_in_flags,live.flags_on_stack);
	abort();
}
820    
/* Save the native flags into regflags ("the stack", for historical
   reasons) unless a valid copy is already there, or nobody currently
   cares about the flag values. */
static void flags_to_stack(void)
{
	if (live.flags_on_stack==VALID)
		return;
	if (!live.flags_are_important) {
		/* Nobody will read the flags: just declare the copy valid. */
		live.flags_on_stack=VALID;
		return;
	}
	Dif (live.flags_in_flags!=VALID)
		abort();
	else {
		int tmp;
		tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
		raw_flags_to_reg(tmp);
		unlock2(tmp);
	}
	live.flags_on_stack=VALID;
}
839    
840     static __inline__ void clobber_flags(void)
841     {
842     if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
843     flags_to_stack();
844     live.flags_in_flags=TRASH;
845     }
846    
847     /* Prepare for leaving the compiled stuff */
848     static __inline__ void flush_flags(void)
849     {
850     flags_to_stack();
851     return;
852     }
853    
854     int touchcnt;
855    
856     /********************************************************************
857 gbeauche 1.18 * Partial register flushing for optimized calls *
858     ********************************************************************/
859    
860     struct regusage {
861     uae_u16 rmask;
862     uae_u16 wmask;
863     };
864    
865     static inline void ru_set(uae_u16 *mask, int reg)
866     {
867     #if USE_OPTIMIZED_CALLS
868     *mask |= 1 << reg;
869     #endif
870     }
871    
872     static inline bool ru_get(const uae_u16 *mask, int reg)
873     {
874     #if USE_OPTIMIZED_CALLS
875     return (*mask & (1 << reg));
876     #else
877     /* Default: instruction reads & write to register */
878     return true;
879     #endif
880     }
881    
882     static inline void ru_set_read(regusage *ru, int reg)
883     {
884     ru_set(&ru->rmask, reg);
885     }
886    
887     static inline void ru_set_write(regusage *ru, int reg)
888     {
889     ru_set(&ru->wmask, reg);
890     }
891    
892     static inline bool ru_read_p(const regusage *ru, int reg)
893     {
894     return ru_get(&ru->rmask, reg);
895     }
896    
897     static inline bool ru_write_p(const regusage *ru, int reg)
898     {
899     return ru_get(&ru->wmask, reg);
900     }
901    
/* Accumulate the register reads/writes implied by one effective
   address of the given addressing mode, advancing m68k_pc_offset past
   any extension words the EA consumes. */
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
		       wordsizes size, int write_mode)
{
	switch (mode) {
	case Areg:
		reg += 8;
		/* fall through */
	case Dreg:
		ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
		break;
	case Ad16:
		/* skip displacement */
		m68k_pc_offset += 2;
		/* fall through */
	case Aind:
	case Aipi:
	case Apdi:
		ru_set_read(ru, reg+8);
		break;
	case Ad8r:
		ru_set_read(ru, reg+8);
		/* fall through */
	case PC8r: {
		/* extension word: index register number in bits 15-12 */
		uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
		reg = (dp >> 12) & 15;
		ru_set_read(ru, reg);
		if (dp & 0x100)
			/* full extension word: skip extra displacement words */
			m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
		break;
	}
	case PC16:
	case absw:
	case imm0:
	case imm1:
		m68k_pc_offset += 2;
		break;
	case absl:
	case imm2:
		m68k_pc_offset += 4;
		break;
	case immi:
		m68k_pc_offset += (size == sz_long) ? 4 : 2;
		break;
	}
}
946    
/* TODO: split into a static initialization part and a dynamic one
   (instructions depending on extension words) */
/* Compute the m68k register usage (read mask / write mask) of one
 * instruction, consuming any extension words from the instruction
 * stream (advances m68k_pc_offset accordingly). */
static void ru_fill(regusage *ru, uae_u32 opcode)
{
	m68k_pc_offset += 2;

	/* Default: no register is used or written to */
	ru->rmask = 0;
	ru->wmask = 0;

	uae_u32 real_opcode = cft_map(opcode);
	struct instr *dp = &table68k[real_opcode];

	bool rw_dest = true;	/* destination is read-modify-write (rmask |= wmask at end) */
	bool handled = false;	/* set when EA masks were filled explicitly */

	/* Handle some instructions specifically */
	uae_u16 reg, ext;	/* NOTE(review): reg is unused in this function */
	switch (dp->mnemo) {
	case i_BFCHG:
	case i_BFCLR:
	case i_BFEXTS:
	case i_BFEXTU:
	case i_BFFFO:
	case i_BFINS:
	case i_BFSET:
	case i_BFTST:
		/* Bitfield extension word: offset and/or width may live in Dn. */
		ext = comp_get_iword((m68k_pc_offset+=2)-2);
		if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);	/* offset register */
		if (ext & 0x020) ru_set_read(ru, ext & 7);		/* width register */
		ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
		if (dp->dmode == Dreg)
			ru_set_read(ru, dp->dreg);
		switch (dp->mnemo) {
		case i_BFEXTS:
		case i_BFEXTU:
		case i_BFFFO:
			ru_set_write(ru, (ext >> 12) & 7);	/* extracted result register */
			break;
		case i_BFINS:
			ru_set_read(ru, (ext >> 12) & 7);	/* inserted source register */
			/* fall through */
		case i_BFCHG:
		case i_BFCLR:
		case i_BSET:
			/* NOTE(review): i_BSET looks like a typo for i_BFSET -- i_BSET
			   cannot reach this inner switch.  Harmless in practice since
			   ru_fill_ea(..., 1) above already marks a Dreg destination as
			   written; TODO confirm against upstream. */
			if (dp->dmode == Dreg)
				ru_set_write(ru, dp->dreg);
			break;
		}
		handled = true;
		rw_dest = false;
		break;

	case i_BTST:
		rw_dest = false;	/* BTST only tests, never writes its destination */
		break;

	case i_CAS:
	{
		ext = comp_get_iword((m68k_pc_offset+=2)-2);
		int Du = ext & 7;		/* update value */
		ru_set_read(ru, Du);
		int Dc = (ext >> 6) & 7;	/* compare value, also written on mismatch */
		ru_set_read(ru, Dc);
		ru_set_write(ru, Dc);
		break;
	}
	case i_CAS2:
	{
		int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
		ext = comp_get_iword((m68k_pc_offset+=2)-2);
		Rn1 = (ext >> 12) & 15;
		Du1 = (ext >> 6) & 7;
		Dc1 = ext & 7;
		ru_set_read(ru, Rn1);
		ru_set_read(ru, Du1);
		ru_set_read(ru, Dc1);
		ru_set_write(ru, Dc1);
		ext = comp_get_iword((m68k_pc_offset+=2)-2);
		Rn2 = (ext >> 12) & 15;
		Du2 = (ext >> 6) & 7;
		Dc2 = ext & 7;
		ru_set_read(ru, Rn2);
		ru_set_read(ru, Du2);
		/* NOTE(review): Dc2 is compared too, so a ru_set_read would be
		   expected here (as done for Dc1).  Covered anyway by the
		   rw_dest fixup below, which ORs wmask into rmask. */
		ru_set_write(ru, Dc2);
		break;
	}
	case i_DIVL: case i_MULL:
		m68k_pc_offset += 2;	/* skip extension word */
		break;
	case i_LEA:
	case i_MOVE: case i_MOVEA: case i_MOVE16:
		rw_dest = false;	/* destination is write-only */
		break;
	case i_PACK: case i_UNPK:
		rw_dest = false;
		m68k_pc_offset += 2;	/* skip adjustment extension word */
		break;
	case i_TRAPcc:
		m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;	/* optional operand */
		break;
	case i_RTR:
		/* do nothing, just for coverage debugging */
		break;
	/* TODO: handle EXG instruction */
	}

	/* Handle A-Traps better */
	if ((real_opcode & 0xf000) == 0xa000) {
		handled = true;
	}

	/* Handle EmulOps better */
	if ((real_opcode & 0xff00) == 0x7100) {
		handled = true;
		ru->rmask = 0xffff;	/* EmulOps may read any m68k register */
		ru->wmask = 0;
	}

	if (dp->suse && !handled)
		ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);

	if (dp->duse && !handled)
		ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);

	if (rw_dest)
		ru->rmask |= ru->wmask;	/* rmw destinations are read as well */

	handled = handled || dp->suse || dp->duse;

	/* Mark all registers as used/written if the instruction may trap */
	if (may_trap(opcode)) {
		handled = true;
		ru->rmask = 0xffff;
		ru->wmask = 0xffff;
	}

	if (!handled) {
		write_log("ru_fill: %04x = { %04x, %04x }\n",
			  real_opcode, ru->rmask, ru->wmask);
		abort();
	}
}
1090    
1091     /********************************************************************
1092 gbeauche 1.1 * register allocation per block logging *
1093     ********************************************************************/
1094    
/* Per-block register usage log driven by the USE_MATCH block-matching
   optimization. */
static uae_s8 vstate[VREGS];	/* per-vreg: one of the L_* codes below */
static uae_s8 vwritten[VREGS];	/* per-vreg: written inside this block? */
static uae_s8 nstate[N_REGS];	/* per-nreg: L_* code, or (>=0) the vreg it holds */

#define L_UNKNOWN -127	/* not observed yet in this block */
#define L_UNAVAIL -1	/* nreg used as a temp -- no cross-block match possible */
#define L_NEEDED -2	/* vreg value is needed on block entry */
#define L_UNNEEDED -3	/* vreg is overwritten before being read */
1103    
1104     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1105     {
1106     int i;
1107    
1108     for (i = 0; i < VREGS; i++)
1109     s->virt[i] = vstate[i];
1110     for (i = 0; i < N_REGS; i++)
1111     s->nat[i] = nstate[i];
1112     }
1113    
/* Decide whether callers of the current block should be recompiled
   against the entry state s.  Returns 1 when a mismatch forces (or
   makes worthwhile) a recompile, 0 otherwise.  (b is unused.) */
static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
{
	int i;
	int reverse = 0;	/* mismatches in the "cheap" direction */

	for (i = 0; i < VREGS; i++) {
		/* We need a vreg the callers assumed unneeded: must recompile. */
		if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
			return 1;
		if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
			reverse++;
	}
	for (i = 0; i < N_REGS; i++) {
		/* We expect a specific vreg in this nreg and callers disagree. */
		if (nstate[i] >= 0 && nstate[i] != s->nat[i])
			return 1;
		if (nstate[i] < 0 && s->nat[i] >= 0)
			reverse++;
	}
	if (reverse >= 2 && USE_MATCH)
		return 1;	/* In this case, it might be worth recompiling the
				 * callers */
	return 0;
}
1136    
1137     static __inline__ void log_startblock(void)
1138     {
1139     int i;
1140    
1141     for (i = 0; i < VREGS; i++) {
1142     vstate[i] = L_UNKNOWN;
1143     vwritten[i] = 0;
1144     }
1145     for (i = 0; i < N_REGS; i++)
1146     nstate[i] = L_UNKNOWN;
1147     }
1148    
1149     /* Using an n-reg for a temp variable */
1150     static __inline__ void log_isused(int n)
1151     {
1152     if (nstate[n] == L_UNKNOWN)
1153     nstate[n] = L_UNAVAIL;
1154     }
1155    
1156     static __inline__ void log_visused(int r)
1157     {
1158     if (vstate[r] == L_UNKNOWN)
1159     vstate[r] = L_NEEDED;
1160     }
1161    
/* Emit code to load vreg r into native register n, dispatching to the
   special loaders for the lazy-flags registers. */
static __inline__ void do_load_reg(int n, int r)
{
	if (r == FLAGTMP)
		raw_load_flagreg(n, r);
	else if (r == FLAGX)
		raw_load_flagx(n, r);
	else
		raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}
1171    
/* Unconditionally load vreg r from its memory slot into native reg n
   (no special-casing of the flag registers). */
static __inline__ void check_load_reg(int n, int r)
{
	raw_mov_l_rm(n, (uintptr) live.state[r].mem);
}
1176    
/* Record that vreg r has been written inside this block. */
static __inline__ void log_vwrite(int r)
{
	vwritten[r] = 1;
}
1181    
1182     /* Using an n-reg to hold a v-reg */
1183     static __inline__ void log_isreg(int n, int r)
1184     {
1185     static int count = 0;
1186    
1187     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1188     nstate[n] = r;
1189     else {
1190     do_load_reg(n, r);
1191     if (nstate[n] == L_UNKNOWN)
1192     nstate[n] = L_UNAVAIL;
1193     }
1194     if (vstate[r] == L_UNKNOWN)
1195     vstate[r] = L_NEEDED;
1196     }
1197    
1198     static __inline__ void log_clobberreg(int r)
1199     {
1200     if (vstate[r] == L_UNKNOWN)
1201     vstate[r] = L_UNNEEDED;
1202     }
1203    
1204     /* This ends all possibility of clever register allocation */
1205    
1206     static __inline__ void log_flush(void)
1207     {
1208     int i;
1209    
1210     for (i = 0; i < VREGS; i++)
1211     if (vstate[i] == L_UNKNOWN)
1212     vstate[i] = L_NEEDED;
1213     for (i = 0; i < N_REGS; i++)
1214     if (nstate[i] == L_UNKNOWN)
1215     nstate[i] = L_UNAVAIL;
1216     }
1217    
/* Debug helper: dump the current register logging state.  The early
   "return" below intentionally disables the output; the code after it
   is deliberately dead -- remove the return to see the dump. */
static __inline__ void log_dump(void)
{
	int i;

	return;

	write_log("----------------------\n");
	for (i = 0; i < N_REGS; i++) {
		switch (nstate[i]) {
		case L_UNKNOWN:
			write_log("Nat %d : UNKNOWN\n", i);
			break;
		case L_UNAVAIL:
			write_log("Nat %d : UNAVAIL\n", i);
			break;
		default:
			write_log("Nat %d : %d\n", i, nstate[i]);
			break;
		}
	}
	for (i = 0; i < VREGS; i++) {
		if (vstate[i] == L_UNNEEDED)
			write_log("Virt %d: UNNEEDED\n", i);
	}
}
1243    
1244     /********************************************************************
1245     * register status handling. EMIT TIME! *
1246     ********************************************************************/
1247    
1248     static __inline__ void set_status(int r, int status)
1249     {
1250     if (status == ISCONST)
1251     log_clobberreg(r);
1252     live.state[r].status=status;
1253     }
1254    
1255     static __inline__ int isinreg(int r)
1256     {
1257     return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1258     }
1259    
1260     static __inline__ void adjust_nreg(int r, uae_u32 val)
1261     {
1262     if (!val)
1263     return;
1264     raw_lea_l_brr(r,r,val);
1265     }
1266    
/* Write vreg r back to its memory slot if dirty.  If its native
   register has a pending constant offset and is the sole, unlocked
   holder, the offset is folded in first (making the value fully
   dirty). */
static void tomem(int r)
{
	int rr=live.state[r].realreg;

	if (isinreg(r)) {
		if (live.state[r].val && live.nat[rr].nholds==1
			&& !live.nat[rr].locked) {
			// write_log("RemovingA offset %x from reg %d (%d) at %p\n",
			//       live.state[r].val,r,rr,target);
			adjust_nreg(rr,live.state[r].val);
			live.state[r].val=0;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
		}
	}

	if (live.state[r].status==DIRTY) {
		/* Store only the dirty low bytes. */
		switch (live.state[r].dirtysize) {
		case 1: raw_mov_b_mr((uintptr)live.state[r].mem,rr); break;
		case 2: raw_mov_w_mr((uintptr)live.state[r].mem,rr); break;
		case 4: raw_mov_l_mr((uintptr)live.state[r].mem,rr); break;
		default: abort();
		}
		log_vwrite(r);
		set_status(r,CLEAN);
		live.state[r].dirtysize=0;
	}
}
1295    
1296     static __inline__ int isconst(int r)
1297     {
1298     return live.state[r].status==ISCONST;
1299     }
1300    
/* Externally visible wrapper around isconst(). */
int is_const(int r)
{
	return isconst(r);
}
1305    
/* Spill a constant-valued vreg to its memory slot as an immediate
   store and return it to the INMEM state. */
static __inline__ void writeback_const(int r)
{
	if (!isconst(r))
		return;
	Dif (live.state[r].needflush==NF_HANDLER) {
		write_log("Trying to write back constant NF_HANDLER!\n");
		abort();
	}

	raw_mov_l_mi((uintptr)live.state[r].mem,live.state[r].val);
	log_vwrite(r);
	live.state[r].val=0;
	set_status(r,INMEM);
}
1320    
1321     static __inline__ void tomem_c(int r)
1322     {
1323     if (isconst(r)) {
1324     writeback_const(r);
1325     }
1326     else
1327     tomem(r);
1328     }
1329    
/* Remove vreg r from its native register, writing it back first, and
   compact that register's holder list. */
static void evict(int r)
{
	int rr;

	if (!isinreg(r))
		return;
	tomem(r);
	rr=live.state[r].realreg;

	Dif (live.nat[rr].locked &&
		live.nat[rr].nholds==1) {
		write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
		abort();
	}

	live.nat[rr].nholds--;
	if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
		/* Move the last holder into the vacated slot. */
		int topreg=live.nat[rr].holds[live.nat[rr].nholds];
		int thisind=live.state[r].realind;

		live.nat[rr].holds[thisind]=topreg;
		live.state[topreg].realind=thisind;
	}
	live.state[r].realreg=-1;
	set_status(r,INMEM);
}
1356    
/* Evict every vreg held by native register r, leaving it empty.
   Iterates from the top of the holder list down so each evict() (which
   removes the topmost entry) keeps the remaining indices valid. */
static __inline__ void free_nreg(int r)
{
	int i=live.nat[r].nholds;

	while (i) {
		int vr;

		--i;
		vr=live.nat[r].holds[i];
		evict(vr);
	}
	Dif (live.nat[r].nholds!=0) {
		write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
		abort();
	}
}
1373    
1374     /* Use with care! */
1375     static __inline__ void isclean(int r)
1376     {
1377     if (!isinreg(r))
1378     return;
1379     live.state[r].validsize=4;
1380     live.state[r].dirtysize=0;
1381     live.state[r].val=0;
1382     set_status(r,CLEAN);
1383     }
1384    
/* Detach vreg r from its native register WITHOUT writing it back
   (pending dirty state is discarded by isclean() first). */
static __inline__ void disassociate(int r)
{
	isclean(r);
	evict(r);
}
1390    
/* Turn vreg r into the compile-time constant val, dropping any
   register association first. */
static __inline__ void set_const(int r, uae_u32 val)
{
	disassociate(r);
	live.state[r].val=val;
	set_status(r,ISCONST);
}
1397    
/* Return the pending constant offset (or constant value) of vreg r. */
static __inline__ uae_u32 get_offset(int r)
{
	return live.state[r].val;
}
1402    
/* Allocate a native register for vreg r.
 *
 * size        - number of low bytes the caller needs valid (1/2/4)
 * willclobber - nonzero if the caller overwrites the value, so no load
 *               is needed
 * hint        - preferred native register, or -1 for no preference
 *
 * Victim selection is by least-recently-touched ("badness"); free
 * registers cost 0 and the hinted register gets a large bonus.  Locked
 * registers are never taken.  Returns the chosen native register,
 * already linked into the holder bookkeeping. */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
	int bestreg;
	uae_s32 when;
	int i;
	uae_s32 badness=0; /* to shut up gcc */
	bestreg=-1;
	when=2000000000;

	for (i=N_REGS;i--;) {
		badness=live.nat[i].touched;
		if (live.nat[i].nholds==0)
			badness=0;
		if (i==hint)
			badness-=200000000;
		if (!live.nat[i].locked && badness<when) {
			/* Only registers that can be accessed at the needed size. */
			if ((size==1 && live.nat[i].canbyte) ||
				(size==2 && live.nat[i].canword) ||
				(size==4)) {
				bestreg=i;
				when=badness;
				if (live.nat[i].nholds==0 && hint<0)
					break;
				if (i==hint)
					break;
			}
		}
	}
	Dif (bestreg==-1)
		abort();

	if (live.nat[bestreg].nholds>0) {
		free_nreg(bestreg);
	}
	if (isinreg(r)) {
		int rr=live.state[r].realreg;
		/* This will happen if we read a partially dirty register at a
		   bigger size */
		Dif (willclobber || live.state[r].validsize>=size)
			abort();
		Dif (live.nat[rr].nholds!=1)
			abort();
		if (size==4 && live.state[r].validsize==2) {
			/* Widen 2 -> 4: fetch the full value from memory into
			   bestreg, zero out its low 16 bits (bswap/zero-extend
			   trick), and add it to the zero-extended valid low word
			   already in rr. */
			log_isused(bestreg);
			log_visused(r);
			raw_mov_l_rm(bestreg,(uintptr)live.state[r].mem);
			raw_bswap_32(bestreg);
			raw_zero_extend_16_rr(rr,rr);
			raw_zero_extend_16_rr(bestreg,bestreg);
			raw_bswap_32(bestreg);
			raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
			live.state[r].validsize=4;
			live.nat[rr].touched=touchcnt++;
			return rr;
		}
		if (live.state[r].validsize==1) {
			/* Nothing yet */
		}
		evict(r);
	}

	if (!willclobber) {
		if (live.state[r].status!=UNDEF) {
			if (isconst(r)) {
				/* Materialize the constant; register becomes dirty. */
				raw_mov_l_ri(bestreg,live.state[r].val);
				live.state[r].val=0;
				live.state[r].dirtysize=4;
				set_status(r,DIRTY);
				log_isused(bestreg);
			}
			else {
				log_isreg(bestreg, r); /* This will also load it! */
				live.state[r].dirtysize=0;
				set_status(r,CLEAN);
			}
		}
		else {
			/* Undefined value: nothing worth loading. */
			live.state[r].val=0;
			live.state[r].dirtysize=0;
			set_status(r,CLEAN);
			log_isused(bestreg);
		}
		live.state[r].validsize=4;
	}
	else { /* this is the easiest way, but not optimal. FIXME! */
		/* Now it's trickier, but hopefully still OK */
		if (!isconst(r) || size==4) {
			live.state[r].validsize=size;
			live.state[r].dirtysize=size;
			live.state[r].val=0;
			set_status(r,DIRTY);
			if (size == 4) {
				/* Full clobber: old value entirely dead. */
				log_clobberreg(r);
				log_isused(bestreg);
			}
			else {
				/* Partial clobber: remaining bytes still needed. */
				log_visused(r);
				log_isused(bestreg);
			}
		}
		else {
			/* Partial clobber of a constant: materialize it first. */
			if (live.state[r].status!=UNDEF)
				raw_mov_l_ri(bestreg,live.state[r].val);
			live.state[r].val=0;
			live.state[r].validsize=4;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
			log_isused(bestreg);
		}
	}
	/* Link r into bestreg's holder list. */
	live.state[r].realreg=bestreg;
	live.state[r].realind=live.nat[bestreg].nholds;
	live.nat[bestreg].touched=touchcnt++;
	live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
	live.nat[bestreg].nholds++;

	return bestreg;
}
1521    
/* alloc_reg_hinted() with no preferred native register. */
static int alloc_reg(int r, int size, int willclobber)
{
	return alloc_reg_hinted(r,size,willclobber,-1);
}
1526    
/* Drop one lock reference on native register r. */
static void unlock2(int r)
{
	Dif (!live.nat[r].locked)
		abort();
	live.nat[r].locked--;
}
1533    
/* Pin native register r so the allocator will not evict it. */
static void setlock(int r)
{
	live.nat[r].locked++;
}
1538    
1539    
/* Move the contents (and all holder bookkeeping) of native register s
   into native register d, freeing whatever d held.  s ends up empty.
   (NOTE(review): the local ns is unused.) */
static void mov_nregs(int d, int s)
{
	int ns=live.nat[s].nholds;
	int nd=live.nat[d].nholds;
	int i;

	if (s==d)
		return;

	if (nd>0)
		free_nreg(d);

	log_isused(d);
	raw_mov_l_rr(d,s);

	/* Re-point every vreg held by s at d. */
	for (i=0;i<live.nat[s].nholds;i++) {
		int vs=live.nat[s].holds[i];

		live.state[vs].realreg=d;
		live.state[vs].realind=i;
		live.nat[d].holds[i]=vs;
	}
	live.nat[d].nholds=live.nat[s].nholds;

	live.nat[s].nholds=0;
}
1566    
1567    
/* Ensure vreg r is the ONLY vreg held by its native register, so the
   caller can write it without disturbing other holders.  size is the
   number of bytes the caller will access; spec is the forced/hinted
   nreg for any newly allocated register (-1 for none).
   (NOTE(review): the local clobber is unused.) */
static __inline__ void make_exclusive(int r, int size, int spec)
{
	int clobber;
	reg_status oldstate;
	int rr=live.state[r].realreg;
	int nr;
	int nind;
	int ndirt=0;
	int i;

	if (!isinreg(r))
		return;
	if (live.nat[rr].nholds==1)
		return;
	/* Count the other holders that would need a writeback. */
	for (i=0;i<live.nat[rr].nholds;i++) {
		int vr=live.nat[rr].holds[i];
		if (vr!=r &&
			(live.state[vr].status==DIRTY || live.state[vr].val))
			ndirt++;
	}
	if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
		/* Everything else is clean, so let's keep this register */
		for (i=0;i<live.nat[rr].nholds;i++) {
			int vr=live.nat[rr].holds[i];
			if (vr!=r) {
				evict(vr);
				i--; /* Try that index again! */
			}
		}
		Dif (live.nat[rr].nholds!=1) {
			write_log("natreg %d holds %d vregs, %d not exclusive\n",
				rr,live.nat[rr].nholds,r);
			abort();
		}
		return;
	}

	/* We have to split the register */
	oldstate=live.state[r];

	setlock(rr); /* Make sure this doesn't go away */
	/* Forget about r being in the register rr */
	disassociate(r);
	/* Get a new register, that we will clobber completely */
	if (oldstate.status==DIRTY) {
		/* If dirtysize is <4, we need a register that can handle the
		   eventual smaller memory store! Thanks to Quake68k for exposing
		   this detail ;-) */
		nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
	}
	else {
		nr=alloc_reg_hinted(r,4,1,spec);
	}
	nind=live.state[r].realind;
	live.state[r]=oldstate; /* Keep all the old state info */
	live.state[r].realreg=nr;
	live.state[r].realind=nind;

	if (size<live.state[r].validsize) {
		/* The caller still needs part of the old value: copy it over. */
		if (live.state[r].val) {
			/* Might as well compensate for the offset now */
			raw_lea_l_brr(nr,rr,oldstate.val);
			live.state[r].val=0;
			live.state[r].dirtysize=4;
			set_status(r,DIRTY);
		}
		else
			raw_mov_l_rr(nr,rr); /* Make another copy */
	}
	unlock2(rr);
}
1639    
/* Lazily add a constant offset to vreg r; no code is emitted until the
   offset is materialized (tomem/remove_offset). */
static __inline__ void add_offset(int r, uae_u32 off)
{
	live.state[r].val+=off;
}
1644    
/* Materialize the pending constant offset of vreg r into its native
   register (emitting an LEA), so val becomes 0.  spec hints which nreg
   to use if r has to be (re)loaded.  (NOTE(review): the local oldstate
   is unused.) */
static __inline__ void remove_offset(int r, int spec)
{
	reg_status oldstate;
	int rr;

	if (isconst(r))
		return;
	if (live.state[r].val==0)
		return;
	if (isinreg(r) && live.state[r].validsize<4)
		evict(r);	/* partial registers cannot absorb the offset */

	if (!isinreg(r))
		alloc_reg_hinted(r,4,0,spec);

	Dif (live.state[r].validsize!=4) {
		write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
		abort();
	}
	make_exclusive(r,0,-1);
	/* make_exclusive might have done the job already */
	if (live.state[r].val==0)
		return;

	rr=live.state[r].realreg;

	if (live.nat[rr].nholds==1) {
		//write_log("RemovingB offset %x from reg %d (%d) at %p\n",
		//   live.state[r].val,r,rr,target);
		adjust_nreg(rr,live.state[r].val);
		live.state[r].dirtysize=4;
		live.state[r].val=0;
		set_status(r,DIRTY);
		return;
	}
	write_log("Failed in remove_offset\n");
	abort();
}
1683    
1684     static __inline__ void remove_all_offsets(void)
1685     {
1686     int i;
1687    
1688     for (i=0;i<VREGS;i++)
1689     remove_offset(i,-1);
1690     }
1691    
1692 gbeauche 1.28 static inline void flush_reg_count(void)
1693     {
1694     #if RECORD_REGISTER_USAGE
1695     for (int r = 0; r < 16; r++)
1696     if (reg_count_local[r])
1697     ADDQim(reg_count_local[r], ((uintptr)reg_count) + (8 * r), X86_NOREG, X86_NOREG, 1);
1698     #endif
1699     }
1700    
/* Tally one access to m68k register r (flushed by flush_reg_count()).
   No-op unless RECORD_REGISTER_USAGE. */
static inline void record_register(int r)
{
#if RECORD_REGISTER_USAGE
	if (r < 16)
		reg_count_local[r]++;
#endif
}
1708    
1709 gbeauche 1.1 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1710     {
1711     int n;
1712     int answer=-1;
1713    
1714 gbeauche 1.28 record_register(r);
1715 gbeauche 1.1 if (live.state[r].status==UNDEF) {
1716     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1717     }
1718     if (!can_offset)
1719     remove_offset(r,spec);
1720    
1721     if (isinreg(r) && live.state[r].validsize>=size) {
1722     n=live.state[r].realreg;
1723     switch(size) {
1724     case 1:
1725     if (live.nat[n].canbyte || spec>=0) {
1726     answer=n;
1727     }
1728     break;
1729     case 2:
1730     if (live.nat[n].canword || spec>=0) {
1731     answer=n;
1732     }
1733     break;
1734     case 4:
1735     answer=n;
1736     break;
1737     default: abort();
1738     }
1739     if (answer<0)
1740     evict(r);
1741     }
1742     /* either the value was in memory to start with, or it was evicted and
1743     is in memory now */
1744     if (answer<0) {
1745     answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1746     }
1747    
1748     if (spec>=0 && spec!=answer) {
1749     /* Too bad */
1750     mov_nregs(spec,answer);
1751     answer=spec;
1752     }
1753     live.nat[answer].locked++;
1754     live.nat[answer].touched=touchcnt++;
1755     return answer;
1756     }
1757    
1758    
1759    
/* Read access: any native register, pending offsets materialized. */
static int readreg(int r, int size)
{
	return readreg_general(r,size,-1,0);
}
1764    
/* Read access into the specific native register spec. */
static int readreg_specific(int r, int size, int spec)
{
	return readreg_general(r,size,spec,0);
}
1769    
/* Read access that tolerates a pending constant offset on r. */
static int readreg_offset(int r, int size)
{
	return readreg_general(r,size,-1,1);
}
1774    
/* writereg_general(r, size, spec)
 *
 * INPUT
 * - r : mid-layer register
 * - size : requested size (1/2/4)
 * - spec : -1 if find or make a register free, otherwise specifies
 * the physical register to use in any case
 *
 * OUTPUT
 * - hard (physical, x86 here) register allocated to virtual register r
 *
 * The register is made exclusive, marked DIRTY at the written size, and
 * returned locked; caller must unlock2() it. */
static __inline__ int writereg_general(int r, int size, int spec)
{
	int n;
	int answer=-1;

	record_register(r);
	if (size<4) {
		/* A partial write must be applied on top of the real value,
		   so any pending offset has to be materialized first. */
		remove_offset(r,spec);
	}

	make_exclusive(r,size,spec);
	if (isinreg(r)) {
		int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
		int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
		n=live.state[r].realreg;

		Dif (live.nat[n].nholds!=1)
			abort();
		/* Usable only if the register supports the access size (any
		   size goes when the caller forces a specific register). */
		switch(size) {
		case 1:
			if (live.nat[n].canbyte || spec>=0) {
				live.state[r].dirtysize=ndsize;
				live.state[r].validsize=nvsize;
				answer=n;
			}
			break;
		case 2:
			if (live.nat[n].canword || spec>=0) {
				live.state[r].dirtysize=ndsize;
				live.state[r].validsize=nvsize;
				answer=n;
			}
			break;
		case 4:
			live.state[r].dirtysize=ndsize;
			live.state[r].validsize=nvsize;
			answer=n;
			break;
		default: abort();
		}
		if (answer<0)
			evict(r);
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0) {
		answer=alloc_reg_hinted(r,size,1,spec);
	}
	if (spec>=0 && spec!=answer) {
		mov_nregs(spec,answer);
		answer=spec;
	}
	if (live.state[r].status==UNDEF)
		live.state[r].validsize=4;
	live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
	live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

	live.nat[answer].locked++;
	live.nat[answer].touched=touchcnt++;
	if (size==4) {
		live.state[r].val=0;	/* full write kills any pending offset */
	}
	else {
		Dif (live.state[r].val) {
			write_log("Problem with val\n");
			abort();
		}
	}
	set_status(r,DIRTY);
	return answer;
}
1857    
/* Write access: any native register. */
static int writereg(int r, int size)
{
	return writereg_general(r,size,-1);
}
1862    
/* Write access into the specific native register spec. */
static int writereg_specific(int r, int size, int spec)
{
	return writereg_general(r,size,spec);
}
1867    
/* Get vreg r into a native register for a read-modify-write access:
   rsize bytes must be valid on entry, wsize bytes are dirty on exit
   (wsize >= rsize required).  spec forces a particular native register
   (-1 to pick).  The returned register is locked; unlock2() it. */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
	int n;
	int answer=-1;

	record_register(r);
	if (live.state[r].status==UNDEF) {
		write_log("WARNING: Unexpected read of undefined register %d\n",r);
	}
	remove_offset(r,spec);
	make_exclusive(r,0,spec);

	Dif (wsize<rsize) {
		write_log("Cannot handle wsize<rsize in rmw_general()\n");
		abort();
	}
	if (isinreg(r) && live.state[r].validsize>=rsize) {
		n=live.state[r].realreg;
		Dif (live.nat[n].nholds!=1)
			abort();

		/* Usable only if the register supports the read size (any size
		   goes when the caller forces a specific register). */
		switch(rsize) {
		case 1:
			if (live.nat[n].canbyte || spec>=0) {
				answer=n;
			}
			break;
		case 2:
			if (live.nat[n].canword || spec>=0) {
				answer=n;
			}
			break;
		case 4:
			answer=n;
			break;
		default: abort();
		}
		if (answer<0)
			evict(r);
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0) {
		answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
	}

	if (spec>=0 && spec!=answer) {
		/* Too bad */
		mov_nregs(spec,answer);
		answer=spec;
	}
	if (wsize>live.state[r].dirtysize)
		live.state[r].dirtysize=wsize;
	if (wsize>live.state[r].validsize)
		live.state[r].validsize=wsize;
	set_status(r,DIRTY);

	live.nat[answer].locked++;
	live.nat[answer].touched=touchcnt++;

	Dif (live.state[r].val) {
		write_log("Problem with val(rmw)\n");
		abort();
	}
	return answer;
}
1934    
/* Read-modify-write access: any native register. */
static int rmw(int r, int wsize, int rsize)
{
	return rmw_general(r,wsize,rsize,-1);
}
1939    
/* Read-modify-write access into the specific native register spec. */
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
	return rmw_general(r,wsize,rsize,spec);
}
1944    
1945    
1946     /* needed for restoring the carry flag on non-P6 cores */
1947     static void bt_l_ri_noclobber(R4 r, IMM i)
1948     {
1949     int size=4;
1950     if (i<16)
1951     size=2;
1952     r=readreg(r,size);
1953     raw_bt_l_ri(r,i);
1954     unlock2(r);
1955     }
1956    
1957     /********************************************************************
1958     * FPU register status handling. EMIT TIME! *
1959     ********************************************************************/
1960    
/* Write FPU vreg r back to memory if dirty; it stays register-resident
   and becomes CLEAN. */
static void f_tomem(int r)
{
	if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
		raw_fmov_ext_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
		raw_fmov_mr((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
		live.fate[r].status=CLEAN;
	}
}
1972    
/* Write FPU vreg r back to memory if dirty, using the "_drop" emitters
   that also discard the register copy; state becomes INMEM. */
static void f_tomem_drop(int r)
{
	if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
		raw_fmov_ext_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#else
		raw_fmov_mr_drop((uintptr)live.fate[r].mem,live.fate[r].realreg);
#endif
		live.fate[r].status=INMEM;
	}
}
1984    
1985    
1986     static __inline__ int f_isinreg(int r)
1987     {
1988     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1989     }
1990    
/* Remove FPU vreg r from its native register, writing it back first
   (using the dropping store when r is the sole holder), and compact
   the holder list. */
static void f_evict(int r)
{
	int rr;

	if (!f_isinreg(r))
		return;
	rr=live.fate[r].realreg;
	if (live.fat[rr].nholds==1)
		f_tomem_drop(r);
	else
		f_tomem(r);

	Dif (live.fat[rr].locked &&
		live.fat[rr].nholds==1) {
		write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
		abort();
	}

	live.fat[rr].nholds--;
	if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
		/* Move the last holder into the vacated slot. */
		int topreg=live.fat[rr].holds[live.fat[rr].nholds];
		int thisind=live.fate[r].realind;
		live.fat[rr].holds[thisind]=topreg;
		live.fate[topreg].realind=thisind;
	}
	live.fate[r].status=INMEM;
	live.fate[r].realreg=-1;
}
2019    
/* Evict every FPU vreg held by native register r, leaving it empty.
   Iterates from the top of the holder list down so each f_evict()
   keeps the remaining indices valid. */
static __inline__ void f_free_nreg(int r)
{
	int i=live.fat[r].nholds;

	while (i) {
		int vr;

		--i;
		vr=live.fat[r].holds[i];
		f_evict(vr);
	}
	Dif (live.fat[r].nholds!=0) {
		write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
		abort();
	}
}
2036    
2037    
2038     /* Use with care! */
2039     static __inline__ void f_isclean(int r)
2040     {
2041     if (!f_isinreg(r))
2042     return;
2043     live.fate[r].status=CLEAN;
2044     }
2045    
/* Detach FPU vreg r from its native register WITHOUT writing it back
   (pending dirty state is discarded by f_isclean() first). */
static __inline__ void f_disassociate(int r)
{
	f_isclean(r);
	f_evict(r);
}
2051    
2052    
2053    
/* Allocate a native FPU register for vreg r.  willclobber nonzero means
   the caller will overwrite the value, so it is not loaded from memory.
   Victim selection: least recently touched unlocked register, with free
   registers preferred.  Returns the chosen register, linked into the
   holder bookkeeping. */
static int f_alloc_reg(int r, int willclobber)
{
	int bestreg;
	uae_s32 when;
	int i;
	uae_s32 badness;
	bestreg=-1;
	when=2000000000;
	for (i=N_FREGS;i--;) {
		badness=live.fat[i].touched;
		if (live.fat[i].nholds==0)
			badness=0;

		if (!live.fat[i].locked && badness<when) {
			bestreg=i;
			when=badness;
			if (live.fat[i].nholds==0)
				break;
		}
	}
	Dif (bestreg==-1)
		abort();

	if (live.fat[bestreg].nholds>0) {
		f_free_nreg(bestreg);
	}
	if (f_isinreg(r)) {
		f_evict(r);
	}

	if (!willclobber) {
		if (live.fate[r].status!=UNDEF) {
			/* Load the current value from memory. */
#if USE_LONG_DOUBLE
			raw_fmov_ext_rm(bestreg,(uintptr)live.fate[r].mem);
#else
			raw_fmov_rm(bestreg,(uintptr)live.fate[r].mem);
#endif
		}
		live.fate[r].status=CLEAN;
	}
	else {
		live.fate[r].status=DIRTY;
	}
	/* Link r into bestreg's holder list. */
	live.fate[r].realreg=bestreg;
	live.fate[r].realind=live.fat[bestreg].nholds;
	live.fat[bestreg].touched=touchcnt++;
	live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
	live.fat[bestreg].nholds++;

	return bestreg;
}
2105    
/* Drop one lock reference on native FPU register r. */
static void f_unlock(int r)
{
	Dif (!live.fat[r].locked)
		abort();
	live.fat[r].locked--;
}
2112    
/* Increment the lock count of native FPU register r so the allocator
   will not spill it while the caller is using it. */
static void f_setlock(int r)
{
	live.fat[r].locked++;
}
2117    
/* Lock virtual FP register r for reading and return the native FPU
 * register holding it, loading it from memory first if necessary.
 * Caller must release the lock with f_unlock(). */
static __inline__ int f_readreg(int r)
{
	int n;
	int answer=-1;

	if (f_isinreg(r)) {
		n=live.fate[r].realreg;
		answer=n;
	}
	/* either the value was in memory to start with, or it was evicted and
	   is in memory now */
	if (answer<0)
		answer=f_alloc_reg(r,0);

	live.fat[answer].locked++;
	live.fat[answer].touched=touchcnt++;
	return answer;
}
2136    
/* Ensure virtual register r is the ONLY value held by its native FPU
 * register, so it can be modified without corrupting co-resident
 * virtual registers.  clobber!=0 means the current value need not be
 * copied into the new register. */
static __inline__ void f_make_exclusive(int r, int clobber)
{
	freg_status oldstate;
	int rr=live.fate[r].realreg;
	int nr;
	int nind;
	int ndirt=0;
	int i;

	if (!f_isinreg(r))
		return;
	if (live.fat[rr].nholds==1)
		return;		/* already exclusive */
	/* Count dirty co-residents: those would need a writeback */
	for (i=0;i<live.fat[rr].nholds;i++) {
		int vr=live.fat[rr].holds[i];
		if (vr!=r && live.fate[vr].status==DIRTY)
			ndirt++;
	}
	if (!ndirt && !live.fat[rr].locked) {
		/* Everything else is clean, so let's keep this register */
		for (i=0;i<live.fat[rr].nholds;i++) {
			int vr=live.fat[rr].holds[i];
			if (vr!=r) {
				f_evict(vr);
				i--; /* Try that index again! (holds[] was compacted) */
			}
		}
		Dif (live.fat[rr].nholds!=1) {
			write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
			for (i=0;i<live.fat[rr].nholds;i++) {
				write_log(" %d(%d,%d)",live.fat[rr].holds[i],
					live.fate[live.fat[rr].holds[i]].realreg,
					live.fate[live.fat[rr].holds[i]].realind);
			}
			write_log("\n");
			abort();
		}
		return;
	}

	/* We have to split the register */
	oldstate=live.fate[r];

	f_setlock(rr); /* Make sure this doesn't go away */
	/* Forget about r being in the register rr */
	f_disassociate(r);
	/* Get a new register, that we will clobber completely */
	nr=f_alloc_reg(r,1);
	nind=live.fate[r].realind;
	if (!clobber)
		raw_fmov_rr(nr,rr); /* Make another copy */
	live.fate[r]=oldstate; /* Keep all the old state info */
	live.fate[r].realreg=nr;
	live.fate[r].realind=nind;
	f_unlock(rr);
}
2193    
2194    
/* Lock virtual FP register r for writing and return the native FPU
 * register that will hold it.  The old value is discarded (exclusive
 * with clobber) and the register is marked DIRTY for later writeback. */
static __inline__ int f_writereg(int r)
{
	int n;
	int answer=-1;

	f_make_exclusive(r,1);
	if (f_isinreg(r)) {
		n=live.fate[r].realreg;
		answer=n;
	}
	if (answer<0) {
		answer=f_alloc_reg(r,1);
	}
	live.fate[r].status=DIRTY;
	live.fat[answer].locked++;
	live.fat[answer].touched=touchcnt++;
	return answer;
}
2213    
/* Lock virtual FP register r for read-modify-write: the current value
 * is preserved (loaded if needed), made exclusive, and marked DIRTY. */
static int f_rmw(int r)
{
	int n;

	f_make_exclusive(r,0);
	if (f_isinreg(r)) {
		n=live.fate[r].realreg;
	}
	else
		n=f_alloc_reg(r,0);
	live.fate[r].status=DIRTY;
	live.fat[n].locked++;
	live.fat[n].touched=touchcnt++;
	return n;
}
2229    
/* Transfer the FPU condition codes (held in FP_RESULT) into the
 * emulated CPU flags.  tmp is a scratch integer register used only
 * when the flag-transfer register would otherwise be clobbered
 * (FFLAG_NREG_CLOBBER_CONDITION). */
static void fflags_into_flags_internal(uae_u32 tmp)
{
	int r;

	clobber_flags();
	r=f_readreg(FP_RESULT);
	if (FFLAG_NREG_CLOBBER_CONDITION) {
		int tmp2=tmp;
		/* Pin tmp into FFLAG_NREG so raw_fflags_into_flags can use it */
		tmp=writereg_specific(tmp,4,FFLAG_NREG);
		raw_fflags_into_flags(r);
		unlock2(tmp);
		forget_about(tmp2);	/* tmp's value is trash now */
	}
	else
		raw_fflags_into_flags(r);
	f_unlock(r);
	live_flags();	/* native flags now hold the live emulated flags */
}
2248    
2249    
2250    
2251    
2252     /********************************************************************
2253     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2254     ********************************************************************/
2255    
2256     /*
2257     * RULES FOR HANDLING REGISTERS:
2258     *
2259     * * In the function headers, order the parameters
2260     * - 1st registers written to
2261     * - 2nd read/modify/write registers
2262     * - 3rd registers read from
2263     * * Before calling raw_*, you must call readreg, writereg or rmw for
2264     * each register
2265     * * The order for this is
2266     * - 1st call remove_offset for all registers written to with size<4
2267     * - 2nd call readreg for all registers read without offset
2268     * - 3rd call rmw for all rmw registers
2269     * - 4th call readreg_offset for all registers that can handle offsets
2270     * - 5th call get_offset for all the registers from the previous step
2271     * - 6th call writereg for all written-to registers
2272     * - 7th call raw_*
2273     * - 8th unlock2 all registers that were locked
2274     */
2275    
/* Declare that the native CPU flags currently hold the valid emulated
   flags; the stacked copy becomes stale (TRASH). */
MIDFUNC(0,live_flags,(void))
{
	live.flags_on_stack=TRASH;
	live.flags_in_flags=VALID;
	live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))
2283    
/* Declare that nobody needs the current emulated flags, letting the
   compiler skip saving/restoring them. */
MIDFUNC(0,dont_care_flags,(void))
{
	live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))
2289    
2290    
2291     MIDFUNC(0,duplicate_carry,(void))
2292     {
2293     evict(FLAGX);
2294     make_flags_live_internal();
2295 gbeauche 1.24 COMPCALL(setcc_m)((uintptr)live.state[FLAGX].mem,2);
2296 gbeauche 1.1 log_vwrite(FLAGX);
2297     }
2298     MENDFUNC(0,duplicate_carry,(void))
2299    
2300     MIDFUNC(0,restore_carry,(void))
2301     {
2302     if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2303     bt_l_ri_noclobber(FLAGX,0);
2304     }
2305     else { /* Avoid the stall the above creates.
2306     This is slow on non-P6, though.
2307     */
2308     COMPCALL(rol_b_ri(FLAGX,8));
2309     isclean(FLAGX);
2310     }
2311     }
2312     MENDFUNC(0,restore_carry,(void))
2313    
2314     MIDFUNC(0,start_needflags,(void))
2315     {
2316     needflags=1;
2317     }
2318     MENDFUNC(0,start_needflags,(void))
2319    
2320     MIDFUNC(0,end_needflags,(void))
2321     {
2322     needflags=0;
2323     }
2324     MENDFUNC(0,end_needflags,(void))
2325    
2326     MIDFUNC(0,make_flags_live,(void))
2327     {
2328     make_flags_live_internal();
2329     }
2330     MENDFUNC(0,make_flags_live,(void))
2331    
/* Public wrapper: move the FPU condition codes into the emulated CPU
   flags.  NOTE(review): fflags_into_flags_internal() also starts with
   clobber_flags(), so the call here looks redundant — appears harmless,
   but confirm before removing. */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
	clobber_flags();
	fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
2338    
2339    
2340     MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2341     {
2342     int size=4;
2343     if (i<16)
2344     size=2;
2345     CLOBBER_BT;
2346     r=readreg(r,size);
2347     raw_bt_l_ri(r,i);
2348     unlock2(r);
2349     }
2350     MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2351    
2352     MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2353     {
2354     CLOBBER_BT;
2355     r=readreg(r,4);
2356     b=readreg(b,4);
2357     raw_bt_l_rr(r,b);
2358     unlock2(r);
2359     unlock2(b);
2360     }
2361     MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2362    
2363     MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2364     {
2365     int size=4;
2366     if (i<16)
2367     size=2;
2368     CLOBBER_BT;
2369     r=rmw(r,size,size);
2370     raw_btc_l_ri(r,i);
2371     unlock2(r);
2372     }
2373     MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2374    
2375     MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2376     {
2377     CLOBBER_BT;
2378     b=readreg(b,4);
2379     r=rmw(r,4,4);
2380     raw_btc_l_rr(r,b);
2381     unlock2(r);
2382     unlock2(b);
2383     }
2384     MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2385    
2386    
2387     MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2388     {
2389     int size=4;
2390     if (i<16)
2391     size=2;
2392     CLOBBER_BT;
2393     r=rmw(r,size,size);
2394     raw_btr_l_ri(r,i);
2395     unlock2(r);
2396     }
2397     MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2398    
2399     MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2400     {
2401     CLOBBER_BT;
2402     b=readreg(b,4);
2403     r=rmw(r,4,4);
2404     raw_btr_l_rr(r,b);
2405     unlock2(r);
2406     unlock2(b);
2407     }
2408     MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2409    
2410    
2411     MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2412     {
2413     int size=4;
2414     if (i<16)
2415     size=2;
2416     CLOBBER_BT;
2417     r=rmw(r,size,size);
2418     raw_bts_l_ri(r,i);
2419     unlock2(r);
2420     }
2421     MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2422    
2423     MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2424     {
2425     CLOBBER_BT;
2426     b=readreg(b,4);
2427     r=rmw(r,4,4);
2428     raw_bts_l_rr(r,b);
2429     unlock2(r);
2430     unlock2(b);
2431     }
2432     MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2433    
2434     MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2435     {
2436     CLOBBER_MOV;
2437     d=writereg(d,4);
2438     raw_mov_l_rm(d,s);
2439     unlock2(d);
2440     }
2441     MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2442    
2443    
2444     MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2445     {
2446     r=readreg(r,4);
2447     raw_call_r(r);
2448     unlock2(r);
2449     }
2450     MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2451    
2452     MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2453     {
2454     CLOBBER_SUB;
2455     raw_sub_l_mi(d,s) ;
2456     }
2457     MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2458    
2459     MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2460     {
2461     CLOBBER_MOV;
2462     raw_mov_l_mi(d,s) ;
2463     }
2464     MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2465    
2466     MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2467     {
2468     CLOBBER_MOV;
2469     raw_mov_w_mi(d,s) ;
2470     }
2471     MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2472    
2473     MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2474     {
2475     CLOBBER_MOV;
2476     raw_mov_b_mi(d,s) ;
2477     }
2478     MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2479    
2480     MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2481     {
2482     if (!i && !needflags)
2483     return;
2484     CLOBBER_ROL;
2485     r=rmw(r,1,1);
2486     raw_rol_b_ri(r,i);
2487     unlock2(r);
2488     }
2489     MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2490    
2491     MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2492     {
2493     if (!i && !needflags)
2494     return;
2495     CLOBBER_ROL;
2496     r=rmw(r,2,2);
2497     raw_rol_w_ri(r,i);
2498     unlock2(r);
2499     }
2500     MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2501    
2502     MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2503     {
2504     if (!i && !needflags)
2505     return;
2506     CLOBBER_ROL;
2507     r=rmw(r,4,4);
2508     raw_rol_l_ri(r,i);
2509     unlock2(r);
2510     }
2511     MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2512    
2513     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2514     {
2515     if (isconst(r)) {
2516     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2517     return;
2518     }
2519     CLOBBER_ROL;
2520     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2521     d=rmw(d,4,4);
2522     Dif (r!=1) {
2523     write_log("Illegal register %d in raw_rol_b\n",r);
2524     abort();
2525     }
2526     raw_rol_l_rr(d,r) ;
2527     unlock2(r);
2528     unlock2(d);
2529     }
2530     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2531    
2532     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2533     { /* Can only do this with r==1, i.e. cl */
2534    
2535     if (isconst(r)) {
2536     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2537     return;
2538     }
2539     CLOBBER_ROL;
2540     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2541     d=rmw(d,2,2);
2542     Dif (r!=1) {
2543     write_log("Illegal register %d in raw_rol_b\n",r);
2544     abort();
2545     }
2546     raw_rol_w_rr(d,r) ;
2547     unlock2(r);
2548     unlock2(d);
2549     }
2550     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2551    
2552     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2553     { /* Can only do this with r==1, i.e. cl */
2554    
2555     if (isconst(r)) {
2556     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2557     return;
2558     }
2559    
2560     CLOBBER_ROL;
2561     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2562     d=rmw(d,1,1);
2563     Dif (r!=1) {
2564     write_log("Illegal register %d in raw_rol_b\n",r);
2565     abort();
2566     }
2567     raw_rol_b_rr(d,r) ;
2568     unlock2(r);
2569     unlock2(d);
2570     }
2571     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2572    
2573    
2574     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2575     {
2576     if (isconst(r)) {
2577     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2578     return;
2579     }
2580     CLOBBER_SHLL;
2581     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2582     d=rmw(d,4,4);
2583     Dif (r!=1) {
2584     write_log("Illegal register %d in raw_rol_b\n",r);
2585     abort();
2586     }
2587     raw_shll_l_rr(d,r) ;
2588     unlock2(r);
2589     unlock2(d);
2590     }
2591     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2592    
2593     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2594     { /* Can only do this with r==1, i.e. cl */
2595    
2596     if (isconst(r)) {
2597     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2598     return;
2599     }
2600     CLOBBER_SHLL;
2601     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2602     d=rmw(d,2,2);
2603     Dif (r!=1) {
2604     write_log("Illegal register %d in raw_shll_b\n",r);
2605     abort();
2606     }
2607     raw_shll_w_rr(d,r) ;
2608     unlock2(r);
2609     unlock2(d);
2610     }
2611     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2612    
2613     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2614     { /* Can only do this with r==1, i.e. cl */
2615    
2616     if (isconst(r)) {
2617     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2618     return;
2619     }
2620    
2621     CLOBBER_SHLL;
2622     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2623     d=rmw(d,1,1);
2624     Dif (r!=1) {
2625     write_log("Illegal register %d in raw_shll_b\n",r);
2626     abort();
2627     }
2628     raw_shll_b_rr(d,r) ;
2629     unlock2(r);
2630     unlock2(d);
2631     }
2632     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2633    
2634    
2635     MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2636     {
2637     if (!i && !needflags)
2638     return;
2639     CLOBBER_ROR;
2640     r=rmw(r,1,1);
2641     raw_ror_b_ri(r,i);
2642     unlock2(r);
2643     }
2644     MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2645    
2646     MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2647     {
2648     if (!i && !needflags)
2649     return;
2650     CLOBBER_ROR;
2651     r=rmw(r,2,2);
2652     raw_ror_w_ri(r,i);
2653     unlock2(r);
2654     }
2655     MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2656    
2657     MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2658     {
2659     if (!i && !needflags)
2660     return;
2661     CLOBBER_ROR;
2662     r=rmw(r,4,4);
2663     raw_ror_l_ri(r,i);
2664     unlock2(r);
2665     }
2666     MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2667    
2668     MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2669     {
2670     if (isconst(r)) {
2671     COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2672     return;
2673     }
2674     CLOBBER_ROR;
2675     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2676     d=rmw(d,4,4);
2677     raw_ror_l_rr(d,r) ;
2678     unlock2(r);
2679     unlock2(d);
2680     }
2681     MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2682    
2683     MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2684     {
2685     if (isconst(r)) {
2686     COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2687     return;
2688     }
2689     CLOBBER_ROR;
2690     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2691     d=rmw(d,2,2);
2692     raw_ror_w_rr(d,r) ;
2693     unlock2(r);
2694     unlock2(d);
2695     }
2696     MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2697    
2698     MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2699     {
2700     if (isconst(r)) {
2701     COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2702     return;
2703     }
2704    
2705     CLOBBER_ROR;
2706     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2707     d=rmw(d,1,1);
2708     raw_ror_b_rr(d,r) ;
2709     unlock2(r);
2710     unlock2(d);
2711     }
2712     MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2713    
2714     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2715     {
2716     if (isconst(r)) {
2717     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2718     return;
2719     }
2720     CLOBBER_SHRL;
2721     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2722     d=rmw(d,4,4);
2723     Dif (r!=1) {
2724     write_log("Illegal register %d in raw_rol_b\n",r);
2725     abort();
2726     }
2727     raw_shrl_l_rr(d,r) ;
2728     unlock2(r);
2729     unlock2(d);
2730     }
2731     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2732    
2733     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2734     { /* Can only do this with r==1, i.e. cl */
2735    
2736     if (isconst(r)) {
2737     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2738     return;
2739     }
2740     CLOBBER_SHRL;
2741     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2742     d=rmw(d,2,2);
2743     Dif (r!=1) {
2744     write_log("Illegal register %d in raw_shrl_b\n",r);
2745     abort();
2746     }
2747     raw_shrl_w_rr(d,r) ;
2748     unlock2(r);
2749     unlock2(d);
2750     }
2751     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2752    
2753     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2754     { /* Can only do this with r==1, i.e. cl */
2755    
2756     if (isconst(r)) {
2757     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2758     return;
2759     }
2760    
2761     CLOBBER_SHRL;
2762     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2763     d=rmw(d,1,1);
2764     Dif (r!=1) {
2765     write_log("Illegal register %d in raw_shrl_b\n",r);
2766     abort();
2767     }
2768     raw_shrl_b_rr(d,r) ;
2769     unlock2(r);
2770     unlock2(d);
2771     }
2772     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2773    
2774    
2775    
2776     MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2777     {
2778     if (!i && !needflags)
2779     return;
2780     if (isconst(r) && !needflags) {
2781     live.state[r].val<<=i;
2782     return;
2783     }
2784     CLOBBER_SHLL;
2785     r=rmw(r,4,4);
2786     raw_shll_l_ri(r,i);
2787     unlock2(r);
2788     }
2789     MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2790    
2791     MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2792     {
2793     if (!i && !needflags)
2794     return;
2795     CLOBBER_SHLL;
2796     r=rmw(r,2,2);
2797     raw_shll_w_ri(r,i);
2798     unlock2(r);
2799     }
2800     MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2801    
2802     MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2803     {
2804     if (!i && !needflags)
2805     return;
2806     CLOBBER_SHLL;
2807     r=rmw(r,1,1);
2808     raw_shll_b_ri(r,i);
2809     unlock2(r);
2810     }
2811     MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2812    
2813     MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2814     {
2815     if (!i && !needflags)
2816     return;
2817     if (isconst(r) && !needflags) {
2818     live.state[r].val>>=i;
2819     return;
2820     }
2821     CLOBBER_SHRL;
2822     r=rmw(r,4,4);
2823     raw_shrl_l_ri(r,i);
2824     unlock2(r);
2825     }
2826     MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2827    
2828     MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2829     {
2830     if (!i && !needflags)
2831     return;
2832     CLOBBER_SHRL;
2833     r=rmw(r,2,2);
2834     raw_shrl_w_ri(r,i);
2835     unlock2(r);
2836     }
2837     MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2838    
2839     MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2840     {
2841     if (!i && !needflags)
2842     return;
2843     CLOBBER_SHRL;
2844     r=rmw(r,1,1);
2845     raw_shrl_b_ri(r,i);
2846     unlock2(r);
2847     }
2848     MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2849    
2850     MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2851     {
2852     if (!i && !needflags)
2853     return;
2854     CLOBBER_SHRA;
2855     r=rmw(r,4,4);
2856     raw_shra_l_ri(r,i);
2857     unlock2(r);
2858     }
2859     MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2860    
2861     MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2862     {
2863     if (!i && !needflags)
2864     return;
2865     CLOBBER_SHRA;
2866     r=rmw(r,2,2);
2867     raw_shra_w_ri(r,i);
2868     unlock2(r);
2869     }
2870     MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2871    
2872     MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2873     {
2874     if (!i && !needflags)
2875     return;
2876     CLOBBER_SHRA;
2877     r=rmw(r,1,1);
2878     raw_shra_b_ri(r,i);
2879     unlock2(r);
2880     }
2881     MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2882    
2883     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2884     {
2885     if (isconst(r)) {
2886     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2887     return;
2888     }
2889     CLOBBER_SHRA;
2890     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2891     d=rmw(d,4,4);
2892     Dif (r!=1) {
2893     write_log("Illegal register %d in raw_rol_b\n",r);
2894     abort();
2895     }
2896     raw_shra_l_rr(d,r) ;
2897     unlock2(r);
2898     unlock2(d);
2899     }
2900     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2901    
2902     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2903     { /* Can only do this with r==1, i.e. cl */
2904    
2905     if (isconst(r)) {
2906     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2907     return;
2908     }
2909     CLOBBER_SHRA;
2910     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2911     d=rmw(d,2,2);
2912     Dif (r!=1) {
2913     write_log("Illegal register %d in raw_shra_b\n",r);
2914     abort();
2915     }
2916     raw_shra_w_rr(d,r) ;
2917     unlock2(r);
2918     unlock2(d);
2919     }
2920     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2921    
2922     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2923     { /* Can only do this with r==1, i.e. cl */
2924    
2925     if (isconst(r)) {
2926     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2927     return;
2928     }
2929    
2930     CLOBBER_SHRA;
2931     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2932     d=rmw(d,1,1);
2933     Dif (r!=1) {
2934     write_log("Illegal register %d in raw_shra_b\n",r);
2935     abort();
2936     }
2937     raw_shra_b_rr(d,r) ;
2938     unlock2(r);
2939     unlock2(d);
2940     }
2941     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2942    
2943    
2944     MIDFUNC(2,setcc,(W1 d, IMM cc))
2945     {
2946     CLOBBER_SETCC;
2947     d=writereg(d,1);
2948     raw_setcc(d,cc);
2949     unlock2(d);
2950     }
2951     MENDFUNC(2,setcc,(W1 d, IMM cc))
2952    
2953     MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2954     {
2955     CLOBBER_SETCC;
2956     raw_setcc_m(d,cc);
2957     }
2958     MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2959    
2960     MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2961     {
2962     if (d==s)
2963     return;
2964     CLOBBER_CMOV;
2965     s=readreg(s,4);
2966     d=rmw(d,4,4);
2967     raw_cmov_l_rr(d,s,cc);
2968     unlock2(s);
2969     unlock2(d);
2970     }
2971     MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2972    
2973     MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2974     {
2975     CLOBBER_CMOV;
2976     d=rmw(d,4,4);
2977     raw_cmov_l_rm(d,s,cc);
2978     unlock2(d);
2979     }
2980     MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2981    
2982 gbeauche 1.26 MIDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2983     {
2984     CLOBBER_BSF;
2985     s = readreg(s, 4);
2986     d = writereg(d, 4);
2987     raw_bsf_l_rr(d, s);
2988     unlock2(s);
2989     unlock2(d);
2990     }
2991     MENDFUNC(2,bsf_l_rr,(W4 d, W4 s))
2992    
/* Set the Z flag depending on the value in s. Note that the
   value has to be 0 or -1 (or, more precisely, for non-zero
   values, bit 14 must be set)! */
MIDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
{
	CLOBBER_BSF;
	/* s is pinned to FLAG_NREG3 so raw_flags_set_zero can use it */
	s=rmw_specific(s,4,4,FLAG_NREG3);
	tmp=writereg(tmp,4);	/* scratch register for the flag transfer */
	raw_flags_set_zero(s, tmp);
	unlock2(tmp);
	unlock2(s);
}
MENDFUNC(2,simulate_bsf,(W4 tmp, RW4 s))
3006 gbeauche 1.1
3007     MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
3008     {
3009     CLOBBER_MUL;
3010     s=readreg(s,4);
3011     d=rmw(d,4,4);
3012     raw_imul_32_32(d,s);
3013     unlock2(s);
3014     unlock2(d);
3015     }
3016     MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
3017    
3018     MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3019     {
3020     CLOBBER_MUL;
3021     s=rmw_specific(s,4,4,MUL_NREG2);
3022     d=rmw_specific(d,4,4,MUL_NREG1);
3023     raw_imul_64_32(d,s);
3024     unlock2(s);
3025     unlock2(d);
3026     }
3027     MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
3028    
3029     MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3030     {
3031     CLOBBER_MUL;
3032     s=rmw_specific(s,4,4,MUL_NREG2);
3033     d=rmw_specific(d,4,4,MUL_NREG1);
3034     raw_mul_64_32(d,s);
3035     unlock2(s);
3036     unlock2(d);
3037     }
3038     MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
3039    
3040     MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
3041     {
3042     CLOBBER_MUL;
3043     s=readreg(s,4);
3044     d=rmw(d,4,4);
3045     raw_mul_32_32(d,s);
3046     unlock2(s);
3047     unlock2(d);
3048     }
3049     MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
3050    
3051 gbeauche 1.24 #if SIZEOF_VOID_P == 8
3052     MIDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3053     {
3054     int isrmw;
3055    
3056     if (isconst(s)) {
3057     set_const(d,(uae_s32)live.state[s].val);
3058     return;
3059     }
3060    
3061     CLOBBER_SE32;
3062     isrmw=(s==d);
3063     if (!isrmw) {
3064     s=readreg(s,4);
3065     d=writereg(d,4);
3066     }
3067     else { /* If we try to lock this twice, with different sizes, we
3068     are int trouble! */
3069     s=d=rmw(s,4,4);
3070     }
3071     raw_sign_extend_32_rr(d,s);
3072     if (!isrmw) {
3073     unlock2(d);
3074     unlock2(s);
3075     }
3076     else {
3077     unlock2(s);
3078     }
3079     }
3080     MENDFUNC(2,sign_extend_32_rr,(W4 d, R2 s))
3081     #endif
3082    
3083 gbeauche 1.1 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3084     {
3085     int isrmw;
3086    
3087     if (isconst(s)) {
3088     set_const(d,(uae_s32)(uae_s16)live.state[s].val);
3089     return;
3090     }
3091    
3092     CLOBBER_SE16;
3093     isrmw=(s==d);
3094     if (!isrmw) {
3095     s=readreg(s,2);
3096     d=writereg(d,4);
3097     }
3098     else { /* If we try to lock this twice, with different sizes, we
3099     are int trouble! */
3100     s=d=rmw(s,4,2);
3101     }
3102     raw_sign_extend_16_rr(d,s);
3103     if (!isrmw) {
3104     unlock2(d);
3105     unlock2(s);
3106     }
3107     else {
3108     unlock2(s);
3109     }
3110     }
3111     MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3112    
3113     MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3114     {
3115     int isrmw;
3116    
3117     if (isconst(s)) {
3118     set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3119     return;
3120     }
3121    
3122     isrmw=(s==d);
3123     CLOBBER_SE8;
3124     if (!isrmw) {
3125     s=readreg(s,1);
3126     d=writereg(d,4);
3127     }
3128     else { /* If we try to lock this twice, with different sizes, we
3129     are int trouble! */
3130     s=d=rmw(s,4,1);
3131     }
3132    
3133     raw_sign_extend_8_rr(d,s);
3134    
3135     if (!isrmw) {
3136     unlock2(d);
3137     unlock2(s);
3138     }
3139     else {
3140     unlock2(s);
3141     }
3142     }
3143     MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3144    
3145    
3146     MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3147     {
3148     int isrmw;
3149    
3150     if (isconst(s)) {
3151     set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3152     return;
3153     }
3154    
3155     isrmw=(s==d);
3156     CLOBBER_ZE16;
3157     if (!isrmw) {
3158     s=readreg(s,2);
3159     d=writereg(d,4);
3160     }
3161     else { /* If we try to lock this twice, with different sizes, we
3162     are int trouble! */
3163     s=d=rmw(s,4,2);
3164     }
3165     raw_zero_extend_16_rr(d,s);
3166     if (!isrmw) {
3167     unlock2(d);
3168     unlock2(s);
3169     }
3170     else {
3171     unlock2(s);
3172     }
3173     }
3174     MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3175    
3176     MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3177     {
3178     int isrmw;
3179     if (isconst(s)) {
3180     set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3181     return;
3182     }
3183    
3184     isrmw=(s==d);
3185     CLOBBER_ZE8;
3186     if (!isrmw) {
3187     s=readreg(s,1);
3188     d=writereg(d,4);
3189     }
3190     else { /* If we try to lock this twice, with different sizes, we
3191     are int trouble! */
3192     s=d=rmw(s,4,1);
3193     }
3194    
3195     raw_zero_extend_8_rr(d,s);
3196    
3197     if (!isrmw) {
3198     unlock2(d);
3199     unlock2(s);
3200     }
3201     else {
3202     unlock2(s);
3203     }
3204     }
3205     MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3206    
3207     MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3208     {
3209     if (d==s)
3210     return;
3211     if (isconst(s)) {
3212     COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3213     return;
3214     }
3215    
3216     CLOBBER_MOV;
3217     s=readreg(s,1);
3218     d=writereg(d,1);
3219     raw_mov_b_rr(d,s);
3220     unlock2(d);
3221     unlock2(s);
3222     }
3223     MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3224    
3225     MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3226     {
3227     if (d==s)
3228     return;
3229     if (isconst(s)) {
3230     COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3231     return;
3232     }
3233    
3234     CLOBBER_MOV;
3235     s=readreg(s,2);
3236     d=writereg(d,2);
3237     raw_mov_w_rr(d,s);
3238     unlock2(d);
3239     unlock2(s);
3240     }
3241     MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3242    
3243    
3244     MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3245     {
3246     CLOBBER_MOV;
3247     baser=readreg(baser,4);
3248     index=readreg(index,4);
3249     d=writereg(d,4);
3250    
3251     raw_mov_l_rrm_indexed(d,baser,index,factor);
3252     unlock2(d);
3253     unlock2(baser);
3254     unlock2(index);
3255     }
3256     MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3257    
3258     MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3259     {
3260     CLOBBER_MOV;
3261     baser=readreg(baser,4);
3262     index=readreg(index,4);
3263     d=writereg(d,2);
3264    
3265     raw_mov_w_rrm_indexed(d,baser,index,factor);
3266     unlock2(d);
3267     unlock2(baser);
3268     unlock2(index);
3269     }
3270     MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3271    
3272     MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3273     {
3274     CLOBBER_MOV;
3275     baser=readreg(baser,4);
3276     index=readreg(index,4);
3277     d=writereg(d,1);
3278    
3279     raw_mov_b_rrm_indexed(d,baser,index,factor);
3280    
3281     unlock2(d);
3282     unlock2(baser);
3283     unlock2(index);
3284     }
3285     MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3286    
3287    
/* Store long s to memory at address baser + index*factor. */
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,4);

	/* Debug-build sanity check: the source must not share a native
	   register with either address operand */
	Dif (baser==s || index==s)
		abort();


	raw_mov_l_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3305    
3306     MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3307     {
3308     CLOBBER_MOV;
3309     baser=readreg(baser,4);
3310     index=readreg(index,4);
3311     s=readreg(s,2);
3312    
3313     raw_mov_w_mrr_indexed(baser,index,factor,s);
3314     unlock2(s);
3315     unlock2(baser);
3316     unlock2(index);
3317     }
3318     MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3319    
3320     MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3321     {
3322     CLOBBER_MOV;
3323     s=readreg(s,1);
3324     baser=readreg(baser,4);
3325     index=readreg(index,4);
3326    
3327     raw_mov_b_mrr_indexed(baser,index,factor,s);
3328     unlock2(s);
3329     unlock2(baser);
3330     unlock2(index);
3331     }
3332     MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3333    
3334    
/* Store the long in s to memory at base + baser + index*factor.
   Pending constant offsets cached on baser/index are folded into the
   immediate base (readreg_offset leaves them unapplied; get_offset
   retrieves them). */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
    int basereg=baser;   /* remember mid-layer ids before they become native regs */
    int indexreg=index;

    CLOBBER_MOV;
    s=readreg(s,4);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

/* Store the word in s to memory at base + baser + index*factor. */
MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    s=readreg(s,2);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

/* Store the byte in s to memory at base + baser + index*factor. */
MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    s=readreg(s,1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);

    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);

    raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
    unlock2(s);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3394    
3395    
3396    
/* Read a long from base+baser+factor*index */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;   /* mid-layer ids, kept for get_offset() below */
    int indexreg=index;

    CLOBBER_MOV;
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    /* Fold pending cached offsets of both address registers into base. */
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,4);
    raw_mov_l_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a word from base+baser+factor*index */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    /* Partial-width write: drop any cached offset on d first. */
    remove_offset(d,-1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,2);
    raw_mov_w_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a byte from base+baser+factor*index */
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
    int basereg=baser;
    int indexreg=index;

    CLOBBER_MOV;
    remove_offset(d,-1);
    baser=readreg_offset(baser,4);
    index=readreg_offset(index,4);
    base+=get_offset(basereg);
    base+=factor*get_offset(indexreg);
    d=writereg(d,1);
    raw_mov_b_brrm_indexed(d,base,baser,index,factor);
    unlock2(d);
    unlock2(baser);
    unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3455    
/* Read a long from base+factor*index */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
    int indexreg=index;

    /* Constant index: the effective address is known at compile time,
       so emit a plain absolute load instead. */
    if (isconst(index)) {
        COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
        return;
    }

    CLOBBER_MOV;
    index=readreg_offset(index,4);
    base+=get_offset(indexreg)*factor;   /* fold cached offset into base */
    d=writereg(d,4);

    raw_mov_l_rm_indexed(d,base,index,factor);
    unlock2(index);
    unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3476    
3477    
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
    /* Constant address: turn into an absolute load. */
    if (isconst(s)) {
        COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,4);

    raw_mov_l_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
    if (isconst(s)) {
        COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,2);

    raw_mov_w_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
    if (isconst(s)) {
        COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,1);

    raw_mov_b_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3528    
/* read the long at the address contained in s+offset and store in d
   (variant that also folds s's cached constant offset into offset) */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
    int sreg=s;   /* mid-layer id, for get_offset() after allocation */
    if (isconst(s)) {
        COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,4);

    raw_mov_l_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
    int sreg=s;
    if (isconst(s)) {
        COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    /* Partial-width write: drop any cached offset on d first. */
    remove_offset(d,-1);
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,2);

    raw_mov_w_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
    int sreg=s;
    if (isconst(s)) {
        COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
        return;
    }
    CLOBBER_MOV;
    remove_offset(d,-1);
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,1);

    raw_mov_b_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3587    
/* Store immediate i as a long to memory at d+offset. */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    /* Constant address register: emit an absolute immediate store. */
    if (isconst(d)) {
        COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
        return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);   /* fold d's cached offset into the displacement */
    raw_mov_l_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))

/* Store immediate i as a word to memory at d+offset. */
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
        COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
        return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))

/* Store immediate i as a byte to memory at d+offset. */
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
        COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
        return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3635    
/* Warning! OFFSET is byte sized only! */
/* Store the long in s to memory at d+offset. */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
    /* Constant destination address: absolute store. */
    if (isconst(d)) {
        COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
        return;
    }
    /* Constant source value: immediate store. */
    if (isconst(s)) {
        COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_mov_l_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))

/* Store the word in s to memory at d+offset. */
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
    if (isconst(d)) {
        COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
        return;
    }
    if (isconst(s)) {
        COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg(d,4);
    raw_mov_w_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))

/* Store the byte in s to memory at d+offset. */
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
    if (isconst(d)) {
        COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
        return;
    }
    if (isconst(s)) {
        COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg(d,4);
    raw_mov_b_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3697    
/* d = s + offset (address computation, no memory access, no flags). */
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
    /* Constant source: result is a compile-time constant. */
    if (isconst(s)) {
        COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
        return;
    }
#if USE_OFFSET
    /* In-place add: just record the offset lazily instead of emitting code. */
    if (d==s) {
        add_offset(d,offset);
        return;
    }
#endif
    CLOBBER_LEA;
    s=readreg(s,4);
    d=writereg(d,4);
    raw_lea_l_brr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))

/* d = s + index*factor + offset. */
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
    /* Zero displacement: use the simpler indexed form. */
    if (!offset) {
        COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
        return;
    }
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_brr_indexed(d,s,index,factor,offset);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))

/* d = s + index*factor. */
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_rr_indexed(d,s,index,factor);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3750    
/* write s to the long at the address contained in d+offset
   (d's cached constant offset is folded into the displacement) */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
    int dreg=d;   /* mid-layer id, for get_offset() after allocation */
    if (isconst(d)) {
        COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);

    raw_mov_l_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))

/* write s to the word at the address contained in d+offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
    int dreg=d;

    if (isconst(d)) {
        COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))

/* write s to the byte at the address contained in d+offset */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
        COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3808    
/* Byte-swap all 32 bits of r (endianness conversion). */
MIDFUNC(1,bswap_32,(RW4 r))
{
    int reg=r;

    /* Constant: fold the swap at compile time. */
    if (isconst(r)) {
        uae_u32 oldv=live.state[r].val;
        live.state[r].val=reverse32(oldv);
        return;
    }

    CLOBBER_SW32;
    r=rmw(r,4,4);
    raw_bswap_32(r);
    unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))

/* Byte-swap only the low 16 bits of r; the upper half is preserved. */
MIDFUNC(1,bswap_16,(RW2 r))
{
    if (isconst(r)) {
        uae_u32 oldv=live.state[r].val;
        /* Swap the two low bytes, keep bits 16..31 untouched. */
        live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
            (oldv&0xffff0000);
        return;
    }

    CLOBBER_SW16;
    r=rmw(r,2,2);

    raw_bswap_16(r);
    unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3842    
3843    
3844    
/* Register-to-register long move. No code is emitted: d is simply made
   to share s's native register, updating the live-state bookkeeping so
   one native register now "holds" both mid-layer registers. */
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
    int olds;

    if (d==s) { /* How pointless! */
        return;
    }
    /* Constant source: d just becomes that constant. */
    if (isconst(s)) {
        COMPCALL(mov_l_ri)(d,live.state[s].val);
        return;
    }
    olds=s;               /* mid-layer id of s, needed after readreg_offset */
    disassociate(d);      /* detach d from whatever it held before */
    s=readreg_offset(s,4);
    /* Alias d onto s's native register and mark it dirty (memory copy stale). */
    live.state[d].realreg=s;
    live.state[d].realind=live.nat[s].nholds;
    live.state[d].val=live.state[olds].val;
    live.state[d].validsize=4;
    live.state[d].dirtysize=4;
    set_status(d,DIRTY);

    live.nat[s].holds[live.nat[s].nholds]=d;
    live.nat[s].nholds++;
    log_clobberreg(d);
    /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
       d,s,live.state[d].realind,live.nat[s].nholds); */
    unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3874    
/* Store the long in s to absolute address d. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
    /* Constant source: immediate store. */
    if (isconst(s)) {
        COMPCALL(mov_l_mi)(d,live.state[s].val);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);

    raw_mov_l_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))


/* Store the word in s to absolute address d. */
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
        return;
    }
    CLOBBER_MOV;
    s=readreg(s,2);

    raw_mov_w_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))

/* Load the word at absolute address s into d. */
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))

/* Store the byte in s to absolute address d. */
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);

    raw_mov_b_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))

/* Load the byte at absolute address s into d. */
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3938    
/* Load immediate into d. The long form emits no code at all: the value
   is recorded as a compile-time constant and materialized lazily. */
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
    set_const(d,s);
    return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))

/* Load 16-bit immediate into d (partial write, so a real move is emitted). */
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))

/* Load 8-bit immediate into d. */
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3965    
3966    
/* Add immediate s to the long at absolute address d (memory operand). */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_l_mi(d,s) ;
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))

/* Add immediate s to the word at absolute address d. */
MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_w_mi(d,s) ;
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))

/* Add immediate s to the byte at absolute address d. */
MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_b_mi(d,s) ;
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3987    
3988    
3989     MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3990     {
3991     CLOBBER_TEST;
3992     d=readreg(d,4);
3993    
3994     raw_test_l_ri(d,i);
3995     unlock2(d);
3996     }
3997     MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3998    
3999     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
4000     {
4001     CLOBBER_TEST;
4002     d=readreg(d,4);
4003     s=readreg(s,4);
4004    
4005     raw_test_l_rr(d,s);;
4006     unlock2(d);
4007     unlock2(s);
4008     }
4009     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
4010    
4011     MIDFUNC(2,test_w_rr,(R2 d, R2 s))
4012     {
4013     CLOBBER_TEST;
4014     d=readreg(d,2);
4015     s=readreg(s,2);
4016    
4017     raw_test_w_rr(d,s);
4018     unlock2(d);
4019     unlock2(s);
4020     }
4021     MENDFUNC(2,test_w_rr,(R2 d, R2 s))
4022    
4023     MIDFUNC(2,test_b_rr,(R1 d, R1 s))
4024     {
4025     CLOBBER_TEST;
4026     d=readreg(d,1);
4027     s=readreg(s,1);
4028    
4029     raw_test_b_rr(d,s);
4030     unlock2(d);
4031     unlock2(s);
4032     }
4033     MENDFUNC(2,test_b_rr,(R1 d, R1 s))
4034    
4035    
/* d &= i. Folded at compile time when d is constant and flags are unused. */
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
        live.state[d].val &= i;
        return;
    }

    CLOBBER_AND;
    d=rmw(d,4,4);

    raw_and_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

/* d &= s (long). */
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
    CLOBBER_AND;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_and_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))

/* d &= s (word). */
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
    CLOBBER_AND;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_and_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))

/* d &= s (byte). */
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
    CLOBBER_AND;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_and_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
4086    
// gb-- used for making an fpcr value in compemu_fpp.cpp
/* d |= the long at absolute address s. */
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))

/* d |= i. Folded at compile time when d is constant and flags unused. */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
        live.state[d].val|=i;
        return;
    }
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))

/* d |= s (long). Folded when both operands are constants and flags unused. */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
    if (isconst(d) && isconst(s) && !needflags) {
        live.state[d].val|=live.state[s].val;
        return;
    }
    CLOBBER_OR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_or_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))

/* d |= s (word). */
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
    CLOBBER_OR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_or_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))

/* d |= s (byte). */
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
    CLOBBER_OR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_or_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
4151    
/* d += s + carry (long). The incoming carry flag must be live. */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
    CLOBBER_ADC;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_adc_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))

/* d += s + carry (word). */
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
    CLOBBER_ADC;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_adc_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))

/* d += s + carry (byte). */
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
    CLOBBER_ADC;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_adc_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
4188    
/* d += s (long). Constant source is routed to the immediate form. */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
        COMPCALL(add_l_ri)(d,live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_add_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))

/* d += s (word). */
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_add_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))

/* d += s (byte). */
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_ADD;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_add_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
4240    
/* d -= i (long). When flags are not needed, folds into the constant or
   into d's lazily-tracked offset instead of emitting code. */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags)
        return;
    if (isconst(d) && !needflags) {
        live.state[d].val-=i;
        return;
    }
#if USE_OFFSET
    if (!needflags) {
        add_offset(d,-i);
        return;
    }
#endif

    CLOBBER_SUB;
    d=rmw(d,4,4);

    raw_sub_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))

/* d -= i (word). */
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags)
        return;

    CLOBBER_SUB;
    d=rmw(d,2,2);

    raw_sub_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))

/* d -= i (byte). */
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags)
        return;

    CLOBBER_SUB;
    d=rmw(d,1,1);

    raw_sub_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4290    
/* d += i (long). Mirrors sub_l_ri: constant fold or lazy offset when
   flags are not needed. */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags)
        return;
    if (isconst(d) && !needflags) {
        live.state[d].val+=i;
        return;
    }
#if USE_OFFSET
    if (!needflags) {
        add_offset(d,i);
        return;
    }
#endif
    CLOBBER_ADD;
    d=rmw(d,4,4);
    raw_add_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))

/* d += i (word). */
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags)
        return;

    CLOBBER_ADD;
    d=rmw(d,2,2);

    raw_add_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))

/* d += i (byte). */
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags)
        return;

    CLOBBER_ADD;
    d=rmw(d,1,1);

    raw_add_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4338    
/* d -= s + borrow (long). The incoming carry/borrow flag must be live. */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
    CLOBBER_SBB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sbb_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))

/* d -= s + borrow (word). */
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
    CLOBBER_SBB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sbb_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))

/* d -= s + borrow (byte). */
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
    CLOBBER_SBB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sbb_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4374    
/* d -= s (long). Constant source is routed to the immediate form. */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
        COMPCALL(sub_l_ri)(d,live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sub_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))

/* d -= s (word). */
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
        COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sub_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))

/* d -= s (byte). */
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
        COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
        return;
    }

    CLOBBER_SUB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sub_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
4425    
/* Compare d with s (long); sets flags only, no operand is modified. */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
    CLOBBER_CMP;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_cmp_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))

/* Compare r with immediate i (long); flags only. */
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
    CLOBBER_CMP;
    r=readreg(r,4);

    raw_cmp_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))

/* Compare d with s (word); flags only. */
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
    CLOBBER_CMP;
    s=readreg(s,2);
    d=readreg(d,2);

    raw_cmp_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))

/* Compare d with s (byte); flags only. */
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
    CLOBBER_CMP;
    s=readreg(s,1);
    d=readreg(d,1);

    raw_cmp_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4471    
4472    
/* d ^= s (long). */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
    CLOBBER_XOR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_xor_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))

/* d ^= s (word). */
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
    CLOBBER_XOR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_xor_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))

/* d ^= s (byte). */
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
    CLOBBER_XOR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_xor_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4508    
/* Indirect call through register r with one input (in1, passed in
   REG_PAR1) and one output (out1, returned in REG_RESULT).
   osize/isize are the output/input operand sizes in bytes. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
    clobber_flags();
    remove_all_offsets();
    if (osize==4) {
        /* Full-width result: the old value of out1 is dead unless it
           aliases an input. */
        if (out1!=in1 && out1!=r) {
            COMPCALL(forget_about)(out1);
        }
    }
    else {
        /* Partial-width result: preserve the untouched bits in memory. */
        tomem_c(out1);
    }

    in1=readreg_specific(in1,isize,REG_PAR1);
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
                             any need for swapping nregs in prepare_for_call_2
                          */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in1);
#endif
    unlock2(in1);
    unlock2(r);

    prepare_for_call_2();
    raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(4);   /* pop the pushed argument */
#endif


    /* Bind out1 to REG_RESULT and mark it dirty. */
    live.nat[REG_RESULT].holds[0]=out1;
    live.nat[REG_RESULT].nholds=1;
    live.nat[REG_RESULT].touched=touchcnt++;

    live.state[out1].realreg=REG_RESULT;
    live.state[out1].realind=0;
    live.state[out1].val=0;
    live.state[out1].validsize=osize;
    live.state[out1].dirtysize=osize;
    set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4553    
/* Indirect call through register r with two inputs (REG_PAR1/REG_PAR2)
   and no result. isize1/isize2 are the input operand sizes in bytes. */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
    clobber_flags();
    remove_all_offsets();
    in1=readreg_specific(in1,isize1,REG_PAR1);
    in2=readreg_specific(in2,isize2,REG_PAR2);
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
                             any need for swapping nregs in prepare_for_call_2
                          */
#if USE_NORMAL_CALLING_CONVENTION
    /* Push right-to-left per the C calling convention. */
    raw_push_l_r(in2);
    raw_push_l_r(in1);
#endif
    unlock2(r);
    unlock2(in1);
    unlock2(in2);
    prepare_for_call_2();
    raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(8);   /* pop both pushed arguments */
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4578    
/* forget_about() takes a mid-layer register */
MIDFUNC(1,forget_about,(W4 r))
{
    /* Drop everything known about virtual register r: detach it from
       any native register, clear its lazy constant offset, and mark
       its contents undefined. */
    if (isinreg(r))
	disassociate(r);
    live.state[r].val=0;
    set_status(r,UNDEF);
}
MENDFUNC(1,forget_about,(W4 r))
4588    
/* Emit a single host NOP instruction. */
MIDFUNC(0,nop,(void))
{
    raw_nop();
}
MENDFUNC(0,nop,(void))
4594    
4595    
/* FPU counterpart of forget_about(): drop everything known about FP
   virtual register r and mark its contents undefined. */
MIDFUNC(1,f_forget_about,(FW r))
{
    if (f_isinreg(r))
	f_disassociate(r);
    live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
4603    
/*
 * FPU constant loaders: each acquires FP virtual register r for
 * writing, emits the corresponding raw_* constant-load, and unlocks
 * the register again.
 */

/* r = pi */
MIDFUNC(1,fmov_pi,(FW r))
{
    r=f_writereg(r);
    raw_fmov_pi(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))

/* r = log10(2) */
MIDFUNC(1,fmov_log10_2,(FW r))
{
    r=f_writereg(r);
    raw_fmov_log10_2(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))

/* r = log2(e) */
MIDFUNC(1,fmov_log2_e,(FW r))
{
    r=f_writereg(r);
    raw_fmov_log2_e(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))

/* r = ln(2) */
MIDFUNC(1,fmov_loge_2,(FW r))
{
    r=f_writereg(r);
    raw_fmov_loge_2(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))

/* r = 1.0 */
MIDFUNC(1,fmov_1,(FW r))
{
    r=f_writereg(r);
    raw_fmov_1(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))

/* r = 0.0 */
MIDFUNC(1,fmov_0,(FW r))
{
    r=f_writereg(r);
    raw_fmov_0(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))
4651    
/*
 * FPU load/store wrappers between FP virtual registers and memory.
 * Naming follows the file's convention: *_rm loads register from
 * memory, *_mr stores register to memory.  The format suffix selects
 * the raw emitter (plain = double, s = single, i = integer,
 * ext = extended precision).
 */

/* Load double from memory m into r. */
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmov_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))

/* Load integer from memory m into r. */
MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmovi_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))

/* Store r to memory m as integer. */
MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmovi_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))

/* Load single from memory m into r. */
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmovs_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))

/* Store r to memory m as single. */
MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmovs_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))

/* Store r to memory m in extended precision. */
MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmov_ext_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))

/* Store r to memory m as double. */
MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmov_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))

/* Load extended-precision value from memory m into r. */
MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmov_ext_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4715    
/* FP register-to-register move: d = s.
 * With USE_F_ALIAS no code is emitted at all; d is simply made an
 * alias of s's native register and marked DIRTY so it gets written
 * back later.  Without aliasing an actual raw move is emitted. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
    if (d==s) { /* How pointless! */
	return;
    }
#if USE_F_ALIAS
    f_disassociate(d);
    s=f_readreg(s);
    /* Attach d as an additional holder of s's native register. */
    live.fate[d].realreg=s;
    live.fate[d].realind=live.fat[s].nholds;
    live.fate[d].status=DIRTY;
    live.fat[s].holds[live.fat[s].nholds]=d;
    live.fat[s].nholds++;
    f_unlock(s);
#else
    s=f_readreg(s);
    d=f_writereg(d);
    raw_fmov_rr(d,s);
    f_unlock(s);
    f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4739    
/* Load the FPU control word from memory at base+index.
   NOTE(review): presumably used to switch x87 rounding modes from a
   table of precomputed control words — confirm against callers. */
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
    index=readreg(index,4);

    raw_fldcw_m_indexed(index,base);
    unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))

/* Test FP register r (sets FPU condition codes via the raw emitter). */
MIDFUNC(1,ftst_r,(FR r))
{
    r=f_readreg(r);
    raw_ftst_r(r);
    f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))

/* Declare the cached FP result/flags stale by detaching FP_RESULT. */
MIDFUNC(0,dont_care_fflags,(void))
{
    f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4762    
/*
 * Unary FPU operations, all of the form d = op(s): the source is
 * acquired for reading, the destination for writing, the raw emitter
 * does the work, and both registers are unlocked.
 */

/* d = sqrt(s) */
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_fsqrt_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))

/* d = |s| */
MIDFUNC(2,fabs_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_fabs_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))

/* d = sin(s) */
MIDFUNC(2,fsin_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_fsin_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))

/* d = cos(s) */
MIDFUNC(2,fcos_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_fcos_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))

/* d = 2^s */
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_ftwotox_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))

/* d = e^s */
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_fetox_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))

/* d = s rounded to integer (current rounding mode) */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_frndint_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))

/* d = log2(s) */
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_flog2_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))

/* d = -s */
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
    s=f_readreg(s);
    d=f_writereg(d);
    raw_fneg_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
4852    
/*
 * Binary FPU operations, all of the form d = d op s: the destination
 * is acquired read-modify-write (f_rmw), the source read-only.
 * fcmp_rr is the exception — it only reads both operands.
 */

/* d += s */
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
    s=f_readreg(s);
    d=f_rmw(d);
    raw_fadd_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))

/* d -= s */
MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
    s=f_readreg(s);
    d=f_rmw(d);
    raw_fsub_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))

/* Compare d with s (both read-only; result goes to FPU flags). */
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
    d=f_readreg(d);
    s=f_readreg(s);
    raw_fcmp_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))

/* d /= s */
MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
    s=f_readreg(s);
    d=f_rmw(d);
    raw_fdiv_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))

/* d = remainder of d/s */
MIDFUNC(2,frem_rr,(FRW d, FR s))
{
    s=f_readreg(s);
    d=f_rmw(d);
    raw_frem_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))

/* d = IEEE remainder of d/s */
MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
    s=f_readreg(s);
    d=f_rmw(d);
    raw_frem1_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))

/* d *= s */
MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
    s=f_readreg(s);
    d=f_rmw(d);
    raw_fmul_rr(d,s);
    f_unlock(s);
    f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4922    
4923     /********************************************************************
4924     * Support functions exposed to gencomp. CREATE time *
4925     ********************************************************************/
4926    
/* Emit code that derives the Z flag from register r, using tmp as
   scratch.  Uses the host BSF instruction when target_check_bsf()
   determined it behaves acceptably (setzflg_uses_bsf), otherwise a
   simulated sequence. */
void set_zero(int r, int tmp)
{
    if (setzflg_uses_bsf)
	bsf_l_rr(r,r);
    else
	simulate_bsf(tmp,r);
}
4934    
/* Heuristic for avoiding partial-register ("rat") stalls: returns
   nonzero when it is worth evicting register r.  Only applies when
   the feature is enabled (KILLTHERAT) and the target CPU actually
   suffers from such stalls (have_rat_stall). */
int kill_rodent(int r)
{
    return KILLTHERAT &&
	have_rat_stall &&
	(live.state[r].status==INMEM ||
	 live.state[r].status==CLEAN ||
	 live.state[r].status==ISCONST ||
	 live.state[r].dirtysize==4);
}
4944    
/* Return the known constant value of virtual register r.
   Aborts (in debug builds, via Dif) if r is not actually constant. */
uae_u32 get_const(int r)
{
    Dif (!isconst(r)) {
	write_log("Register %d should be constant, but isn't\n",r);
	abort();
    }
    return live.state[r].val;
}
4953    
/* Flush the accumulated m68k PC offset: emit an add to the cached PC
   register (PC_P) and advance the compile-time program counter so
   both agree again. */
void sync_m68k_pc(void)
{
    if (m68k_pc_offset) {
	add_l_ri(PC_P,m68k_pc_offset);
	comp_pc_p+=m68k_pc_offset;
	m68k_pc_offset=0;
    }
}
4962    
4963     /********************************************************************
4964     * Scratch registers management *
4965     ********************************************************************/
4966    
/* Backing memory for scratch virtual registers: integer registers
   beyond the 16 that map to 68k registers, and FP registers beyond
   the 8 that map to the 68k FPU, spill here. */
struct scratch_t {
    uae_u32 regs[VREGS];
    fpu_register fregs[VFREGS];
};

static scratch_t scratch;
4973    
4974     /********************************************************************
4975     * Support functions exposed to newcpu *
4976     ********************************************************************/
4977    
/* Render a boolean preference as "on"/"off" for the startup log. */
static inline const char *str_on_off(bool b)
{
    if (b)
	return "on";
    return "off";
}
4982    
/* One-time JIT initialization: read preferences, probe the target
   CPU's capabilities, pick the icache-flush strategy, log the
   configuration and build the compiler dispatch tables.
   Safe to call repeatedly; only the first call does any work. */
void compiler_init(void)
{
    static bool initialized = false;
    if (initialized)
	return;

#if JIT_DEBUG
    // JIT debug mode ?
    JITDebug = PrefsFindBool("jitdebug");
#endif
    write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
    // Use JIT compiler for FPU instructions ?
    avoid_fpu = !PrefsFindBool("jitfpu");
#else
    // JIT FPU is always disabled
    avoid_fpu = true;
#endif
    write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

    // Get size of the translation cache (in KB)
    cache_size = PrefsFindInt32("jitcachesize");
    write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

    // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
    raw_init_cpu();
    setzflg_uses_bsf = target_check_bsf();
    write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
    write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
    write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);

    // Translation cache flush mechanism
    lazy_flush = PrefsFindBool("jitlazyflush");
    write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
    flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

    // Compiler features
    write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
    write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
    write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
#if USE_INLINING
    follow_const_jumps = PrefsFindBool("jitinline");
#endif
    write_log("<JIT compiler> : translate through constant jumps : %s\n", str_on_off(follow_const_jumps));
    write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

    // Build compiler tables
    build_comp();

    initialized = true;

#if PROFILE_UNTRANSLATED_INSNS
    write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif

#if PROFILE_COMPILE_TIME
    write_log("<JIT compiler> : gather statistics on translation time\n");
    emul_start_time = clock();
#endif
}
5044    
/* JIT shutdown: release the translation cache and popallspace, then
   dump whichever profiling statistics were compiled in. */
void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
    emul_end_time = clock();
#endif

    // Deallocate translation cache
    if (compiled_code) {
	vm_release(compiled_code, cache_size * 1024);
	compiled_code = 0;
    }

    // Deallocate popallspace
    if (popallspace) {
	vm_release(popallspace, POPALLSPACE_SIZE);
	popallspace = 0;
    }

#if PROFILE_COMPILE_TIME
    write_log("### Compile Block statistics\n");
    write_log("Number of calls to compile_block : %d\n", compile_count);
    uae_u32 emul_time = emul_end_time - emul_start_time;
    write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
    write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
	      100.0*double(compile_time)/double(emul_time));
    write_log("\n");
#endif

#if PROFILE_UNTRANSLATED_INSNS
    /* Rank the interpreted (untranslated) opcodes by execution count
       and print the top offenders. */
    uae_u64 untranslated_count = 0;
    for (int i = 0; i < 65536; i++) {
	opcode_nums[i] = i;
	untranslated_count += raw_cputbl_count[i];
    }
    write_log("Sorting out untranslated instructions count...\n");
    qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
    write_log("\nRank Opc Count Name\n");
    for (int i = 0; i < untranslated_top_ten; i++) {
	uae_u32 count = raw_cputbl_count[opcode_nums[i]];
	struct instr *dp;
	struct mnemolookup *lookup;
	if (!count)
	    break;
	dp = table68k + opcode_nums[i];
	for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
	    ;
	write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
    }
#endif

#if RECORD_REGISTER_USAGE
    /* Print 68k register usage counts, sorted, with cumulative
       percentages. */
    int reg_count_ids[16];
    uint64 tot_reg_count = 0;
    for (int i = 0; i < 16; i++) {
	reg_count_ids[i] = i;
	tot_reg_count += reg_count[i];
    }
    qsort(reg_count_ids, 16, sizeof(int), reg_count_compare);
    uint64 cum_reg_count = 0;
    for (int i = 0; i < 16; i++) {
	int r = reg_count_ids[i];
	cum_reg_count += reg_count[r];
	printf("%c%d : %16ld %2.1f%% [%2.1f]\n", r < 8 ? 'D' : 'A', r % 8,
	       reg_count[r],
	       100.0*double(reg_count[r])/double(tot_reg_count),
	       100.0*double(cum_reg_count)/double(tot_reg_count));
    }
#endif
}
5114    
/* Decide whether the JIT should be used at all, based on user
   preferences and the emulated CPU model.  Returns false (with a log
   message) when any precondition fails. */
bool compiler_use_jit(void)
{
    // Check for the "jit" prefs item
    if (!PrefsFindBool("jit"))
	return false;

    // Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
    if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
	write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
	return false;
    }

    // FIXME: there are currently problems with JIT compilation and anything below a 68040
    if (CPUType < 4) {
	write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
	return false;
    }

    return true;
}
5135    
/* Reset the register-allocation state at the start of compiling a
   block: detach every virtual register from native registers, bind
   each to its backing memory location, and clear the per-block
   bookkeeping (touch counter, PC offset, flag state). */
void init_comp(void)
{
    int i;
    uae_s8* cb=can_byte;
    uae_s8* cw=can_word;
    uae_s8* au=always_used;

#if RECORD_REGISTER_USAGE
    for (i=0;i<16;i++)
	reg_count_local[i] = 0;
#endif

    /* Start with every virtual register detached and undefined. */
    for (i=0;i<VREGS;i++) {
	live.state[i].realreg=-1;
	live.state[i].needflush=NF_SCRATCH;
	live.state[i].val=0;
	set_status(i,UNDEF);
    }

    for (i=0;i<VFREGS;i++) {
	live.fate[i].status=UNDEF;
	live.fate[i].realreg=-1;
	live.fate[i].needflush=NF_SCRATCH;
    }

    for (i=0;i<VREGS;i++) {
	if (i<16) { /* First 16 registers map to 68k registers */
	    live.state[i].mem=((uae_u32*)&regs)+i;
	    live.state[i].needflush=NF_TOMEM;
	    set_status(i,INMEM);
	}
	else
	    live.state[i].mem=scratch.regs+i;
    }
    /* PC_P caches regs.pc_p and starts out as a known constant. */
    live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
    live.state[PC_P].needflush=NF_TOMEM;
    set_const(PC_P,(uintptr)comp_pc_p);

    live.state[FLAGX].mem=(uae_u32*)&(regflags.x);
    live.state[FLAGX].needflush=NF_TOMEM;
    set_status(FLAGX,INMEM);

    live.state[FLAGTMP].mem=(uae_u32*)&(regflags.cznv);
    live.state[FLAGTMP].needflush=NF_TOMEM;
    set_status(FLAGTMP,INMEM);

    live.state[NEXT_HANDLER].needflush=NF_HANDLER;
    set_status(NEXT_HANDLER,UNDEF);

    for (i=0;i<VFREGS;i++) {
	if (i<8) { /* First 8 registers map to 68k FPU registers */
	    live.fate[i].mem=(uae_u32*)fpu_register_address(i);
	    live.fate[i].needflush=NF_TOMEM;
	    live.fate[i].status=INMEM;
	}
	else if (i==FP_RESULT) {
	    live.fate[i].mem=(uae_u32*)(&fpu.result);
	    live.fate[i].needflush=NF_TOMEM;
	    live.fate[i].status=INMEM;
	}
	else
	    live.fate[i].mem=(uae_u32*)(&scratch.fregs[i]);
    }


    /* Initialize native register attributes; can_byte/can_word/
       always_used are sorted lists of register numbers that we walk
       in parallel with i. */
    for (i=0;i<N_REGS;i++) {
	live.nat[i].touched=0;
	live.nat[i].nholds=0;
	live.nat[i].locked=0;
	if (*cb==i) {
	    live.nat[i].canbyte=1; cb++;
	} else live.nat[i].canbyte=0;
	if (*cw==i) {
	    live.nat[i].canword=1; cw++;
	} else live.nat[i].canword=0;
	if (*au==i) {
	    live.nat[i].locked=1; au++;
	}
    }

    for (i=0;i<N_FREGS;i++) {
	live.fat[i].touched=0;
	live.fat[i].nholds=0;
	live.fat[i].locked=0;
    }

    touchcnt=1;
    m68k_pc_offset=0;
    live.flags_in_flags=TRASH;
    live.flags_on_stack=VALID;
    live.flags_are_important=1;

    raw_fp_init();
}
5230    
5231     /* Only do this if you really mean it! The next call should be to init!*/
5232     void flush(int save_regs)
5233     {
5234     int fi,i;
5235    
5236     log_flush();
5237     flush_flags(); /* low level */
5238     sync_m68k_pc(); /* mid level */
5239    
5240     if (save_regs) {
5241     for (i=0;i<VFREGS;i++) {
5242     if (live.fate[i].needflush==NF_SCRATCH ||
5243     live.fate[i].status==CLEAN) {
5244     f_disassociate(i);
5245     }
5246     }
5247     for (i=0;i<VREGS;i++) {
5248     if (live.state[i].needflush==NF_TOMEM) {
5249     switch(live.state[i].status) {
5250     case INMEM:
5251     if (live.state[i].val) {
5252 gbeauche 1.24 raw_add_l_mi((uintptr)live.state[i].mem,live.state[i].val);
5253 gbeauche 1.1 log_vwrite(i);
5254     live.state[i].val=0;
5255     }
5256     break;
5257     case CLEAN:
5258     case DIRTY:
5259     remove_offset(i,-1); tomem(i); break;
5260     case ISCONST:
5261     if (i!=PC_P)
5262     writeback_const(i);
5263     break;
5264     default: break;
5265     }
5266     Dif (live.state[i].val && i!=PC_P) {
5267     write_log("Register %d still has val %x\n",
5268     i,live.state[i].val);
5269     }
5270     }
5271     }
5272     for (i=0;i<VFREGS;i++) {
5273     if (live.fate[i].needflush==NF_TOMEM &&
5274     live.fate[i].status==DIRTY) {
5275     f_evict(i);
5276     }
5277     }
5278     raw_fp_cleanup_drop();
5279     }
5280     if (needflags) {
5281     write_log("Warning! flush with needflags=1!\n");
5282     }
5283     }
5284    
5285     static void flush_keepflags(void)
5286     {
5287     int fi,i;
5288    
5289     for (i=0;i<VFREGS;i++) {
5290     if (live.fate[i].needflush==NF_SCRATCH ||
5291     live.fate[i].status==CLEAN) {
5292     f_disassociate(i);
5293     }
5294     }
5295     for (i=0;i<VREGS;i++) {
5296     if (live.state[i].needflush==NF_TOMEM) {
5297     switch(live.state[i].status) {
5298     case INMEM:
5299     /* Can't adjust the offset here --- that needs "add" */
5300     break;
5301     case CLEAN:
5302     case DIRTY:
5303     remove_offset(i,-1); tomem(i); break;
5304     case ISCONST:
5305     if (i!=PC_P)
5306     writeback_const(i);
5307     break;
5308     default: break;
5309     }
5310     }
5311     }
5312     for (i=0;i<VFREGS;i++) {
5313     if (live.fate[i].needflush==NF_TOMEM &&
5314     live.fate[i].status==DIRTY) {
5315     f_evict(i);
5316     }
5317     }
5318     raw_fp_cleanup_drop();
5319     }
5320    
/* Forget all scratch virtual registers (integer and FP), and warn
   about any native register that is still locked. */
void freescratch(void)
{
    int i;
    for (i=0;i<N_REGS;i++)
	/* NOTE(review): register 4 is exempted from the warning —
	   presumably ESP on x86, which is permanently locked; confirm
	   against the backend's always_used table. */
	if (live.nat[i].locked && i!=4)
	    write_log("Warning! %d is locked\n",i);

    for (i=0;i<VREGS;i++)
	if (live.state[i].needflush==NF_SCRATCH) {
	    forget_about(i);
	}

    for (i=0;i<VFREGS;i++)
	if (live.fate[i].needflush==NF_SCRATCH) {
	    f_forget_about(i);
	}
}
5338    
5339     /********************************************************************
5340     * Support functions, internal *
5341     ********************************************************************/
5342    
5343    
/* Pad the code-emission pointer `target` up to an a-byte boundary
   (a must be a power of two; a==0 means no alignment). */
static void align_target(uae_u32 a)
{
    if (!a)
	return;

    if (tune_nop_fillers)
	/* Use the backend's multi-byte NOP filler sequences. */
	raw_emit_nop_filler(a - (((uintptr)target) & (a - 1)));
    else {
	/* Fill with NOPs --- makes debugging with gdb easier */
	while ((uintptr)target&(a-1))
	    *target++=0x90;
    }
}
5357    
/* Return nonzero if host address addr lies inside the mapped ROM. */
static __inline__ int isinrom(uintptr addr)
{
    return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
}
5362    
/* Write back all dirty integer registers held in call-clobbered
   native registers, and evict every FP register, ahead of a call. */
static void flush_all(void)
{
    int i;

    log_flush();
    for (i=0;i<VREGS;i++)
	if (live.state[i].status==DIRTY) {
	    if (!call_saved[live.state[i].realreg]) {
		tomem(i);
	    }
	}
    for (i=0;i<VFREGS;i++)
	if (f_isinreg(i))
	    f_evict(i);
    raw_fp_cleanup_drop();
}
5379    
/* Make sure all registers that will get clobbered by a call are
   save and sound in memory */
static void prepare_for_call_1(void)
{
    flush_all(); /* If there are registers that don't get clobbered,
		  * we should be a bit more selective here */
}
5387    
/* We will call a C routine in a moment. That will clobber all registers,
   so we need to disassociate everything */
static void prepare_for_call_2(void)
{
    int i;
    /* Free every call-clobbered native integer register... */
    for (i=0;i<N_REGS;i++)
	if (!call_saved[i] && live.nat[i].nholds>0)
	    free_nreg(i);

    /* ...and every native FP register. */
    for (i=0;i<N_FREGS;i++)
	if (live.fat[i].nholds>0)
	    f_free_nreg(i);

    live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
				  flags at the very start of the call_r
				  functions! */
}
5405    
5406     /********************************************************************
5407     * Memory access and related functions, CREATE time *
5408     ********************************************************************/
5409    
/* Record a pending conditional branch: the not-taken and taken m68k
   target addresses and the condition code.  Consumed later by the
   block-epilogue code generation. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
    next_pc_p=not_taken;
    taken_pc_p=taken;
    branch_cc=cond;
}
5416    
5417    
5418     static uae_u32 get_handler_address(uae_u32 addr)
5419     {
5420     uae_u32 cl=cacheline(addr);
5421 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5422     return (uintptr)&(bi->direct_handler_to_use);
5423 gbeauche 1.1 }
5424    
5425     static uae_u32 get_handler(uae_u32 addr)
5426     {
5427     uae_u32 cl=cacheline(addr);
5428 gbeauche 1.24 blockinfo* bi=get_blockinfo_addr_new((void*)(uintptr)addr,0);
5429     return (uintptr)bi->direct_handler_to_use;
5430 gbeauche 1.1 }
5431    
/* Emit code loading the (indirect) direct-handler pointer for the
   block at m68k address addr into native register reg. */
static void load_handler(int reg, uae_u32 addr)
{
    mov_l_rm(reg,get_handler_address(addr));
}
5436    
5437     /* This version assumes that it is writing *real* memory, and *will* fail
5438     * if that assumption is wrong! No branches, no second chances, just
5439     * straight go-for-it attitude */
5440    
/* Emit a direct store of `source` (size bytes: 1, 2 or 4) to the m68k
   address in `address`, byte-swapping multi-byte values to big-endian
   and offsetting by MEMBaseDiff.  tmp is scratch; if clobber is set,
   source itself may be trashed instead of tmp. */
static void writemem_real(int address, int source, int size, int tmp, int clobber)
{
    int f=tmp;

    if (clobber)
	f=source;

#if SIZEOF_VOID_P == 8
    /* On 64-bit hosts the 32-bit m68k address must be sign-extended
       unless 33-bit addressing guarantees positive offsets. */
    if (!ThirtyThreeBitAddressing)
	sign_extend_32_rr(address, address);
#endif

    switch(size) {
    case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
    case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
    case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
    }
    forget_about(tmp);
    forget_about(f);
}
5461    
/* Public store helpers — thin wrappers around writemem_real().
   The *_clobber variants allow the source register to be trashed. */

void writebyte(int address, int source, int tmp)
{
    writemem_real(address,source,1,tmp,0);
}

static __inline__ void writeword_general(int address, int source, int tmp,
					 int clobber)
{
    writemem_real(address,source,2,tmp,clobber);
}

void writeword_clobber(int address, int source, int tmp)
{
    writeword_general(address,source,tmp,1);
}

void writeword(int address, int source, int tmp)
{
    writeword_general(address,source,tmp,0);
}

static __inline__ void writelong_general(int address, int source, int tmp,
					 int clobber)
{
    writemem_real(address,source,4,tmp,clobber);
}

void writelong_clobber(int address, int source, int tmp)
{
    writelong_general(address,source,tmp,1);
}

void writelong(int address, int source, int tmp)
{
    writelong_general(address,source,tmp,0);
}
5498    
5499    
5500    
5501     /* This version assumes that it is reading *real* memory, and *will* fail
5502     * if that assumption is wrong! No branches, no second chances, just
5503     * straight go-for-it attitude */
5504    
/* Emit a direct load of size bytes (1, 2 or 4) from the m68k address
   in `address` into `dest`, offsetting by MEMBaseDiff and
   byte-swapping multi-byte values from big-endian.  tmp is scratch
   and is invalidated. */
static void readmem_real(int address, int dest, int size, int tmp)
{
    int f=tmp;

    if (size==4 && address!=dest)
	f=dest;

#if SIZEOF_VOID_P == 8
    /* Sign-extend the 32-bit m68k address on 64-bit hosts unless
       33-bit addressing is in effect. */
    if (!ThirtyThreeBitAddressing)
	sign_extend_32_rr(address, address);
#endif

    switch(size) {
    case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
    case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
    case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
    }
    forget_about(tmp);
}
5524    
/* Public load helpers — thin wrappers around readmem_real(). */

void readbyte(int address, int dest, int tmp)
{
    readmem_real(address,dest,1,tmp);
}

void readword(int address, int dest, int tmp)
{
    readmem_real(address,dest,2,tmp);
}

void readlong(int address, int dest, int tmp)
{
    readmem_real(address,dest,4,tmp);
}
5539    
5540     void get_n_addr(int address, int dest, int tmp)
5541     {
5542     // a is the register containing the virtual address
5543     // after the offset had been fetched
5544     int a=tmp;
5545    
5546     // f is the register that will contain the offset
5547     int f=tmp;
5548    
5549     // a == f == tmp if (address == dest)
5550     if (address!=dest) {
5551     a=address;
5552     f=dest;
5553     }
5554    
5555     #if REAL_ADDRESSING
5556     mov_l_rr(dest, address);
5557     #elif DIRECT_ADDRESSING
5558     lea_l_brr(dest,address,MEMBaseDiff);
5559     #endif
5560     forget_about(tmp);
5561     }
5562    
void get_n_addr_jmp(int address, int dest, int tmp)
{
    /* For this, we need to get the same address as the rest of UAE
       would --- otherwise we end up translating everything twice */
    get_n_addr(address,dest,tmp);
}
5569    
5570    
5571     /* base is a register, but dp is an actual value.
5572     target is a register, as is tmp */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
    /* Compute a 68020-style effective address from extension word dp
       into `target`.  Handles both the full-format extension word
       (bit 8 set: optional base/index suppression, base and outer
       displacements, memory indirection) and the 68000-style brief
       format.  tmp is scratch and is invalidated. */
    int reg = (dp >> 12) & 15;       /* index register number */
    int regd_shift=(dp >> 9) & 3;    /* index scale = 1 << regd_shift */

    if (dp & 0x100) { /* full-format extension word */
	int ignorebase=(dp&0x80);    /* base suppress */
	int ignorereg=(dp&0x40);     /* index suppress */
	int addbase=0;               /* base displacement */
	int outer=0;                 /* outer displacement */

	/* Fetch base displacement (word or long) from the
	   instruction stream. */
	if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
	if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

	/* Fetch outer displacement (word or long). */
	if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
	if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

	if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
	    if (!ignorereg) {
		if ((dp & 0x800) == 0)
		    sign_extend_16_rr(target,reg);
		else
		    mov_l_rr(target,reg);
		shll_l_ri(target,regd_shift);
	    }
	    else
		mov_l_ri(target,0);

	    /* target is now regd */
	    if (!ignorebase)
		add_l(target,base);
	    add_l_ri(target,addbase);
	    if (dp&0x03) readlong(target,target,tmp); /* memory indirect */
	} else { /* do the getlong first, then add regd */
	    if (!ignorebase) {
		mov_l_rr(target,base);
		add_l_ri(target,addbase);
	    }
	    else
		mov_l_ri(target,addbase);
	    if (dp&0x03) readlong(target,target,tmp); /* memory indirect */

	    if (!ignorereg) {
		if ((dp & 0x800) == 0)
		    sign_extend_16_rr(tmp,reg);
		else
		    mov_l_rr(tmp,reg);
		shll_l_ri(tmp,regd_shift);
		/* tmp is now regd */
		add_l(target,tmp);
	    }
	}
	add_l_ri(target,outer);
    }
    else { /* 68000 version: brief extension word,
	      base + scaled index + 8-bit displacement */
	if ((dp & 0x800) == 0) { /* Sign extend */
	    sign_extend_16_rr(target,reg);
	    lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
	}
	else {
	    lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
	}
    }
    forget_about(tmp);
}
5638    
5639    
5640    
5641    
5642    
5643     void set_cache_state(int enabled)
5644     {
5645     if (enabled!=letit)
5646     flush_icache_hard(77);
5647     letit=enabled;
5648     }
5649    
/* Return non-zero if the translation cache is currently enabled (see
   set_cache_state()). */
int get_cache_state(void)
{
	return letit;
}
5654    
5655     uae_u32 get_jitted_size(void)
5656     {
5657     if (compiled_code)
5658     return current_compile_p-compiled_code;
5659     return 0;
5660     }
5661    
5662 gbeauche 1.20 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5663     const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5664    
/* Allocate SIZE bytes of memory suitable for holding generated code.
   Returns NULL on failure.  The Linux-specific branch below is currently
   disabled ("&& 0") and kept only for reference; the live path simply
   uses vm_acquire(). */
static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
	/*
	  This is a really awful hack that is known to work on Linux at
	  least.
	  
	  The trick here is to make sure the allocated cache is nearby
	  code segment, and more precisely in the positive half of a
	  32-bit address space. i.e. addr < 0x80000000. Actually, it
	  turned out that a 32-bit binary run on AMD64 yields a cache
	  allocated around 0xa0000000, thus causing some troubles when
	  translating addresses from m68k to x86.
	*/
	static uint8 * code_base = NULL;
	if (code_base == NULL) {
		/* First call: start probing just past the data segment (sbrk(0)),
		   stepping by CODE_ALLOC_BOUNDARIES (at least one page) per try. */
		uintptr page_size = getpagesize();
		uintptr boundaries = CODE_ALLOC_BOUNDARIES;
		if (boundaries < page_size)
			boundaries = page_size;
		code_base = (uint8 *)sbrk(0);
		for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
			if (vm_acquire_fixed(code_base, size) == 0) {
				uint8 *code = code_base;
				code_base += size;
				return code;
			}
			code_base += boundaries;
		}
		return NULL;
	}

	/* Subsequent calls: extend from the previously found base. */
	if (vm_acquire_fixed(code_base, size) == 0) {
		uint8 *code = code_base;
		code_base += size;
		return code;
	}

	/* NOTE(review): code_base is not advanced before this retry, so the
	   recursion re-attempts the same address — presumably relies on
	   vm_acquire_fixed() side effects; confirm if ever re-enabled. */
	if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
		return NULL;

	return do_alloc_code(size, depth + 1);
#else
	/* Generic path: let the VM layer pick an address. */
	uint8 *code = (uint8 *)vm_acquire(size);
	return code == VM_MAP_FAILED ? NULL : code;
#endif
}
5712    
5713     static inline uint8 *alloc_code(uint32 size)
5714     {
5715 gbeauche 1.31 uint8 *ptr = do_alloc_code(size, 0);
5716     /* allocated code must fit in 32-bit boundaries */
5717     assert((uintptr)ptr <= 0xffffffff);
5718     return ptr;
5719 gbeauche 1.20 }
5720    
5721 gbeauche 1.1 void alloc_cache(void)
5722     {
5723     if (compiled_code) {
5724     flush_icache_hard(6);
5725 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5726 gbeauche 1.1 compiled_code = 0;
5727     }
5728    
5729     if (cache_size == 0)
5730     return;
5731    
5732     while (!compiled_code && cache_size) {
5733 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5734 gbeauche 1.1 compiled_code = 0;
5735     cache_size /= 2;
5736     }
5737     }
5738 gbeauche 1.25 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5739 gbeauche 1.1
5740     if (compiled_code) {
5741     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5742     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5743     current_compile_p = compiled_code;
5744     current_cache_size = 0;
5745     }
5746     }
5747    
5748    
5749    
5750 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5751 gbeauche 1.1
5752 gbeauche 1.8 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5753 gbeauche 1.1 {
5754 gbeauche 1.8 uae_u32 k1 = 0;
5755     uae_u32 k2 = 0;
5756    
5757     #if USE_CHECKSUM_INFO
5758     checksum_info *csi = bi->csi;
5759     Dif(!csi) abort();
5760     while (csi) {
5761     uae_s32 len = csi->length;
5762 gbeauche 1.24 uintptr tmp = (uintptr)csi->start_p;
5763 gbeauche 1.8 #else
5764     uae_s32 len = bi->len;
5765 gbeauche 1.24 uintptr tmp = (uintptr)bi->min_pcp;
5766 gbeauche 1.8 #endif
5767     uae_u32*pos;
5768 gbeauche 1.1
5769 gbeauche 1.8 len += (tmp & 3);
5770 gbeauche 1.24 tmp &= ~((uintptr)3);
5771 gbeauche 1.8 pos = (uae_u32 *)tmp;
5772    
5773     if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5774     while (len > 0) {
5775     k1 += *pos;
5776     k2 ^= *pos;
5777     pos++;
5778     len -= 4;
5779     }
5780     }
5781 gbeauche 1.1
5782 gbeauche 1.8 #if USE_CHECKSUM_INFO
5783     csi = csi->next;
5784 gbeauche 1.1 }
5785 gbeauche 1.8 #endif
5786    
5787     *c1 = k1;
5788     *c2 = k2;
5789 gbeauche 1.1 }
5790    
5791 gbeauche 1.8 #if 0
5792 gbeauche 1.7 static void show_checksum(CSI_TYPE* csi)
5793 gbeauche 1.1 {
5794     uae_u32 k1=0;
5795     uae_u32 k2=0;
5796 gbeauche 1.7 uae_s32 len=CSI_LENGTH(csi);
5797 gbeauche 1.24 uae_u32 tmp=(uintptr)CSI_START_P(csi);
5798 gbeauche 1.1 uae_u32* pos;
5799    
5800     len+=(tmp&3);
5801     tmp&=(~3);
5802     pos=(uae_u32*)tmp;
5803    
5804     if (len<0 || len>MAX_CHECKSUM_LEN) {
5805     return;
5806     }
5807     else {
5808     while (len>0) {
5809     write_log("%08x ",*pos);
5810     pos++;
5811     len-=4;
5812     }
5813     write_log(" bla\n");
5814     }
5815     }
5816 gbeauche 1.8 #endif
5817 gbeauche 1.1
5818    
5819     int check_for_cache_miss(void)
5820     {
5821     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5822    
5823     if (bi) {
5824     int cl=cacheline(regs.pc_p);
5825     if (bi!=cache_tags[cl+1].bi) {
5826     raise_in_cl_list(bi);
5827     return 1;
5828     }
5829     }
5830     return 0;
5831     }
5832    
5833    
5834     static void recompile_block(void)
5835     {
5836     /* An existing block's countdown code has expired. We need to make
5837     sure that execute_normal doesn't refuse to recompile due to a
5838     perceived cache miss... */
5839     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5840    
5841     Dif (!bi)
5842     abort();
5843     raise_in_cl_list(bi);
5844     execute_normal();
5845     return;
5846     }
5847     static void cache_miss(void)
5848     {
5849     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5850     uae_u32 cl=cacheline(regs.pc_p);
5851     blockinfo* bi2=get_blockinfo(cl);
5852    
5853     if (!bi) {
5854     execute_normal(); /* Compile this block now */
5855     return;
5856     }
5857     Dif (!bi2 || bi==bi2) {
5858     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5859     abort();
5860     }
5861     raise_in_cl_list(bi);
5862     return;
5863     }
5864    
5865     static int called_check_checksum(blockinfo* bi);
5866    
/* Verify that the m68k source of a block in BI_NEED_CHECK state is still
   unchanged.  On success the block is reactivated (and its direct-jump
   dependencies are checked too); on failure it is invalidated so it gets
   recompiled.  Returns non-zero when the block may be executed as-is. */
static inline int block_check_checksum(blockinfo* bi)
{
	uae_u32 c1,c2;
	bool isgood;

	if (bi->status!=BI_NEED_CHECK)
		return 1; /* This block is in a checked state */

	checksum_count++;

	/* A block whose stored checksums are both zero can never be
	   revalidated; force a mismatch. */
	if (bi->c1 || bi->c2)
		calc_checksum(bi,&c1,&c2);
	else {
		c1=c2=1; /* Make sure it doesn't match */
	}

	isgood=(c1==bi->c1 && c2==bi->c2);

	if (isgood) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use=bi->handler;
		set_dhtu(bi,bi->direct_handler);
		bi->status=BI_CHECKING;
		/* Blocks we jump into directly must also still be valid. */
		isgood=called_check_checksum(bi);
	}
	if (isgood) {
		/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2);*/
		remove_from_list(bi);
		add_to_active(bi);
		raise_in_cl_list(bi);
		bi->status=BI_ACTIVE;
	}
	else {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2); */
		invalidate_block(bi);
		raise_in_cl_list(bi);
	}
	return isgood;
}
5911    
5912     static int called_check_checksum(blockinfo* bi)
5913     {
5914     dependency* x=bi->deplist;
5915     int isgood=1;
5916     int i;
5917    
5918     for (i=0;i<2 && isgood;i++) {
5919     if (bi->dep[i].jmp_off) {
5920     isgood=block_check_checksum(bi->dep[i].target);
5921     }
5922     }
5923     return isgood;
5924     }
5925    
5926     static void check_checksum(void)
5927     {
5928     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5929     uae_u32 cl=cacheline(regs.pc_p);
5930     blockinfo* bi2=get_blockinfo(cl);
5931    
5932     /* These are not the droids you are looking for... */
5933     if (!bi) {
5934     /* Whoever is the primary target is in a dormant state, but
5935     calling it was accidental, and we should just compile this
5936     new block */
5937     execute_normal();
5938     return;
5939     }
5940     if (bi!=bi2) {
5941     /* The block was hit accidentally, but it does exist. Cache miss */
5942     cache_miss();
5943     return;
5944     }
5945    
5946     if (!block_check_checksum(bi))
5947     execute_normal();
5948     }
5949    
/* Bring the register allocator's state in line with what block BI expects
   on entry: honour the block's promises about unneeded virtual registers,
   flush everything to memory, then pre-load the native registers the block
   demands. */
static __inline__ void match_states(blockinfo* bi)
{
	int i;
	smallstate* s=&(bi->env);

	if (bi->status==BI_NEED_CHECK) {
		block_check_checksum(bi);
	}
	if (bi->status==BI_ACTIVE ||
		bi->status==BI_FINALIZING) { /* Deal with the *promises* the
										block makes (about not using
										certain vregs) */
		for (i=0;i<16;i++) {
			if (s->virt[i]==L_UNNEEDED) {
				// write_log("unneeded reg %d at %p\n",i,target);
				COMPCALL(forget_about)(i); // FIXME
			}
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			// printf("Loading reg %d into %d at %p\n",v,i,target);
			readreg_specific(v,4,i);
			// do_load_reg(i,v);
			// setlock(i);
		}
	}
	/* Second pass: registers are unlocked only after all loads are done. */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			unlock2(i);
		}
	}
}
5988    
/* Generate, once at start-up, the small fixed code stubs that translated
   code jumps through: the "popall_*" exit trampolines (restore preserved
   host registers, then jump to the C handler), the "pushall_call_handler"
   entry stub, and — on x86/x86-64 — the m68k_compile_execute dispatch
   loop.  All stubs live in popallspace, which is made read+execute at the
   end. */
static __inline__ void create_popalls(void)
{
	int i,r;

	if ((popallspace = alloc_code(POPALLSPACE_SIZE)) == NULL) {
		write_log("FATAL: Could not allocate popallspace!\n");
		abort();
	}
	/* Writable while we emit code into it; see bottom of function. */
	vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_WRITE);

	current_compile_p=popallspace;
	set_target(current_compile_p);
#if USE_PUSH_POP
	/* If we can't use gcc inline assembly, we need to pop some
	   registers before jumping back to the various get-out routines.
	   This generates the code for it.
	*/
	align_target(align_jumps);
	popall_do_nothing=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)do_nothing);

	align_target(align_jumps);
	popall_execute_normal=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)execute_normal);

	align_target(align_jumps);
	popall_cache_miss=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)cache_miss);

	align_target(align_jumps);
	popall_recompile_block=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)recompile_block);

	align_target(align_jumps);
	popall_exec_nostats=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)exec_nostats);

	align_target(align_jumps);
	popall_check_checksum=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uintptr)check_checksum);

	align_target(align_jumps);
	current_compile_p=get_target();
#else
	/* Without push/pop wrappers the exit points are the C handlers
	   themselves. */
	popall_exec_nostats=(void *)exec_nostats;
	popall_execute_normal=(void *)execute_normal;
	popall_cache_miss=(void *)cache_miss;
	popall_recompile_block=(void *)recompile_block;
	popall_do_nothing=(void *)do_nothing;
	popall_check_checksum=(void *)check_checksum;
#endif

	/* And now, the code to do the matching pushes and then jump
	   into a handler routine */
	pushall_call_handler=get_target();
#if USE_PUSH_POP
	for (i=N_REGS;i--;) {
		if (need_to_preserve[i])
			raw_push_l_r(i);
	}
#endif
	/* Dispatch: index cache_tags by the tag of regs.pc_p and jump to the
	   handler stored there. */
	r=REG_PC_TMP;
	raw_mov_l_rm(r,(uintptr)&regs.pc_p);
	raw_and_l_ri(r,TAGMASK);
	raw_jmp_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);

#if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
	/* Emit the main execution loop: call the handler for the current PC,
	   then, while regs.spcflags is clear, loop; otherwise run
	   m68k_do_specialties and exit when it returns non-zero or
	   quit_program is set.  Backward branches are 8-bit relative, hence
	   the manual emit_byte displacement computation. */
	align_target(align_jumps);
	m68k_compile_execute = (void (*)(void))get_target();
	for (i=N_REGS;i--;) {
		if (need_to_preserve[i])
			raw_push_l_r(i);
	}
	align_target(align_loops);
	uae_u32 dispatch_loop = (uintptr)get_target();
	r=REG_PC_TMP;
	raw_mov_l_rm(r,(uintptr)&regs.pc_p);
	raw_and_l_ri(r,TAGMASK);
	raw_call_m_indexed((uintptr)cache_tags,r,SIZEOF_VOID_P);
	raw_cmp_l_mi((uintptr)&regs.spcflags,0);
	raw_jcc_b_oponly(NATIVE_CC_EQ);
	emit_byte(dispatch_loop-((uintptr)get_target()+1));
	raw_call((uintptr)m68k_do_specialties);
	raw_test_l_rr(REG_RESULT,REG_RESULT);
	raw_jcc_b_oponly(NATIVE_CC_EQ);
	emit_byte(dispatch_loop-((uintptr)get_target()+1));
	raw_cmp_b_mi((uintptr)&quit_program,0);
	raw_jcc_b_oponly(NATIVE_CC_EQ);
	emit_byte(dispatch_loop-((uintptr)get_target()+1));
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_ret();
#endif

	// no need to further write into popallspace
	vm_protect(popallspace, POPALLSPACE_SIZE, VM_PAGE_READ | VM_PAGE_EXECUTE);
}
6112    
6113     static __inline__ void reset_lists(void)
6114     {
6115     int i;
6116    
6117     for (i=0;i<MAX_HOLD_BI;i++)
6118     hold_bi[i]=NULL;
6119     active=NULL;
6120     dormant=NULL;
6121     }
6122    
/* Initialise a fresh blockinfo: emit its two tiny per-block stubs —
   direct_pen ("pc enter": store bi->pc_p into regs.pc_p and recompile)
   and direct_pcc ("pc check checksum": same, but go through the checksum
   verifier) — and reset its dependency links and state. */
static void prepare_block(blockinfo* bi)
{
	int i;

	set_target(current_compile_p);
	align_target(align_jumps);
	bi->direct_pen=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_execute_normal);

	align_target(align_jumps);
	bi->direct_pcc=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uintptr)&(bi->pc_p));
	raw_mov_l_mr((uintptr)&regs.pc_p,0);
	raw_jmp((uintptr)popall_check_checksum);
	current_compile_p=get_target();

	bi->deplist=NULL;
	for (i=0;i<2;i++) {
		bi->dep[i].prev_p=NULL;
		bi->dep[i].next=NULL;
	}
	bi->env=default_ss;
	bi->status=BI_INVALID;
	bi->havestate=0;
	//bi->env=empty_ss;
}
6151    
6152 gbeauche 1.21 // OPCODE is in big endian format, use cft_map() beforehand, if needed.
6153     static inline void reset_compop(int opcode)
6154 gbeauche 1.17 {
6155 gbeauche 1.21 compfunctbl[opcode] = NULL;
6156     nfcompfunctbl[opcode] = NULL;
6157     }
6158    
/* Parse exactly four hex digits starting at P into an opcode value.
   Returns the 16-bit value, or -1 if any of the four characters is not a
   hex digit.  Used by merge_blacklist() to read "jitblacklist" entries. */
static int read_opcode(const char *p)
{
	int value = 0;
	for (int i = 0; i < 4; i++) {
		const char c = p[i];
		int digit;
		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = (c - 'a') + 10;
		else if (c >= 'A' && c <= 'F')
			digit = (c - 'A') + 10;
		else
			return -1;
		value = (value << 4) | digit;
	}
	return value;
}
6181    
6182     static bool merge_blacklist()
6183     {
6184     const char *blacklist = PrefsFindString("jitblacklist");
6185     if (blacklist) {
6186     const char *p = blacklist;
6187     for (;;) {
6188     if (*p == 0)
6189     return true;
6190    
6191     int opcode1 = read_opcode(p);
6192     if (opcode1 < 0)
6193     return false;
6194     p += 4;
6195    
6196     int opcode2 = opcode1;
6197     if (*p == '-') {
6198     p++;
6199     opcode2 = read_opcode(p);
6200     if (opcode2 < 0)
6201     return false;
6202     p += 4;
6203     }
6204    
6205     if (*p == 0 || *p == ';') {
6206     write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
6207     for (int opcode = opcode1; opcode <= opcode2; opcode++)
6208     reset_compop(cft_map(opcode));
6209    
6210     if (*p++ == ';')
6211     continue;
6212    
6213     return true;
6214     }
6215    
6216     return false;
6217     }
6218     }
6219     return true;
6220 gbeauche 1.17 }
6221    
6222 gbeauche 1.1 void build_comp(void)
6223     {
6224     int i;
6225     int jumpcount=0;
6226     unsigned long opcode;
6227     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6228     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6229     int count;
6230     int cpu_level = 0; // 68000 (default)
6231     if (CPUType == 4)
6232     cpu_level = 4; // 68040 with FPU
6233     else {
6234     if (FPUType)
6235     cpu_level = 3; // 68020 with FPU
6236     else if (CPUType >= 2)
6237     cpu_level = 2; // 68020
6238     else if (CPUType == 1)
6239     cpu_level = 1;
6240     }
6241     struct cputbl *nfctbl = (
6242     cpu_level == 4 ? op_smalltbl_0_nf
6243     : cpu_level == 3 ? op_smalltbl_1_nf
6244     : cpu_level == 2 ? op_smalltbl_2_nf
6245     : cpu_level == 1 ? op_smalltbl_3_nf
6246     : op_smalltbl_4_nf);
6247    
6248     write_log ("<JIT compiler> : building compiler function tables\n");
6249    
6250     for (opcode = 0; opcode < 65536; opcode++) {
6251 gbeauche 1.21 reset_compop(opcode);
6252 gbeauche 1.1 nfcpufunctbl[opcode] = op_illg_1;
6253     prop[opcode].use_flags = 0x1f;
6254     prop[opcode].set_flags = 0x1f;
6255     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6256     }
6257    
6258     for (i = 0; tbl[i].opcode < 65536; i++) {
6259     int cflow = table68k[tbl[i].opcode].cflow;
6260 gbeauche 1.33 if (follow_const_jumps && (tbl[i].specific & 16))
6261 gbeauche 1.10 cflow = fl_const_jump;
6262 gbeauche 1.8 else
6263 gbeauche 1.10 cflow &= ~fl_const_jump;
6264     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6265 gbeauche 1.1
6266     int uses_fpu = tbl[i].specific & 32;
6267 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6268 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6269     else
6270     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6271     }
6272 gbeauche 1.8
6273 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6274     int uses_fpu = tbl[i].specific & 32;
6275 gbeauche 1.21 if (uses_fpu && avoid_fpu)
6276 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6277     else
6278     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6279    
6280     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6281     }
6282    
6283     for (i = 0; nfctbl[i].handler; i++) {
6284     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6285     }
6286    
6287     for (opcode = 0; opcode < 65536; opcode++) {
6288     compop_func *f;
6289     compop_func *nff;
6290     cpuop_func *nfcf;
6291     int isaddx,cflow;
6292    
6293     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6294     continue;
6295    
6296     if (table68k[opcode].handler != -1) {
6297     f = compfunctbl[cft_map(table68k[opcode].handler)];
6298     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6299     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6300     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6301     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6302     prop[cft_map(opcode)].cflow = cflow;
6303     prop[cft_map(opcode)].is_addx = isaddx;
6304     compfunctbl[cft_map(opcode)] = f;
6305     nfcompfunctbl[cft_map(opcode)] = nff;
6306     Dif (nfcf == op_illg_1)
6307     abort();
6308     nfcpufunctbl[cft_map(opcode)] = nfcf;
6309     }
6310     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6311     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6312 gbeauche 1.33 /* Unconditional jumps don't evaluate condition codes, so they
6313     * don't actually use any flags themselves */
6314     if (prop[cft_map(opcode)].cflow & fl_const_jump)
6315     prop[cft_map(opcode)].use_flags = 0;
6316 gbeauche 1.1 }
6317     for (i = 0; nfctbl[i].handler != NULL; i++) {
6318     if (nfctbl[i].specific)
6319     nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
6320     }
6321 gbeauche 1.21
6322     /* Merge in blacklist */
6323     if (!merge_blacklist())
6324     write_log("<JIT compiler> : blacklist merge failure!\n");
6325 gbeauche 1.1
6326     count=0;
6327     for (opcode = 0; opcode < 65536; opcode++) {
6328     if (compfunctbl[cft_map(opcode)])
6329     count++;
6330     }
6331     write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6332    
6333     /* Initialise state */
6334     create_popalls();
6335     alloc_cache();
6336     reset_lists();
6337    
6338     for (i=0;i<TAGSIZE;i+=2) {
6339     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6340     cache_tags[i+1].bi=NULL;
6341     }
6342    
6343     #if 0
6344     for (i=0;i<N_REGS;i++) {
6345     empty_ss.nat[i].holds=-1;
6346     empty_ss.nat[i].validsize=0;
6347     empty_ss.nat[i].dirtysize=0;
6348     }
6349     #endif
6350     for (i=0;i<VREGS;i++) {
6351     empty_ss.virt[i]=L_NEEDED;
6352     }
6353     for (i=0;i<N_REGS;i++) {
6354     empty_ss.nat[i]=L_UNKNOWN;
6355     }
6356     default_ss=empty_ss;
6357     }
6358    
6359    
/* No-op flush handler, installed when cache flushing is disabled. */
static void flush_icache_none(int n)
{
	/* Nothing to do. */
}
6364    
6365     static void flush_icache_hard(int n)
6366     {
6367     uae_u32 i;
6368     blockinfo* bi, *dbi;
6369    
6370     hard_flush_count++;
6371     #if 0
6372     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6373     n,regs.pc,regs.pc_p,current_cache_size/1024);
6374     current_cache_size = 0;
6375     #endif
6376     bi=active;
6377     while(bi) {
6378     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6379     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6380     dbi=bi; bi=bi->next;
6381     free_blockinfo(dbi);
6382     }
6383     bi=dormant;
6384     while(bi) {
6385     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6386     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6387     dbi=bi; bi=bi->next;
6388     free_blockinfo(dbi);
6389     }
6390    
6391     reset_lists();
6392     if (!compiled_code)
6393     return;
6394     current_compile_p=compiled_code;
6395     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6396     }
6397    
6398    
6399     /* "Soft flushing" --- instead of actually throwing everything away,
6400     we simply mark everything as "needs to be checked".
6401     */
6402    
6403     static inline void flush_icache_lazy(int n)
6404     {
6405     uae_u32 i;
6406     blockinfo* bi;
6407     blockinfo* bi2;
6408    
6409     soft_flush_count++;
6410     if (!active)
6411     return;
6412    
6413     bi=active;
6414     while (bi) {
6415     uae_u32 cl=cacheline(bi->pc_p);
6416     if (bi->status==BI_INVALID ||
6417     bi->status==BI_NEED_RECOMP) {
6418     if (bi==cache_tags[cl+1].bi)
6419     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6420     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6421     set_dhtu(bi,bi->direct_pen);
6422     bi->status=BI_INVALID;
6423     }
6424     else {
6425     if (bi==cache_tags[cl+1].bi)
6426     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6427     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6428     set_dhtu(bi,bi->direct_pcc);
6429     bi->status=BI_NEED_CHECK;
6430     }
6431     bi2=bi;
6432     bi=bi->next;
6433     }
6434     /* bi2 is now the last entry in the active list */
6435     bi2->next=dormant;
6436     if (dormant)
6437     dormant->prev_p=&(bi2->next);
6438    
6439     dormant=active;
6440     active->prev_p=&dormant;
6441     active=NULL;
6442 gbeauche 1.22 }
6443    
/* Invalidate translated blocks overlapping the m68k range
   [start, start+length).  With LAZY_FLUSH_ICACHE_RANGE only the affected
   active blocks are marked for recompilation; otherwise the whole cache is
   flushed via flush_icache(-1). */
void flush_icache_range(uae_u32 start, uae_u32 length)
{
	if (!active)
		return;

#if LAZY_FLUSH_ICACHE_RANGE
	uae_u8 *start_p = get_real_address(start);
	blockinfo *bi = active;
	while (bi) {
#if USE_CHECKSUM_INFO
		/* Overlap test per checksum record.  NOTE(review): both pointer
		   differences are signed, so a negative difference always compares
		   below the (non-negative) bound — presumably the intended
		   "either side starts inside the other" check; confirm. */
		bool invalidate = false;
		for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
			invalidate = (((start_p - csi->start_p) < csi->length) ||
						  ((csi->start_p - start_p) < length));
#else
		// Assume system is consistent and would invalidate the right range
		const bool invalidate = (bi->pc_p - start_p) < length;
#endif
		if (invalidate) {
			uae_u32 cl = cacheline(bi->pc_p);
			if (bi == cache_tags[cl + 1].bi)
				cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
			bi->handler_to_use = (cpuop_func *)popall_execute_normal;
			set_dhtu(bi, bi->direct_pen);
			bi->status = BI_NEED_RECOMP;
		}
		bi = bi->next;
	}
	return;
#endif
	flush_icache(-1);
}
6476    
/* Unrecoverable internal error: terminate immediately. */
static void catastrophe(void)
{
	abort();
}
6481    
6482     int failure;
6483    
6484     #define TARGET_M68K 0
6485     #define TARGET_POWERPC 1
6486     #define TARGET_X86 2
6487 gbeauche 1.24 #define TARGET_X86_64 3
6488 gbeauche 1.1 #if defined(i386) || defined(__i386__)
6489     #define TARGET_NATIVE TARGET_X86
6490     #endif
6491     #if defined(powerpc) || defined(__powerpc__)
6492     #define TARGET_NATIVE TARGET_POWERPC
6493     #endif
6494 gbeauche 1.24 #if defined(x86_64) || defined(__x86_64__)
6495     #define TARGET_NATIVE TARGET_X86_64
6496     #endif
6497 gbeauche 1.1
#ifdef ENABLE_MON
/* Byte accessors handed to the "mon" debugger so it reads/writes host
   memory directly (addresses here are host pointers, not m68k ones). */
static uae_u32 mon_read_byte_jit(uintptr addr)
{
	uae_u8 *m = (uae_u8 *)addr;
	return (uintptr)(*m);
}

static void mon_write_byte_jit(uintptr addr, uae_u32 b)
{
	uae_u8 *m = (uae_u8 *)addr;
	*m = b;
}
#endif
6511    
/* Disassemble LENGTH bytes at START through the "mon" debugger, using the
   disassembler matching TARGET (one of the TARGET_* constants).  Only does
   anything when JIT debugging is enabled and mon support is compiled in;
   temporarily swaps mon's byte accessors for the JIT host-memory ones. */
void disasm_block(int target, uint8 * start, size_t length)
{
	if (!JITDebug)
		return;

#if defined(JIT_DEBUG) && defined(ENABLE_MON)
	char disasm_str[200];
	sprintf(disasm_str, "%s $%x $%x",
			target == TARGET_M68K ? "d68" :
			target == TARGET_X86 ? "d86" :
			target == TARGET_X86_64 ? "d8664" :
			target == TARGET_POWERPC ? "d" : "x",
			start, start + length - 1);

	uae_u32 (*old_mon_read_byte)(uintptr) = mon_read_byte;
	void (*old_mon_write_byte)(uintptr, uae_u32) = mon_write_byte;

	mon_read_byte = mon_read_byte_jit;
	mon_write_byte = mon_write_byte_jit;

	char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
	mon(4, arg);

	/* Restore the original accessors before returning. */
	mon_read_byte = old_mon_read_byte;
	mon_write_byte = old_mon_write_byte;
#endif
}
6539    
6540 gbeauche 1.24 static void disasm_native_block(uint8 *start, size_t length)
6541 gbeauche 1.1 {
6542     disasm_block(TARGET_NATIVE, start, length);
6543     }
6544    
6545 gbeauche 1.24 static void disasm_m68k_block(uint8 *start, size_t length)
6546 gbeauche 1.1 {
6547     disasm_block(TARGET_M68K, start, length);
6548     }
6549    
6550     #ifdef HAVE_GET_WORD_UNSWAPPED
6551     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6552     #else
6553     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6554     #endif
6555    
#if JIT_DEBUG
/* Last m68k PC and last compiled-block address, recorded for the debug
   dump below. */
static uae_u8 *last_regs_pc_p = 0;
static uae_u8 *last_compiled_block_addr = 0;

/* Dump host addresses of the emulated register file, the current m68k
   processor state, and the location/size of the most recently executed
   translated block.  Debug aid, active only when JITDebug is set. */
void compiler_dumpstate(void)
{
	if (!JITDebug)
		return;
	
	write_log("### Host addresses\n");
	write_log("MEM_BASE    : %x\n", MEMBaseDiff);
	write_log("PC_P        : %p\n", &regs.pc_p);
	write_log("SPCFLAGS    : %p\n", &regs.spcflags);
	write_log("D0-D7       : %p-%p\n", &regs.regs[0], &regs.regs[7]);
	write_log("A0-A7       : %p-%p\n", &regs.regs[8], &regs.regs[15]);
	write_log("\n");
	
	write_log("### M68k processor state\n");
	m68k_dumpstate(0);
	write_log("\n");
	
	write_log("### Block in Mac address space\n");
	write_log("M68K block   : %p\n",
			  (void *)(uintptr)get_virtual_address(last_regs_pc_p));
	write_log("Native block : %p (%d bytes)\n",
			  (void *)(uintptr)get_virtual_address(last_compiled_block_addr),
			  get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
	write_log("\n");
}
#endif
6586    
/*
 * Translate a recorded trace of m68k instructions into a native code block.
 *
 * pc_hist  - array of the m68k instruction locations just interpreted
 *            (filled by execute_normal)
 * blocklen - number of entries in pc_hist
 *
 * The routine performs a backward liveness pass over the trace to compute
 * which CCR flags each instruction must actually produce, then emits native
 * code via the raw_* emitters.  Blocks may initially be emitted as
 * profile-only stubs (optlev==0) and recompiled at higher optimization
 * levels once their execution countdown (bi->count) expires.
 */
static void compile_block(cpu_history* pc_hist, int blocklen)
{
    /* Only compile when JIT is enabled and a translation cache exists */
    if (letit && compiled_code) {
#if PROFILE_COMPILE_TIME
	compile_count++;
	clock_t start_time = clock();
#endif
#if JIT_DEBUG
	bool disasm_block = false;
#endif

	/* OK, here we need to 'compile' a block */
	int i;
	int r;
	int was_comp=0;                 /* nonzero while register state is live in the compiler */
	uae_u8 liveflags[MAXRUN+1];     /* per-insn mask of CCR flags needed downstream */
#if USE_CHECKSUM_INFO
	bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
	/* With checksum-info records, min/max shrink per contiguous run */
	uintptr max_pcp=(uintptr)pc_hist[blocklen - 1].location;
	uintptr min_pcp=max_pcp;
#else
	uintptr max_pcp=(uintptr)pc_hist[0].location;
	uintptr min_pcp=max_pcp;
#endif
	uae_u32 cl=cacheline(pc_hist[0].location);
	void* specflags=(void*)&regs.spcflags;
	blockinfo* bi=NULL;
	blockinfo* bi2;
	int extra_len=0;

	redo_current_block=0;
	/* Out of room in the translation cache: drop everything first */
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);

	alloc_blockinfos();

	bi=get_blockinfo_addr_new(pc_hist[0].location,0);
	bi2=get_blockinfo(cl);

	optlev=bi->optlevel;
	if (bi->status!=BI_INVALID) {
	    Dif (bi!=bi2) {
		/* I don't think it can happen anymore. Shouldn't, in
		   any case. So let's make sure... */
		write_log("WOOOWOO count=%d, ol=%d %p %p\n",
		       bi->count,bi->optlevel,bi->handler_to_use,
		       cache_tags[cl].handler);
		abort();
	    }

	    Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
		write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
		/* What the heck? We are not supposed to be here! */
		abort();
	    }
	}
	/* Countdown expired: promote to the next optimization level that
	   has a nonzero optcount budget, and re-arm the counter */
	if (bi->count==-1) {
	    optlev++;
	    while (!optcount[optlev])
		optlev++;
	    bi->count=optcount[optlev]-1;
	}
	current_block_pc_p=(uintptr)pc_hist[0].location;

	remove_deps(bi); /* We are about to create new code */
	bi->optlevel=optlev;
	bi->pc_p=(uae_u8*)pc_hist[0].location;
#if USE_CHECKSUM_INFO
	free_checksum_info_chain(bi->csi);
	bi->csi = NULL;
#endif

	/* Backward pass: compute, for each instruction, which flags are
	   still needed by later instructions (dead-flag elimination) */
	liveflags[blocklen]=0x1f; /* All flags needed afterwards */
	i=blocklen;
	while (i--) {
	    uae_u16* currpcp=pc_hist[i].location;
	    uae_u32 op=DO_GET_OPCODE(currpcp);

#if USE_CHECKSUM_INFO
		trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
		/* A followed constant jump breaks source contiguity: close the
		   current checksum range and start a new one at this insn */
		if (follow_const_jumps && is_const_jump(op)) {
			checksum_info *csi = alloc_checksum_info();
			csi->start_p = (uae_u8 *)min_pcp;
			csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
			csi->next = bi->csi;
			bi->csi = csi;
			max_pcp = (uintptr)currpcp;
		}
		min_pcp = (uintptr)currpcp;
#else
	    if ((uintptr)currpcp<min_pcp)
		min_pcp=(uintptr)currpcp;
	    if ((uintptr)currpcp>max_pcp)
		max_pcp=(uintptr)currpcp;
#endif

		/* flags needed here = (needed later minus what this insn sets)
		   plus what this insn itself reads */
		liveflags[i]=((liveflags[i+1]&
			       (~prop[op].set_flags))|
			      prop[op].use_flags);
		/* addx consumes Z conditionally; keep Z only if still live */
		if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
		    liveflags[i]&= ~FLAG_Z;
	}

#if USE_CHECKSUM_INFO
	/* Record the final (or only) contiguous source range */
	checksum_info *csi = alloc_checksum_info();
	csi->start_p = (uae_u8 *)min_pcp;
	csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
	csi->next = bi->csi;
	bi->csi = csi;
#endif

	bi->needed_flags=liveflags[0];

	/* Start emitting: align, then publish the direct handler address */
	align_target(align_loops);
	was_comp=0;

	bi->direct_handler=(cpuop_func *)get_target();
	set_dhtu(bi,bi->direct_handler);
	bi->status=BI_COMPILING;
	current_block_start_target=(uintptr)get_target();

	log_startblock();

	if (bi->count>=0) { /* Need to generate countdown code */
	    raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
	    raw_sub_l_mi((uintptr)&(bi->count),1);
	    raw_jl((uintptr)popall_recompile_block);
	}
	if (optlev==0) { /* No need to actually translate */
	    /* Execute normally without keeping stats */
	    raw_mov_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
	    raw_jmp((uintptr)popall_exec_nostats);
	}
	else {
	    reg_alloc_run=0;
	    next_pc_p=0;
	    taken_pc_p=0;
	    branch_cc=0;

	    comp_pc_p=(uae_u8*)pc_hist[0].location;
	    init_comp();
	    was_comp=1;

#ifdef USE_CPU_EMUL_SERVICES
	    /* Decrement the tick quantum by this block's length; when it
	       goes non-positive, call out to cpu_do_check_ticks */
	    raw_sub_l_mi((uintptr)&emulated_ticks,blocklen);
	    raw_jcc_b_oponly(NATIVE_CC_GT);
	    uae_s8 *branchadd=(uae_s8*)get_target();
	    emit_byte(0);
	    raw_call((uintptr)cpu_do_check_ticks);
	    *branchadd=(uintptr)get_target()-((uintptr)branchadd+1);
#endif

#if JIT_DEBUG
		if (JITDebug) {
			raw_mov_l_mi((uintptr)&last_regs_pc_p,(uintptr)pc_hist[0].location);
			raw_mov_l_mi((uintptr)&last_compiled_block_addr,current_block_start_target);
		}
#endif

	    /* Per-instruction translation loop; stops early if the code
	       buffer would overflow */
	    for (i=0;i<blocklen &&
		     get_target_noopt()<max_compile_start;i++) {
		cpuop_func **cputbl;
		compop_func **comptbl;
		uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
		needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
		if (!needed_flags) {
		    /* No later insn reads these flags: use the no-flags tables */
		    cputbl=nfcpufunctbl;
		    comptbl=nfcompfunctbl;
		}
		else {
		    cputbl=cpufunctbl;
		    comptbl=compfunctbl;
		}

#if FLIGHT_RECORDER
		{
		    /* Low bit set marks the entry as coming from compiled code */
		    mov_l_ri(S1, get_virtual_address((uae_u8 *)(pc_hist[i].location)) | 1);
		    clobber_flags();
		    remove_all_offsets();
		    int arg = readreg_specific(S1,4,REG_PAR1);
		    prepare_for_call_1();
		    unlock2(arg);
		    prepare_for_call_2();
		    raw_call((uintptr)m68k_record_step);
		}
#endif

		failure = 1; // gb-- defaults to failure state
		if (comptbl[opcode] && optlev>1) {
		    failure=0;
		    if (!was_comp) {
			comp_pc_p=(uae_u8*)pc_hist[i].location;
			init_comp();
		    }
		    was_comp=1;

		    /* The compile handler may set 'failure' itself */
		    comptbl[opcode](opcode);
		    freescratch();
		    if (!(liveflags[i+1] & FLAG_CZNV)) {
			/* We can forget about flags */
			dont_care_flags();
		    }
#if INDIVIDUAL_INST
		    flush(1);
		    nop();
		    flush(1);
		    was_comp=0;
#endif
		}

		if (failure) {
		    /* Fall back to calling the interpreter routine for this
		       opcode; first sync the compiler's register state */
		    if (was_comp) {
			flush(1);
			was_comp=0;
		    }
		    raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
#if USE_NORMAL_CALLING_CONVENTION
		    raw_push_l_r(REG_PAR1);
#endif
		    raw_mov_l_mi((uintptr)&regs.pc_p,
				 (uintptr)pc_hist[i].location);
		    raw_call((uintptr)cputbl[opcode]);
#if PROFILE_UNTRANSLATED_INSNS
			// raw_cputbl_count[] is indexed with plain opcode (in m68k order)
			raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)],1);
#endif
#if USE_NORMAL_CALLING_CONVENTION
		    raw_inc_sp(4);
#endif

		    if (i < blocklen - 1) {
			/* Mid-block: if the interpreter set any spcflags,
			   bail out of the block via popall_do_nothing */
			uae_s8* branchadd;

			raw_mov_l_rm(0,(uintptr)specflags);
			raw_test_l_rr(0,0);
			raw_jz_b_oponly();
			branchadd=(uae_s8 *)get_target();
			emit_byte(0);
			raw_jmp((uintptr)popall_do_nothing);
			/* back-patch the 8-bit jz displacement */
			*branchadd=(uintptr)get_target()-(uintptr)branchadd-1;
		    }
		}
	    }
#if 1 /* This isn't completely kosher yet; It really needs to be
	 be integrated into a general inter-block-dependency scheme */
	    /* Loop-back special case: if the taken target is this very block,
	       peek at the fall-through successor to see whether flags can be
	       dropped entirely */
	    if (next_pc_p && taken_pc_p &&
		was_comp && taken_pc_p==current_block_pc_p) {
		blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
		blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
		uae_u8 x=bi1->needed_flags;

		if (x==0xff || 1) { /* To be on the safe side */
		    uae_u16* next=(uae_u16*)next_pc_p;
		    uae_u32 op=DO_GET_OPCODE(next);

		    x=0x1f;
		    x&=(~prop[op].set_flags);
		    x|=prop[op].use_flags;
		}

		x|=bi2->needed_flags;
		if (!(x & FLAG_CZNV)) {
		    /* We can forget about flags */
		    dont_care_flags();
		    extra_len+=2; /* The next instruction now is part of this
				     block */
		}

	    }
#endif
		log_flush();

	    if (next_pc_p) { /* A branch was registered */
		uintptr t1=next_pc_p;
		uintptr t2=taken_pc_p;
		int cc=branch_cc;

		uae_u32* branchadd;
		uae_u32* tba;
		bigstate tmp;
		blockinfo* tbi;

		if (taken_pc_p<next_pc_p) {
		    /* backward branch. Optimize for the "taken" case ---
		       which means the raw_jcc should fall through when
		       the 68k branch is taken. */
		    t1=taken_pc_p;
		    t2=next_pc_p;
		    cc=branch_cc^1;
		}

		tmp=live; /* ouch! This is big... */
		raw_jcc_l_oponly(cc);
		branchadd=(uae_u32*)get_target();
		emit_long(0);

		/* predicted outcome */
		tbi=get_blockinfo_addr_new((void*)t1,1);
		match_states(tbi);
		raw_cmp_l_mi((uintptr)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		/* direct chain to the target block's handler (patched later
		   via the jmpdep if the target is recompiled) */
		emit_long(get_handler(t1)-((uintptr)tba+4));
		raw_mov_l_mi((uintptr)&regs.pc_p,t1);
		flush_reg_count();
		raw_jmp((uintptr)popall_do_nothing);
		create_jmpdep(bi,0,tba,t1);

		align_target(align_jumps);
		/* not-predicted outcome */
		*branchadd=(uintptr)get_target()-((uintptr)branchadd+4);
		live=tmp; /* Ouch again */
		tbi=get_blockinfo_addr_new((void*)t2,1);
		match_states(tbi);

		//flush(1); /* Can only get here if was_comp==1 */
		raw_cmp_l_mi((uintptr)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		emit_long(get_handler(t2)-((uintptr)tba+4));
		raw_mov_l_mi((uintptr)&regs.pc_p,t2);
		flush_reg_count();
		raw_jmp((uintptr)popall_do_nothing);
		create_jmpdep(bi,1,tba,t2);
	    }
	    else
	    {
		if (was_comp) {
		    flush(1);
		}
		flush_reg_count();

		/* Let's find out where next_handler is... */
		if (was_comp && isinreg(PC_P)) {
		    /* PC_P lives in a register: dispatch through cache_tags,
		       falling back to popall_do_nothing if spcflags are set */
		    r=live.state[PC_P].realreg;
			raw_and_l_ri(r,TAGMASK);
			int r2 = (r==0) ? 1 : 0;
			raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
			raw_cmp_l_mi((uintptr)specflags,0);
			raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
			raw_jmp_r(r2);
		}
		else if (was_comp && isconst(PC_P)) {
		    /* Known constant successor: chain directly like a branch */
		    uae_u32 v=live.state[PC_P].val;
		    uae_u32* tba;
		    blockinfo* tbi;

		    tbi=get_blockinfo_addr_new((void*)(uintptr)v,1);
		    match_states(tbi);

			raw_cmp_l_mi((uintptr)specflags,0);
		    raw_jcc_l_oponly(4);
		    tba=(uae_u32*)get_target();
		    emit_long(get_handler(v)-((uintptr)tba+4));
		    raw_mov_l_mi((uintptr)&regs.pc_p,v);
		    raw_jmp((uintptr)popall_do_nothing);
		    create_jmpdep(bi,0,tba,v);
		}
		else {
		    /* Generic case: reload regs.pc_p and dispatch via tags */
		    r=REG_PC_TMP;
		    raw_mov_l_rm(r,(uintptr)&regs.pc_p);
			raw_and_l_ri(r,TAGMASK);
			int r2 = (r==0) ? 1 : 0;
			raw_mov_l_ri(r2,(uintptr)popall_do_nothing);
			raw_cmp_l_mi((uintptr)specflags,0);
			raw_cmov_l_rm_indexed(r2,(uintptr)cache_tags,r,SIZEOF_VOID_P,NATIVE_CC_EQ);
			raw_jmp_r(r2);
		}
	    }
	}

#if USE_MATCH
	if (callers_need_recompile(&live,&(bi->env))) {
	    mark_callers_recompile(bi);
	}

	big_to_small_state(&live,&(bi->env));
#endif

#if USE_CHECKSUM_INFO
	remove_from_list(bi);
	if (trace_in_rom) {
		// No need to checksum that block trace on cache invalidation
		free_checksum_info_chain(bi->csi);
		bi->csi = NULL;
		add_to_dormant(bi);
	}
	else {
	    calc_checksum(bi,&(bi->c1),&(bi->c2));
		add_to_active(bi);
	}
#else
	/* Extend the checksummed range to cover the block's last insn */
	if (next_pc_p+extra_len>=max_pcp &&
	    next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
	    max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
	else
	    max_pcp+=LONGEST_68K_INST;

	bi->len=max_pcp-min_pcp;
	bi->min_pcp=min_pcp;

	remove_from_list(bi);
	if (isinrom(min_pcp) && isinrom(max_pcp)) {
	    add_to_dormant(bi); /* No need to checksum it on cache flush.
				   Please don't start changing ROMs in
				   flight! */
	}
	else {
	    calc_checksum(bi,&(bi->c1),&(bi->c2));
	    add_to_active(bi);
	}
#endif

	current_cache_size += get_target() - (uae_u8 *)current_compile_p;

#if JIT_DEBUG
	if (JITDebug)
	    bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;

	if (JITDebug && disasm_block) {
	    uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
	    D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
	    uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
	    disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
	    D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
	    disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
	    getchar();
	}
#endif

	log_dump();
	align_target(align_jumps);

	/* This is the non-direct handler: verifies regs.pc_p matches the
	   block before jumping into the direct handler */
	bi->handler=
	    bi->handler_to_use=(cpuop_func *)get_target();
	raw_cmp_l_mi((uintptr)&regs.pc_p,(uintptr)pc_hist[0].location);
	raw_jnz((uintptr)popall_cache_miss);
	comp_pc_p=(uae_u8*)pc_hist[0].location;

	bi->status=BI_FINALIZING;
	init_comp();
	match_states(bi);
	flush(1);

	raw_jmp((uintptr)bi->direct_handler);

	current_compile_p=get_target();
	raise_in_cl_list(bi);

	/* We will flush soon, anyway, so let's do it now */
	if (current_compile_p>=max_compile_start)
		flush_icache_hard(7);

	bi->status=BI_ACTIVE;
	if (redo_current_block)
	    block_need_recompile(bi);

#if PROFILE_COMPILE_TIME
	compile_time += (clock() - start_time);
#endif
    }

    /* Account for compilation time */
    cpu_do_check_ticks();
}
7053    
/*
 * Intentionally empty routine; exists so compiled code has a harmless
 * address to call/jump to.
 */
void do_nothing(void)
{
	/* nothing to do, by design */
}
7058    
7059     void exec_nostats(void)
7060     {
7061     for (;;) {
7062     uae_u32 opcode = GET_OPCODE;
7063 gbeauche 1.32 #if FLIGHT_RECORDER
7064     m68k_record_step(m68k_getpc());
7065     #endif
7066 gbeauche 1.1 (*cpufunctbl[opcode])(opcode);
7067 gbeauche 1.34 cpu_check_ticks();
7068 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
7069     return; /* We will deal with the spcflags in the caller */
7070     }
7071     }
7072     }
7073    
7074     void execute_normal(void)
7075     {
7076     if (!check_for_cache_miss()) {
7077     cpu_history pc_hist[MAXRUN];
7078     int blocklen = 0;
7079     #if REAL_ADDRESSING || DIRECT_ADDRESSING
7080     start_pc_p = regs.pc_p;
7081     start_pc = get_virtual_address(regs.pc_p);
7082     #else
7083     start_pc_p = regs.pc_oldp;
7084     start_pc = regs.pc;
7085     #endif
7086     for (;;) { /* Take note: This is the do-it-normal loop */
7087     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
7088     uae_u32 opcode = GET_OPCODE;
7089     #if FLIGHT_RECORDER
7090     m68k_record_step(m68k_getpc());
7091     #endif
7092     (*cpufunctbl[opcode])(opcode);
7093 gbeauche 1.34 cpu_check_ticks();
7094 gbeauche 1.1 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
7095     compile_block(pc_hist, blocklen);
7096     return; /* We will deal with the spcflags in the caller */
7097     }
7098     /* No need to check regs.spcflags, because if they were set,
7099     we'd have ended up inside that "if" */
7100     }
7101     }
7102     }
7103    
7104     typedef void (*compiled_handler)(void);
7105    
7106 gbeauche 1.24 #if defined(X86_ASSEMBLY) || defined(X86_64_ASSEMBLY)
7107 gbeauche 1.6 void (*m68k_compile_execute)(void) = NULL;
7108     #else
7109 gbeauche 1.1 void m68k_do_compile_execute(void)
7110     {
7111     for (;;) {
7112     ((compiled_handler)(pushall_call_handler))();
7113     /* Whenever we return from that, we should check spcflags */
7114     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
7115     if (m68k_do_specialties ())
7116     return;
7117     }
7118     }
7119     }
7120 gbeauche 1.6 #endif