root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.20
Committed: 2003-10-03T18:18:15Z by gbeauche
Branch: MAIN
Changes since 1.19: +58 -1 lines
Log Message:
Make sure a 32-bit B2/JIT works reasonably well on AMD64 too. This implies
forcing RAMBaseHost < 0x80000000, which is empirically determined to work on
Linux/x86 and Linux/amd64.

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10     *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.1 #define USE_MATCH 0
35    
36     /* kludge for Brian, so he can compile under MSVC++ */
37     #define USE_NORMAL_CALLING_CONVENTION 0
38    
39     #ifndef WIN32
40 gbeauche 1.20 #include <unistd.h>
41 gbeauche 1.1 #include <sys/types.h>
42     #include <sys/mman.h>
43     #endif
44    
45     #include <stdlib.h>
46     #include <fcntl.h>
47     #include <errno.h>
48    
49     #include "sysdeps.h"
50     #include "cpu_emulation.h"
51     #include "main.h"
52     #include "prefs.h"
53     #include "user_strings.h"
54 gbeauche 1.2 #include "vm_alloc.h"
55 gbeauche 1.1
56     #include "m68k.h"
57     #include "memory.h"
58     #include "readcpu.h"
59     #include "newcpu.h"
60     #include "comptbl.h"
61     #include "compiler/compemu.h"
62     #include "fpu/fpu.h"
63     #include "fpu/flags.h"
64    
65     #define DEBUG 1
66     #include "debug.h"
67    
68     #ifdef ENABLE_MON
69     #include "mon.h"
70     #endif
71    
72     #ifndef WIN32
73 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
74     #define PROFILE_UNTRANSLATED_INSNS 1
75 gbeauche 1.1 #endif
76    
77     #ifdef WIN32
78     #undef write_log
79     #define write_log dummy_write_log
80     static void dummy_write_log(const char *, ...) { }
81     #endif
82    
83     #if JIT_DEBUG
84     #undef abort
85     #define abort() do { \
86     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
87     exit(EXIT_FAILURE); \
88     } while (0)
89     #endif
90    
91     #if PROFILE_COMPILE_TIME
92     #include <time.h>
93     static uae_u32 compile_count = 0;
94     static clock_t compile_time = 0;
95     static clock_t emul_start_time = 0;
96     static clock_t emul_end_time = 0;
97     #endif
98    
99 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
100     const int untranslated_top_ten = 20;
101     static uae_u32 raw_cputbl_count[65536] = { 0, };
102     static uae_u16 opcode_nums[65536];
103    
104     static int untranslated_compfn(const void *e1, const void *e2)
105     {
106     return (raw_cputbl_count[*(const uae_u16 *)e2] > raw_cputbl_count[*(const uae_u16 *)e1]) - (raw_cputbl_count[*(const uae_u16 *)e2] < raw_cputbl_count[*(const uae_u16 *)e1]); /* descending; qsort() expects a negative/zero/positive result, not a bool */
107     }
108     #endif
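/* A minimal sketch (editorial, not part of the original file) of how the
   counters above can be consumed at shutdown. The function name is an
   assumption; opcode_nums, raw_cputbl_count, untranslated_top_ten and
   untranslated_compfn are the real declarations above. */
#if PROFILE_UNTRANSLATED_INSNS
static void sketch_dump_untranslated_insns(void)
{
	int i;
	for (i = 0; i < 65536; i++)
		opcode_nums[i] = i;  // identity permutation, then sort by frequency
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
	for (i = 0; i < untranslated_top_ten; i++)
		write_log("%04x: %u times\n", opcode_nums[i], raw_cputbl_count[opcode_nums[i]]);
}
#endif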
109    
110 gbeauche 1.1 compop_func *compfunctbl[65536];
111     compop_func *nfcompfunctbl[65536];
112     cpuop_func *nfcpufunctbl[65536];
113     uae_u8* comp_pc_p;
114    
115 gbeauche 1.6 // From newcpu.cpp
116     extern bool quit_program;
117    
118 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
119     #if JIT_DEBUG
120     static bool JITDebug = false; // Enable runtime disassemblers through mon?
121     #else
122     const bool JITDebug = false; // Don't use JIT debug mode at all
123     #endif
124    
125     const uae_u32 MIN_CACHE_SIZE = 2048; // Minimal translation cache size (2048 KB)
126     static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
127 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
128 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
129     static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions?
130     static bool have_cmov = false; // target has CMOV instructions ?
131     static bool have_rat_stall = true; // target has partial register stalls ?
132 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
133     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
134 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
135 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
136     static int align_jumps = 32; // Align the start of jumps
137 gbeauche 1.1 static int zero_fd = -1;
138     static int optcount[10] = {
139     10, // How often a block has to be executed before it is translated
140     0, // How often to use naive translation
141     0, 0, 0, 0,
142     -1, -1, -1, -1
143     };
144    
145     struct op_properties {
146     uae_u8 use_flags;
147     uae_u8 set_flags;
148     uae_u8 is_addx;
149     uae_u8 cflow;
150     };
151     static op_properties prop[65536];
152    
153     static inline int end_block(uae_u32 opcode)
154     {
155     return (prop[opcode].cflow & fl_end_block);
156     }
157    
158 gbeauche 1.8 static inline bool is_const_jump(uae_u32 opcode)
159     {
160     return (prop[opcode].cflow == fl_const_jump);
161     }
162    
163 gbeauche 1.18 static inline bool may_trap(uae_u32 opcode)
164     {
165     return (prop[opcode].cflow & fl_trap);
166     }
167    
168     static inline unsigned int cft_map (unsigned int f)
169     {
170     #ifndef HAVE_GET_WORD_UNSWAPPED
171     return f;
172     #else
173     return ((f >> 8) & 255) | ((f & 255) << 8);
174     #endif
175     }
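/* Editorial note: with HAVE_GET_WORD_UNSWAPPED, opcode words are fetched
   byte-swapped from memory for speed, so cft_map() swaps them back before
   they are used as table indices (see ru_fill() below, which does
   table68k[cft_map(opcode)]); e.g. a fetched 0x754e maps to 0x4e75 (RTS).
   Without that define, cft_map() is the identity. */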
176    
177 gbeauche 1.1 uae_u8* start_pc_p;
178     uae_u32 start_pc;
179     uae_u32 current_block_pc_p;
180     uae_u32 current_block_start_target;
181     uae_u32 needed_flags;
182     static uae_u32 next_pc_p;
183     static uae_u32 taken_pc_p;
184     static int branch_cc;
185     static int redo_current_block;
186    
187     int segvcount=0;
188     int soft_flush_count=0;
189     int hard_flush_count=0;
190     int checksum_count=0;
191     static uae_u8* current_compile_p=NULL;
192     static uae_u8* max_compile_start;
193     static uae_u8* compiled_code=NULL;
194     static uae_s32 reg_alloc_run;
195    
196     void* pushall_call_handler=NULL;
197     static void* popall_do_nothing=NULL;
198     static void* popall_exec_nostats=NULL;
199     static void* popall_execute_normal=NULL;
200     static void* popall_cache_miss=NULL;
201     static void* popall_recompile_block=NULL;
202     static void* popall_check_checksum=NULL;
203    
204     /* The 68k only ever executes from even addresses. So right now, we
205     * waste half the entries in this array.
206     * UPDATE: We now use those entries to store the start of the linked
207     * lists that we maintain for each hash result.
208     */
209     cacheline cache_tags[TAGSIZE];
210     int letit=0;
211     blockinfo* hold_bi[MAX_HOLD_BI];
212     blockinfo* active;
213     blockinfo* dormant;
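/* Editorial sketch of the layout described above: 68k code only starts at
   even addresses, so for a block whose PC hashes to the (even) index cl,
       cache_tags[cl].handler  is the code pointer executed for that bucket,
       cache_tags[cl+1].bi     is the head of the blockinfo chain,
   i.e. the otherwise-wasted odd slots hold the list heads. This is exactly
   what get_blockinfo() and add_to_cl_list() below read and maintain. */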
214    
215     /* 68040 */
216     extern struct cputbl op_smalltbl_0_nf[];
217     extern struct comptbl op_smalltbl_0_comp_nf[];
218     extern struct comptbl op_smalltbl_0_comp_ff[];
219    
220     /* 68020 + 68881 */
221     extern struct cputbl op_smalltbl_1_nf[];
222    
223     /* 68020 */
224     extern struct cputbl op_smalltbl_2_nf[];
225    
226     /* 68010 */
227     extern struct cputbl op_smalltbl_3_nf[];
228    
229     /* 68000 */
230     extern struct cputbl op_smalltbl_4_nf[];
231    
232     /* 68000 slow but compatible. */
233     extern struct cputbl op_smalltbl_5_nf[];
234    
235     static void flush_icache_hard(int n);
236     static void flush_icache_lazy(int n);
237     static void flush_icache_none(int n);
238     void (*flush_icache)(int n) = flush_icache_none;
239    
240    
241    
242     bigstate live;
243     smallstate empty_ss;
244     smallstate default_ss;
245     static int optlev;
246    
247     static int writereg(int r, int size);
248     static void unlock2(int r);
249     static void setlock(int r);
250     static int readreg_specific(int r, int size, int spec);
251     static int writereg_specific(int r, int size, int spec);
252     static void prepare_for_call_1(void);
253     static void prepare_for_call_2(void);
254     static void align_target(uae_u32 a);
255    
256     static uae_s32 nextused[VREGS];
257    
258     uae_u32 m68k_pc_offset;
259    
260     /* Some arithmetic operations can be optimized away if the operands
261     * are known to be constant. But that's only a good idea when the
262     * side effects they would have on the flags are not important. This
263     * variable indicates whether we need the side effects or not
264     */
265     uae_u32 needflags=0;
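/* Worked example (editorial): "adda.l #4,a0" never sets any CCR bit, so the
   addition can be folded into a0's cached constant offset with no code
   emitted at all; "add.l #4,d0" does define the CCR, so it may only be
   folded when the flags are provably dead, and needflags records exactly
   which case we are in. */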
266    
267     /* Flag handling is complicated.
268     *
269     * x86 instructions create flags, which quite often are exactly what we
270     * want. So at times, the "68k" flags are actually in the x86 flags.
271     *
272     * Then again, sometimes we do x86 instructions that clobber the x86
273     * flags, but don't represent a corresponding m68k instruction. In that
274     * case, we have to save them.
275     *
276     * We used to save them to the stack, but now store them back directly
277     * into the regflags.cznv of the traditional emulation. Thus some odd
278     * names.
279     *
280     * So flags can be in either of two places (used to be three; boy were
281     * things complicated back then!); And either place can contain either
282     * valid flags or invalid trash (and on the stack, there was also the
283     * option of "nothing at all", now gone). A couple of variables keep
284     * track of the respective states.
285     *
286     * To make things worse, we might or might not be interested in the flags.
287     * By default, we are, but a call to dont_care_flags can change that
288     * until the next call to live_flags. If we are not, pretty much whatever
289     * is in the register and/or the native flags is seen as valid.
290     */
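/* Editorial summary of the state space just described:
       live.flags_in_flags : VALID -> the 68k flags are live in x86 EFLAGS
       live.flags_on_stack : VALID -> they are saved in regflags.cznv
   Either copy may independently be TRASH. make_flags_live_internal() below
   reloads EFLAGS from a valid saved copy, flags_to_stack() saves in the
   opposite direction, and clobber_flags() saves if necessary and then marks
   EFLAGS as TRASH. */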
291    
292     static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
293     {
294     return cache_tags[cl+1].bi;
295     }
296    
297     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
298     {
299     blockinfo* bi=get_blockinfo(cacheline(addr));
300    
301     while (bi) {
302     if (bi->pc_p==addr)
303     return bi;
304     bi=bi->next_same_cl;
305     }
306     return NULL;
307     }
308    
309    
310     /*******************************************************************
311     * All sorts of list related functions for all of the lists *
312     *******************************************************************/
313    
314     static __inline__ void remove_from_cl_list(blockinfo* bi)
315     {
316     uae_u32 cl=cacheline(bi->pc_p);
317    
318     if (bi->prev_same_cl_p)
319     *(bi->prev_same_cl_p)=bi->next_same_cl;
320     if (bi->next_same_cl)
321     bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
322     if (cache_tags[cl+1].bi)
323     cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
324     else
325     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
326     }
327    
328     static __inline__ void remove_from_list(blockinfo* bi)
329     {
330     if (bi->prev_p)
331     *(bi->prev_p)=bi->next;
332     if (bi->next)
333     bi->next->prev_p=bi->prev_p;
334     }
335    
336     static __inline__ void remove_from_lists(blockinfo* bi)
337     {
338     remove_from_list(bi);
339     remove_from_cl_list(bi);
340     }
341    
342     static __inline__ void add_to_cl_list(blockinfo* bi)
343     {
344     uae_u32 cl=cacheline(bi->pc_p);
345    
346     if (cache_tags[cl+1].bi)
347     cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
348     bi->next_same_cl=cache_tags[cl+1].bi;
349    
350     cache_tags[cl+1].bi=bi;
351     bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
352    
353     cache_tags[cl].handler=bi->handler_to_use;
354     }
355    
356     static __inline__ void raise_in_cl_list(blockinfo* bi)
357     {
358     remove_from_cl_list(bi);
359     add_to_cl_list(bi);
360     }
361    
362     static __inline__ void add_to_active(blockinfo* bi)
363     {
364     if (active)
365     active->prev_p=&(bi->next);
366     bi->next=active;
367    
368     active=bi;
369     bi->prev_p=&active;
370     }
371    
372     static __inline__ void add_to_dormant(blockinfo* bi)
373     {
374     if (dormant)
375     dormant->prev_p=&(bi->next);
376     bi->next=dormant;
377    
378     dormant=bi;
379     bi->prev_p=&dormant;
380     }
381    
382     static __inline__ void remove_dep(dependency* d)
383     {
384     if (d->prev_p)
385     *(d->prev_p)=d->next;
386     if (d->next)
387     d->next->prev_p=d->prev_p;
388     d->prev_p=NULL;
389     d->next=NULL;
390     }
391    
392     /* This block's code is about to be thrown away, so it no longer
393     depends on anything else */
394     static __inline__ void remove_deps(blockinfo* bi)
395     {
396     remove_dep(&(bi->dep[0]));
397     remove_dep(&(bi->dep[1]));
398     }
399    
400     static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
401     {
402     *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
403     }
404    
405     /********************************************************************
406     * Soft flush handling support functions *
407     ********************************************************************/
408    
409     static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
410     {
411     //write_log("bi is %p\n",bi);
412     if (dh!=bi->direct_handler_to_use) {
413     dependency* x=bi->deplist;
414     //write_log("bi->deplist=%p\n",bi->deplist);
415     while (x) {
416     //write_log("x is %p\n",x);
417     //write_log("x->next is %p\n",x->next);
418     //write_log("x->prev_p is %p\n",x->prev_p);
419    
420     if (x->jmp_off) {
421     adjust_jmpdep(x,dh);
422     }
423     x=x->next;
424     }
425     bi->direct_handler_to_use=dh;
426     }
427     }
428    
429     static __inline__ void invalidate_block(blockinfo* bi)
430     {
431     int i;
432    
433     bi->optlevel=0;
434     bi->count=optcount[0]-1;
435     bi->handler=NULL;
436     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
437     bi->direct_handler=NULL;
438     set_dhtu(bi,bi->direct_pen);
439     bi->needed_flags=0xff;
440     bi->status=BI_INVALID;
441     for (i=0;i<2;i++) {
442     bi->dep[i].jmp_off=NULL;
443     bi->dep[i].target=NULL;
444     }
445     remove_deps(bi);
446     }
447    
448     static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
449     {
450     blockinfo* tbi=get_blockinfo_addr((void*)target);
451    
452     Dif(!tbi) {
453     write_log("Could not create jmpdep!\n");
454     abort();
455     }
456     bi->dep[i].jmp_off=jmpaddr;
457     bi->dep[i].source=bi;
458     bi->dep[i].target=tbi;
459     bi->dep[i].next=tbi->deplist;
460     if (bi->dep[i].next)
461     bi->dep[i].next->prev_p=&(bi->dep[i].next);
462     bi->dep[i].prev_p=&(tbi->deplist);
463     tbi->deplist=&(bi->dep[i]);
464     }
465    
466     static __inline__ void block_need_recompile(blockinfo * bi)
467     {
468     uae_u32 cl = cacheline(bi->pc_p);
469    
470     set_dhtu(bi, bi->direct_pen);
471     bi->direct_handler = bi->direct_pen;
472    
473     bi->handler_to_use = (cpuop_func *)popall_execute_normal;
474     bi->handler = (cpuop_func *)popall_execute_normal;
475     if (bi == cache_tags[cl + 1].bi)
476     cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
477     bi->status = BI_NEED_RECOMP;
478     }
479    
480     static __inline__ void mark_callers_recompile(blockinfo * bi)
481     {
482     dependency *x = bi->deplist;
483    
484     while (x) {
485     dependency *next = x->next; /* This disappears when we mark for
486     * recompilation and thus remove the
487     * blocks from the lists */
488     if (x->jmp_off) {
489     blockinfo *cbi = x->source;
490    
491     Dif(cbi->status == BI_INVALID) {
492     // write_log("invalid block in dependency list\n"); // FIXME?
493     // abort();
494     }
495     if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
496     block_need_recompile(cbi);
497     mark_callers_recompile(cbi);
498     }
499     else if (cbi->status == BI_COMPILING) {
500     redo_current_block = 1;
501     }
502     else if (cbi->status == BI_NEED_RECOMP) {
503     /* nothing */
504     }
505     else {
506     //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
507     }
508     }
509     x = next;
510     }
511     }
512    
513     static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
514     {
515     blockinfo* bi=get_blockinfo_addr(addr);
516     int i;
517    
518     if (!bi) {
519     for (i=0;i<MAX_HOLD_BI && !bi;i++) {
520     if (hold_bi[i]) {
521     uae_u32 cl=cacheline(addr);
522    
523     bi=hold_bi[i];
524     hold_bi[i]=NULL;
525     bi->pc_p=(uae_u8 *)addr;
526     invalidate_block(bi);
527     add_to_active(bi);
528     add_to_cl_list(bi);
529    
530     }
531     }
532     }
533     if (!bi) {
534     write_log("Looking for blockinfo, can't find free one\n");
535     abort();
536     }
537     return bi;
538     }
539    
540     static void prepare_block(blockinfo* bi);
541    
542     /* Management of blockinfos.
543    
544     A blockinfo struct is allocated whenever a new block has to be
545     compiled. If the list of free blockinfos is empty, we allocate a new
546     pool of blockinfos and link the newly created blockinfos together
547     into the list of free blockinfos. Otherwise, we simply pop a structure
548 gbeauche 1.7 off the free list.
549 gbeauche 1.1
550     Blockinfos are lazily deallocated, i.e. chained together in the
551     list of free blockinfos whenever a translation cache flush (hard or
552     soft) request occurs.
553     */
554    
555 gbeauche 1.7 template< class T >
556     class LazyBlockAllocator
557     {
558     enum {
559     kPoolSize = 1 + 4096 / sizeof(T)
560     };
561     struct Pool {
562     T chunk[kPoolSize];
563     Pool * next;
564     };
565     Pool * mPools;
566     T * mChunks;
567     public:
568     LazyBlockAllocator() : mPools(0), mChunks(0) { }
569     ~LazyBlockAllocator();
570     T * acquire();
571     void release(T * const);
572 gbeauche 1.1 };
573    
574 gbeauche 1.7 template< class T >
575     LazyBlockAllocator<T>::~LazyBlockAllocator()
576 gbeauche 1.1 {
577 gbeauche 1.7 Pool * currentPool = mPools;
578     while (currentPool) {
579     Pool * deadPool = currentPool;
580     currentPool = currentPool->next;
581     free(deadPool);
582     }
583     }
584    
585     template< class T >
586     T * LazyBlockAllocator<T>::acquire()
587     {
588     if (!mChunks) {
589     // There is no chunk left, allocate a new pool and link the
590     // chunks into the free list
591     Pool * newPool = (Pool *)malloc(sizeof(Pool));
592     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
593     chunk->next = mChunks;
594     mChunks = chunk;
595 gbeauche 1.1 }
596 gbeauche 1.7 newPool->next = mPools;
597     mPools = newPool;
598     }
599     T * chunk = mChunks;
600     mChunks = chunk->next;
601     return chunk;
602     }
603    
604     template< class T >
605     void LazyBlockAllocator<T>::release(T * const chunk)
606     {
607     chunk->next = mChunks;
608     mChunks = chunk;
609     }
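// Editorial note on the allocator above: kPoolSize is chosen so one Pool
// spans roughly a 4 KB page plus one spare element. acquire() is an O(1)
// pop off the mChunks free list (refilled a whole pool at a time),
// release() is an O(1) push, and memory is only returned to the system in
// the destructor, which matches the "lazy deallocation on cache flush"
// policy described earlier.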
610    
611     template< class T >
612     class HardBlockAllocator
613     {
614     public:
615     T * acquire() {
616     T * data = (T *)current_compile_p;
617     current_compile_p += sizeof(T);
618     return data;
619 gbeauche 1.1 }
620 gbeauche 1.7
621     void release(T * const chunk) {
622     // Deallocated on invalidation
623     }
624     };
625    
626     #if USE_SEPARATE_BIA
627     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
628     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
629 gbeauche 1.1 #else
630 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
631     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
632 gbeauche 1.1 #endif
633    
634 gbeauche 1.8 static __inline__ checksum_info *alloc_checksum_info(void)
635     {
636     checksum_info *csi = ChecksumInfoAllocator.acquire();
637     csi->next = NULL;
638     return csi;
639     }
640    
641     static __inline__ void free_checksum_info(checksum_info *csi)
642     {
643     csi->next = NULL;
644     ChecksumInfoAllocator.release(csi);
645     }
646    
647     static __inline__ void free_checksum_info_chain(checksum_info *csi)
648     {
649     while (csi != NULL) {
650     checksum_info *csi2 = csi->next;
651     free_checksum_info(csi);
652     csi = csi2;
653     }
654     }
655 gbeauche 1.7
656     static __inline__ blockinfo *alloc_blockinfo(void)
657 gbeauche 1.1 {
658 gbeauche 1.7 blockinfo *bi = BlockInfoAllocator.acquire();
659     #if USE_CHECKSUM_INFO
660     bi->csi = NULL;
661 gbeauche 1.1 #endif
662 gbeauche 1.7 return bi;
663 gbeauche 1.1 }
664    
665 gbeauche 1.7 static __inline__ void free_blockinfo(blockinfo *bi)
666 gbeauche 1.1 {
667 gbeauche 1.7 #if USE_CHECKSUM_INFO
668 gbeauche 1.8 free_checksum_info_chain(bi->csi);
669     bi->csi = NULL;
670 gbeauche 1.1 #endif
671 gbeauche 1.7 BlockInfoAllocator.release(bi);
672 gbeauche 1.1 }
673    
674     static __inline__ void alloc_blockinfos(void)
675     {
676     int i;
677     blockinfo* bi;
678    
679     for (i=0;i<MAX_HOLD_BI;i++) {
680     if (hold_bi[i])
681     return;
682     bi=hold_bi[i]=alloc_blockinfo();
683     prepare_block(bi);
684     }
685     }
686    
687     /********************************************************************
688     * Functions to emit data into memory, and other general support *
689     ********************************************************************/
690    
691     static uae_u8* target;
692    
693     static void emit_init(void)
694     {
695     }
696    
697     static __inline__ void emit_byte(uae_u8 x)
698     {
699     *target++=x;
700     }
701    
702     static __inline__ void emit_word(uae_u16 x)
703     {
704     *((uae_u16*)target)=x;
705     target+=2;
706     }
707    
708     static __inline__ void emit_long(uae_u32 x)
709     {
710     *((uae_u32*)target)=x;
711     target+=4;
712     }
713    
714 gbeauche 1.12 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
715     {
716     memcpy((uae_u8 *)target,block,blocklen);
717     target+=blocklen;
718     }
719    
720 gbeauche 1.1 static __inline__ uae_u32 reverse32(uae_u32 v)
721     {
722     #if 1
723     // gb-- We have specialized byteswapping functions, just use them
724     return do_byteswap_32(v);
725     #else
726     return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
727     #endif
728     }
729    
730     /********************************************************************
731     * Getting the information about the target CPU *
732     ********************************************************************/
733    
734     #include "codegen_x86.cpp"
735    
736     void set_target(uae_u8* t)
737     {
738     target=t;
739     }
740    
741     static __inline__ uae_u8* get_target_noopt(void)
742     {
743     return target;
744     }
745    
746     __inline__ uae_u8* get_target(void)
747     {
748     return get_target_noopt();
749     }
750    
751    
752     /********************************************************************
753     * Flags status handling. EMIT TIME! *
754     ********************************************************************/
755    
756     static void bt_l_ri_noclobber(R4 r, IMM i);
757    
758     static void make_flags_live_internal(void)
759     {
760     if (live.flags_in_flags==VALID)
761     return;
762     Dif (live.flags_on_stack==TRASH) {
763     write_log("Want flags, got something on stack, but it is TRASH\n");
764     abort();
765     }
766     if (live.flags_on_stack==VALID) {
767     int tmp;
768     tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
769     raw_reg_to_flags(tmp);
770     unlock2(tmp);
771    
772     live.flags_in_flags=VALID;
773     return;
774     }
775     write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
776     live.flags_in_flags,live.flags_on_stack);
777     abort();
778     }
779    
780     static void flags_to_stack(void)
781     {
782     if (live.flags_on_stack==VALID)
783     return;
784     if (!live.flags_are_important) {
785     live.flags_on_stack=VALID;
786     return;
787     }
788     Dif (live.flags_in_flags!=VALID)
789     abort();
790     else {
791     int tmp;
792     tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
793     raw_flags_to_reg(tmp);
794     unlock2(tmp);
795     }
796     live.flags_on_stack=VALID;
797     }
798    
799     static __inline__ void clobber_flags(void)
800     {
801     if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
802     flags_to_stack();
803     live.flags_in_flags=TRASH;
804     }
805    
806     /* Prepare for leaving the compiled stuff */
807     static __inline__ void flush_flags(void)
808     {
809     flags_to_stack();
810     return;
811     }
812    
813     int touchcnt;
814    
815     /********************************************************************
816 gbeauche 1.18 * Partial register flushing for optimized calls *
817     ********************************************************************/
818    
819     struct regusage {
820     uae_u16 rmask;
821     uae_u16 wmask;
822     };
823    
824     static inline void ru_set(uae_u16 *mask, int reg)
825     {
826     #if USE_OPTIMIZED_CALLS
827     *mask |= 1 << reg;
828     #endif
829     }
830    
831     static inline bool ru_get(const uae_u16 *mask, int reg)
832     {
833     #if USE_OPTIMIZED_CALLS
834     return (*mask & (1 << reg));
835     #else
836     /* Default: instruction reads & write to register */
837     return true;
838     #endif
839     }
840    
841     static inline void ru_set_read(regusage *ru, int reg)
842     {
843     ru_set(&ru->rmask, reg);
844     }
845    
846     static inline void ru_set_write(regusage *ru, int reg)
847     {
848     ru_set(&ru->wmask, reg);
849     }
850    
851     static inline bool ru_read_p(const regusage *ru, int reg)
852     {
853     return ru_get(&ru->rmask, reg);
854     }
855    
856     static inline bool ru_write_p(const regusage *ru, int reg)
857     {
858     return ru_get(&ru->wmask, reg);
859     }
860    
861     static void ru_fill_ea(regusage *ru, int reg, amodes mode,
862     wordsizes size, int write_mode)
863     {
864     switch (mode) {
865     case Areg:
866     reg += 8;
867     /* fall through */
868     case Dreg:
869     ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
870     break;
871     case Ad16:
872     /* skip displacement, fall through */
873     m68k_pc_offset += 2;
874     case Aind:
875     case Aipi:
876     case Apdi:
877     ru_set_read(ru, reg+8);
878     break;
879     case Ad8r:
880     ru_set_read(ru, reg+8);
881     /* fall through */
882     case PC8r: {
883     uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
884     reg = (dp >> 12) & 15;
885     ru_set_read(ru, reg);
886     if (dp & 0x100)
887     m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
888     break;
889     }
890     case PC16:
891     case absw:
892     case imm0:
893     case imm1:
894     m68k_pc_offset += 2;
895     break;
896     case absl:
897     case imm2:
898     m68k_pc_offset += 4;
899     break;
900     case immi:
901     m68k_pc_offset += (size == sz_long) ? 4 : 2;
902     break;
903     }
904     }
905    
906     /* TODO: split into a static initialization part and a dynamic one
907     (instructions depending on extension words) */
908     static void ru_fill(regusage *ru, uae_u32 opcode)
909     {
910     m68k_pc_offset += 2;
911    
912     /* Default: no register is used or written to */
913     ru->rmask = 0;
914     ru->wmask = 0;
915    
916     uae_u32 real_opcode = cft_map(opcode);
917     struct instr *dp = &table68k[real_opcode];
918    
919     bool rw_dest = true;
920     bool handled = false;
921    
922     /* Handle some instructions specifically */
923     uae_u16 reg, ext;
924     switch (dp->mnemo) {
925     case i_BFCHG:
926     case i_BFCLR:
927     case i_BFEXTS:
928     case i_BFEXTU:
929     case i_BFFFO:
930     case i_BFINS:
931     case i_BFSET:
932     case i_BFTST:
933     ext = comp_get_iword((m68k_pc_offset+=2)-2);
934     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
935     if (ext & 0x020) ru_set_read(ru, ext & 7);
936     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
937     if (dp->dmode == Dreg)
938     ru_set_read(ru, dp->dreg);
939     switch (dp->mnemo) {
940     case i_BFEXTS:
941     case i_BFEXTU:
942     case i_BFFFO:
943     ru_set_write(ru, (ext >> 12) & 7);
944     break;
945     case i_BFINS:
946     ru_set_read(ru, (ext >> 12) & 7);
947     /* fall through */
948     case i_BFCHG:
949     case i_BFCLR:
950     case i_BSET:
951     if (dp->dmode == Dreg)
952     ru_set_write(ru, dp->dreg);
953     break;
954     }
955     handled = true;
956     rw_dest = false;
957     break;
958    
959     case i_BTST:
960     rw_dest = false;
961     break;
962    
963     case i_CAS:
964     {
965     ext = comp_get_iword((m68k_pc_offset+=2)-2);
966     int Du = ext & 7;
967     ru_set_read(ru, Du);
968     int Dc = (ext >> 6) & 7;
969     ru_set_read(ru, Dc);
970     ru_set_write(ru, Dc);
971     break;
972     }
973     case i_CAS2:
974     {
975     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
976     ext = comp_get_iword((m68k_pc_offset+=2)-2);
977     Rn1 = (ext >> 12) & 15;
978     Du1 = (ext >> 6) & 7;
979     Dc1 = ext & 7;
980     ru_set_read(ru, Rn1);
981     ru_set_read(ru, Du1);
982     ru_set_read(ru, Dc1);
983     ru_set_write(ru, Dc1);
984     ext = comp_get_iword((m68k_pc_offset+=2)-2);
985     Rn2 = (ext >> 12) & 15;
986     Du2 = (ext >> 6) & 7;
987     Dc2 = ext & 7;
988     ru_set_read(ru, Rn2);
989     ru_set_read(ru, Du2);
990     ru_set_write(ru, Dc2);
991     break;
992     }
993     case i_DIVL: case i_MULL:
994     m68k_pc_offset += 2;
995     break;
996     case i_LEA:
997     case i_MOVE: case i_MOVEA: case i_MOVE16:
998     rw_dest = false;
999     break;
1000     case i_PACK: case i_UNPK:
1001     rw_dest = false;
1002     m68k_pc_offset += 2;
1003     break;
1004     case i_TRAPcc:
1005     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1006     break;
1007     case i_RTR:
1008     /* do nothing, just for coverage debugging */
1009     break;
1010     /* TODO: handle EXG instruction */
1011     }
1012    
1013     /* Handle A-Traps better */
1014     if ((real_opcode & 0xf000) == 0xa000) {
1015     handled = true;
1016     }
1017    
1018     /* Handle EmulOps better */
1019     if ((real_opcode & 0xff00) == 0x7100) {
1020     handled = true;
1021     ru->rmask = 0xffff;
1022     ru->wmask = 0;
1023     }
1024    
1025     if (dp->suse && !handled)
1026     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1027    
1028     if (dp->duse && !handled)
1029     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1030    
1031     if (rw_dest)
1032     ru->rmask |= ru->wmask;
1033    
1034     handled = handled || dp->suse || dp->duse;
1035    
1036     /* Mark all registers as used/written if the instruction may trap */
1037     if (may_trap(opcode)) {
1038     handled = true;
1039     ru->rmask = 0xffff;
1040     ru->wmask = 0xffff;
1041     }
1042    
1043     if (!handled) {
1044     write_log("ru_fill: %04x = { %04x, %04x }\n",
1045     real_opcode, ru->rmask, ru->wmask);
1046     abort();
1047     }
1048     }
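/* Worked example (editorial): for "move.l d1,d3" ru_fill() yields
   rmask = 0x0002 (d1 read) and wmask = 0x0008 (d3 written); since i_MOVE
   clears rw_dest, the destination is not also folded into rmask. Bits 8-15
   stand for a0-a7, and any instruction that may trap degrades both masks
   to 0xffff as seen above. */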
1049    
1050     /********************************************************************
1051 gbeauche 1.1 * register allocation per block logging *
1052     ********************************************************************/
1053    
1054     static uae_s8 vstate[VREGS];
1055     static uae_s8 vwritten[VREGS];
1056     static uae_s8 nstate[N_REGS];
1057    
1058     #define L_UNKNOWN -127
1059     #define L_UNAVAIL -1
1060     #define L_NEEDED -2
1061     #define L_UNNEEDED -3
1062    
1063     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1064     {
1065     int i;
1066    
1067     for (i = 0; i < VREGS; i++)
1068     s->virt[i] = vstate[i];
1069     for (i = 0; i < N_REGS; i++)
1070     s->nat[i] = nstate[i];
1071     }
1072    
1073     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1074     {
1075     int i;
1076     int reverse = 0;
1077    
1078     for (i = 0; i < VREGS; i++) {
1079     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1080     return 1;
1081     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1082     reverse++;
1083     }
1084     for (i = 0; i < N_REGS; i++) {
1085     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1086     return 1;
1087     if (nstate[i] < 0 && s->nat[i] >= 0)
1088     reverse++;
1089     }
1090     if (reverse >= 2 && USE_MATCH)
1091     return 1; /* In this case, it might be worth recompiling the
1092     * callers */
1093     return 0;
1094     }
1095    
1096     static __inline__ void log_startblock(void)
1097     {
1098     int i;
1099    
1100     for (i = 0; i < VREGS; i++) {
1101     vstate[i] = L_UNKNOWN;
1102     vwritten[i] = 0;
1103     }
1104     for (i = 0; i < N_REGS; i++)
1105     nstate[i] = L_UNKNOWN;
1106     }
1107    
1108     /* Using an n-reg for a temp variable */
1109     static __inline__ void log_isused(int n)
1110     {
1111     if (nstate[n] == L_UNKNOWN)
1112     nstate[n] = L_UNAVAIL;
1113     }
1114    
1115     static __inline__ void log_visused(int r)
1116     {
1117     if (vstate[r] == L_UNKNOWN)
1118     vstate[r] = L_NEEDED;
1119     }
1120    
1121     static __inline__ void do_load_reg(int n, int r)
1122     {
1123     if (r == FLAGTMP)
1124     raw_load_flagreg(n, r);
1125     else if (r == FLAGX)
1126     raw_load_flagx(n, r);
1127     else
1128     raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1129     }
1130    
1131     static __inline__ void check_load_reg(int n, int r)
1132     {
1133     raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1134     }
1135    
1136     static __inline__ void log_vwrite(int r)
1137     {
1138     vwritten[r] = 1;
1139     }
1140    
1141     /* Using an n-reg to hold a v-reg */
1142     static __inline__ void log_isreg(int n, int r)
1143     {
1144     static int count = 0;
1145    
1146     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1147     nstate[n] = r;
1148     else {
1149     do_load_reg(n, r);
1150     if (nstate[n] == L_UNKNOWN)
1151     nstate[n] = L_UNAVAIL;
1152     }
1153     if (vstate[r] == L_UNKNOWN)
1154     vstate[r] = L_NEEDED;
1155     }
1156    
1157     static __inline__ void log_clobberreg(int r)
1158     {
1159     if (vstate[r] == L_UNKNOWN)
1160     vstate[r] = L_UNNEEDED;
1161     }
1162    
1163     /* This ends all possibility of clever register allocation */
1164    
1165     static __inline__ void log_flush(void)
1166     {
1167     int i;
1168    
1169     for (i = 0; i < VREGS; i++)
1170     if (vstate[i] == L_UNKNOWN)
1171     vstate[i] = L_NEEDED;
1172     for (i = 0; i < N_REGS; i++)
1173     if (nstate[i] == L_UNKNOWN)
1174     nstate[i] = L_UNAVAIL;
1175     }
1176    
1177     static __inline__ void log_dump(void)
1178     {
1179     int i;
1180    
1181     return; /* dump disabled: remove this early return to enable the log below */
1182    
1183     write_log("----------------------\n");
1184     for (i = 0; i < N_REGS; i++) {
1185     switch (nstate[i]) {
1186     case L_UNKNOWN:
1187     write_log("Nat %d : UNKNOWN\n", i);
1188     break;
1189     case L_UNAVAIL:
1190     write_log("Nat %d : UNAVAIL\n", i);
1191     break;
1192     default:
1193     write_log("Nat %d : %d\n", i, nstate[i]);
1194     break;
1195     }
1196     }
1197     for (i = 0; i < VREGS; i++) {
1198     if (vstate[i] == L_UNNEEDED)
1199     write_log("Virt %d: UNNEEDED\n", i);
1200     }
1201     }
1202    
1203     /********************************************************************
1204     * register status handling. EMIT TIME! *
1205     ********************************************************************/
1206    
1207     static __inline__ void set_status(int r, int status)
1208     {
1209     if (status == ISCONST)
1210     log_clobberreg(r);
1211     live.state[r].status=status;
1212     }
1213    
1214     static __inline__ int isinreg(int r)
1215     {
1216     return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1217     }
1218    
1219     static __inline__ void adjust_nreg(int r, uae_u32 val)
1220     {
1221     if (!val)
1222     return;
1223     raw_lea_l_brr(r,r,val);
1224     }
1225    
1226     static void tomem(int r)
1227     {
1228     int rr=live.state[r].realreg;
1229    
1230     if (isinreg(r)) {
1231     if (live.state[r].val && live.nat[rr].nholds==1
1232     && !live.nat[rr].locked) {
1233     // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1234     // live.state[r].val,r,rr,target);
1235     adjust_nreg(rr,live.state[r].val);
1236     live.state[r].val=0;
1237     live.state[r].dirtysize=4;
1238     set_status(r,DIRTY);
1239     }
1240     }
1241    
1242     if (live.state[r].status==DIRTY) {
1243     switch (live.state[r].dirtysize) {
1244     case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
1245     case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
1246     case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
1247     default: abort();
1248     }
1249     log_vwrite(r);
1250     set_status(r,CLEAN);
1251     live.state[r].dirtysize=0;
1252     }
1253     }
1254    
1255     static __inline__ int isconst(int r)
1256     {
1257     return live.state[r].status==ISCONST;
1258     }
1259    
1260     int is_const(int r)
1261     {
1262     return isconst(r);
1263     }
1264    
1265     static __inline__ void writeback_const(int r)
1266     {
1267     if (!isconst(r))
1268     return;
1269     Dif (live.state[r].needflush==NF_HANDLER) {
1270     write_log("Trying to write back constant NF_HANDLER!\n");
1271     abort();
1272     }
1273    
1274     raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
1275     log_vwrite(r);
1276     live.state[r].val=0;
1277     set_status(r,INMEM);
1278     }
1279    
1280     static __inline__ void tomem_c(int r)
1281     {
1282     if (isconst(r)) {
1283     writeback_const(r);
1284     }
1285     else
1286     tomem(r);
1287     }
1288    
1289     static void evict(int r)
1290     {
1291     int rr;
1292    
1293     if (!isinreg(r))
1294     return;
1295     tomem(r);
1296     rr=live.state[r].realreg;
1297    
1298     Dif (live.nat[rr].locked &&
1299     live.nat[rr].nholds==1) {
1300     write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1301     abort();
1302     }
1303    
1304     live.nat[rr].nholds--;
1305     if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1306     int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1307     int thisind=live.state[r].realind;
1308    
1309     live.nat[rr].holds[thisind]=topreg;
1310     live.state[topreg].realind=thisind;
1311     }
1312     live.state[r].realreg=-1;
1313     set_status(r,INMEM);
1314     }
1315    
1316     static __inline__ void free_nreg(int r)
1317     {
1318     int i=live.nat[r].nholds;
1319    
1320     while (i) {
1321     int vr;
1322    
1323     --i;
1324     vr=live.nat[r].holds[i];
1325     evict(vr);
1326     }
1327     Dif (live.nat[r].nholds!=0) {
1328     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1329     abort();
1330     }
1331     }
1332    
1333     /* Use with care! */
1334     static __inline__ void isclean(int r)
1335     {
1336     if (!isinreg(r))
1337     return;
1338     live.state[r].validsize=4;
1339     live.state[r].dirtysize=0;
1340     live.state[r].val=0;
1341     set_status(r,CLEAN);
1342     }
1343    
1344     static __inline__ void disassociate(int r)
1345     {
1346     isclean(r);
1347     evict(r);
1348     }
1349    
1350     static __inline__ void set_const(int r, uae_u32 val)
1351     {
1352     disassociate(r);
1353     live.state[r].val=val;
1354     set_status(r,ISCONST);
1355     }
1356    
1357     static __inline__ uae_u32 get_offset(int r)
1358     {
1359     return live.state[r].val;
1360     }
1361    
1362     static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1363     {
1364     int bestreg;
1365     uae_s32 when;
1366     int i;
1367     uae_s32 badness=0; /* to shut up gcc */
1368     bestreg=-1;
1369     when=2000000000;
1370    
1371     for (i=N_REGS;i--;) {
1372     badness=live.nat[i].touched;
1373     if (live.nat[i].nholds==0)
1374     badness=0;
1375     if (i==hint)
1376     badness-=200000000;
1377     if (!live.nat[i].locked && badness<when) {
1378     if ((size==1 && live.nat[i].canbyte) ||
1379     (size==2 && live.nat[i].canword) ||
1380     (size==4)) {
1381     bestreg=i;
1382     when=badness;
1383     if (live.nat[i].nholds==0 && hint<0)
1384     break;
1385     if (i==hint)
1386     break;
1387     }
1388     }
1389     }
1390     Dif (bestreg==-1)
1391     abort();
1392    
1393     if (live.nat[bestreg].nholds>0) {
1394     free_nreg(bestreg);
1395     }
1396     if (isinreg(r)) {
1397     int rr=live.state[r].realreg;
1398     /* This will happen if we read a partially dirty register at a
1399     bigger size */
1400     Dif (willclobber || live.state[r].validsize>=size)
1401     abort();
1402     Dif (live.nat[rr].nholds!=1)
1403     abort();
1404     if (size==4 && live.state[r].validsize==2) {
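    /* Editorial note on the next six emits: they merge the valid low word
       already in rr with the high word of the in-memory copy without using
       AND/OR, because MOV, MOVZX, BSWAP and LEA all leave the x86 flags
       untouched (live 68k flags may currently sit in EFLAGS). The
       bswap/movzx/bswap triple isolates the memory value's high 16 bits in
       bestreg; the final lea adds the two halves together into rr. */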
1405     log_isused(bestreg);
1406     log_visused(r);
1407     raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
1408     raw_bswap_32(bestreg);
1409     raw_zero_extend_16_rr(rr,rr);
1410     raw_zero_extend_16_rr(bestreg,bestreg);
1411     raw_bswap_32(bestreg);
1412     raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1413     live.state[r].validsize=4;
1414     live.nat[rr].touched=touchcnt++;
1415     return rr;
1416     }
1417     if (live.state[r].validsize==1) {
1418     /* Nothing yet */
1419     }
1420     evict(r);
1421     }
1422    
1423     if (!willclobber) {
1424     if (live.state[r].status!=UNDEF) {
1425     if (isconst(r)) {
1426     raw_mov_l_ri(bestreg,live.state[r].val);
1427     live.state[r].val=0;
1428     live.state[r].dirtysize=4;
1429     set_status(r,DIRTY);
1430     log_isused(bestreg);
1431     }
1432     else {
1433     log_isreg(bestreg, r); /* This will also load it! */
1434     live.state[r].dirtysize=0;
1435     set_status(r,CLEAN);
1436     }
1437     }
1438     else {
1439     live.state[r].val=0;
1440     live.state[r].dirtysize=0;
1441     set_status(r,CLEAN);
1442     log_isused(bestreg);
1443     }
1444     live.state[r].validsize=4;
1445     }
1446     else { /* this is the easiest way, but not optimal. FIXME! */
1447     /* Now it's trickier, but hopefully still OK */
1448     if (!isconst(r) || size==4) {
1449     live.state[r].validsize=size;
1450     live.state[r].dirtysize=size;
1451     live.state[r].val=0;
1452     set_status(r,DIRTY);
1453     if (size == 4) {
1454     log_clobberreg(r);
1455     log_isused(bestreg);
1456     }
1457     else {
1458     log_visused(r);
1459     log_isused(bestreg);
1460     }
1461     }
1462     else {
1463     if (live.state[r].status!=UNDEF)
1464     raw_mov_l_ri(bestreg,live.state[r].val);
1465     live.state[r].val=0;
1466     live.state[r].validsize=4;
1467     live.state[r].dirtysize=4;
1468     set_status(r,DIRTY);
1469     log_isused(bestreg);
1470     }
1471     }
1472     live.state[r].realreg=bestreg;
1473     live.state[r].realind=live.nat[bestreg].nholds;
1474     live.nat[bestreg].touched=touchcnt++;
1475     live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1476     live.nat[bestreg].nholds++;
1477    
1478     return bestreg;
1479     }
1480    
1481     static int alloc_reg(int r, int size, int willclobber)
1482     {
1483     return alloc_reg_hinted(r,size,willclobber,-1);
1484     }
1485    
1486     static void unlock2(int r)
1487     {
1488     Dif (!live.nat[r].locked)
1489     abort();
1490     live.nat[r].locked--;
1491     }
1492    
1493     static void setlock(int r)
1494     {
1495     live.nat[r].locked++;
1496     }
1497    
1498    
1499     static void mov_nregs(int d, int s)
1500     {
1501     int ns=live.nat[s].nholds;
1502     int nd=live.nat[d].nholds;
1503     int i;
1504    
1505     if (s==d)
1506     return;
1507    
1508     if (nd>0)
1509     free_nreg(d);
1510    
1511     log_isused(d);
1512     raw_mov_l_rr(d,s);
1513    
1514     for (i=0;i<live.nat[s].nholds;i++) {
1515     int vs=live.nat[s].holds[i];
1516    
1517     live.state[vs].realreg=d;
1518     live.state[vs].realind=i;
1519     live.nat[d].holds[i]=vs;
1520     }
1521     live.nat[d].nholds=live.nat[s].nholds;
1522    
1523     live.nat[s].nholds=0;
1524     }
1525    
1526    
1527     static __inline__ void make_exclusive(int r, int size, int spec)
1528     {
1529     int clobber;
1530     reg_status oldstate;
1531     int rr=live.state[r].realreg;
1532     int nr;
1533     int nind;
1534     int ndirt=0;
1535     int i;
1536    
1537     if (!isinreg(r))
1538     return;
1539     if (live.nat[rr].nholds==1)
1540     return;
1541     for (i=0;i<live.nat[rr].nholds;i++) {
1542     int vr=live.nat[rr].holds[i];
1543     if (vr!=r &&
1544     (live.state[vr].status==DIRTY || live.state[vr].val))
1545     ndirt++;
1546     }
1547     if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1548     /* Everything else is clean, so let's keep this register */
1549     for (i=0;i<live.nat[rr].nholds;i++) {
1550     int vr=live.nat[rr].holds[i];
1551     if (vr!=r) {
1552     evict(vr);
1553     i--; /* Try that index again! */
1554     }
1555     }
1556     Dif (live.nat[rr].nholds!=1) {
1557     write_log("natreg %d holds %d vregs, %d not exclusive\n",
1558     rr,live.nat[rr].nholds,r);
1559     abort();
1560     }
1561     return;
1562     }
1563    
1564     /* We have to split the register */
1565     oldstate=live.state[r];
1566    
1567     setlock(rr); /* Make sure this doesn't go away */
1568     /* Forget about r being in the register rr */
1569     disassociate(r);
1570     /* Get a new register, that we will clobber completely */
1571     if (oldstate.status==DIRTY) {
1572     /* If dirtysize is <4, we need a register that can handle the
1573     eventual smaller memory store! Thanks to Quake68k for exposing
1574     this detail ;-) */
1575     nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1576     }
1577     else {
1578     nr=alloc_reg_hinted(r,4,1,spec);
1579     }
1580     nind=live.state[r].realind;
1581     live.state[r]=oldstate; /* Keep all the old state info */
1582     live.state[r].realreg=nr;
1583     live.state[r].realind=nind;
1584    
1585     if (size<live.state[r].validsize) {
1586     if (live.state[r].val) {
1587     /* Might as well compensate for the offset now */
1588     raw_lea_l_brr(nr,rr,oldstate.val);
1589     live.state[r].val=0;
1590     live.state[r].dirtysize=4;
1591     set_status(r,DIRTY);
1592     }
1593     else
1594     raw_mov_l_rr(nr,rr); /* Make another copy */
1595     }
1596     unlock2(rr);
1597     }
1598    
1599     static __inline__ void add_offset(int r, uae_u32 off)
1600     {
1601     live.state[r].val+=off;
1602     }
1603    
1604     static __inline__ void remove_offset(int r, int spec)
1605     {
1606     reg_status oldstate;
1607     int rr;
1608    
1609     if (isconst(r))
1610     return;
1611     if (live.state[r].val==0)
1612     return;
1613     if (isinreg(r) && live.state[r].validsize<4)
1614     evict(r);
1615    
1616     if (!isinreg(r))
1617     alloc_reg_hinted(r,4,0,spec);
1618    
1619     Dif (live.state[r].validsize!=4) {
1620     write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1621     abort();
1622     }
1623     make_exclusive(r,0,-1);
1624     /* make_exclusive might have done the job already */
1625     if (live.state[r].val==0)
1626     return;
1627    
1628     rr=live.state[r].realreg;
1629    
1630     if (live.nat[rr].nholds==1) {
1631     //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1632     // live.state[r].val,r,rr,target);
1633     adjust_nreg(rr,live.state[r].val);
1634     live.state[r].dirtysize=4;
1635     live.state[r].val=0;
1636     set_status(r,DIRTY);
1637     return;
1638     }
1639     write_log("Failed in remove_offset\n");
1640     abort();
1641     }
1642    
1643     static __inline__ void remove_all_offsets(void)
1644     {
1645     int i;
1646    
1647     for (i=0;i<VREGS;i++)
1648     remove_offset(i,-1);
1649     }
1650    
1651     static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1652     {
1653     int n;
1654     int answer=-1;
1655    
1656     if (live.state[r].status==UNDEF) {
1657     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1658     }
1659     if (!can_offset)
1660     remove_offset(r,spec);
1661    
1662     if (isinreg(r) && live.state[r].validsize>=size) {
1663     n=live.state[r].realreg;
1664     switch(size) {
1665     case 1:
1666     if (live.nat[n].canbyte || spec>=0) {
1667     answer=n;
1668     }
1669     break;
1670     case 2:
1671     if (live.nat[n].canword || spec>=0) {
1672     answer=n;
1673     }
1674     break;
1675     case 4:
1676     answer=n;
1677     break;
1678     default: abort();
1679     }
1680     if (answer<0)
1681     evict(r);
1682     }
1683     /* either the value was in memory to start with, or it was evicted and
1684     is in memory now */
1685     if (answer<0) {
1686     answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1687     }
1688    
1689     if (spec>=0 && spec!=answer) {
1690     /* Too bad */
1691     mov_nregs(spec,answer);
1692     answer=spec;
1693     }
1694     live.nat[answer].locked++;
1695     live.nat[answer].touched=touchcnt++;
1696     return answer;
1697     }
1698    
1699    
1700    
1701     static int readreg(int r, int size)
1702     {
1703     return readreg_general(r,size,-1,0);
1704     }
1705    
1706     static int readreg_specific(int r, int size, int spec)
1707     {
1708     return readreg_general(r,size,spec,0);
1709     }
1710    
1711     static int readreg_offset(int r, int size)
1712     {
1713     return readreg_general(r,size,-1,1);
1714     }
1715    
1716     /* writereg_general(r, size, spec)
1717     *
1718     * INPUT
1719     * - r : mid-layer register
1720     * - size : requested size (1/2/4)
1721     * - spec : -1 if find or make a register free, otherwise specifies
1722     * the physical register to use in any case
1723     *
1724     * OUTPUT
1725     * - hard (physical, x86 here) register allocated to virtual register r
1726     */
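/* Typical emit-time usage (editorial sketch, mirroring the pattern of
   bt_l_ri_noclobber() further below):

       int n = writereg(r, 4);   // map vreg r to a hard reg, to be clobbered
       raw_mov_l_ri(n, 0);       // emit x86 code against the hard reg
       unlock2(n);               // drop the lock writereg() took
*/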
1727     static __inline__ int writereg_general(int r, int size, int spec)
1728     {
1729     int n;
1730     int answer=-1;
1731    
1732     if (size<4) {
1733     remove_offset(r,spec);
1734     }
1735    
1736     make_exclusive(r,size,spec);
1737     if (isinreg(r)) {
1738     int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1739     int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1740     n=live.state[r].realreg;
1741    
1742     Dif (live.nat[n].nholds!=1)
1743     abort();
1744     switch(size) {
1745     case 1:
1746     if (live.nat[n].canbyte || spec>=0) {
1747     live.state[r].dirtysize=ndsize;
1748     live.state[r].validsize=nvsize;
1749     answer=n;
1750     }
1751     break;
1752     case 2:
1753     if (live.nat[n].canword || spec>=0) {
1754     live.state[r].dirtysize=ndsize;
1755     live.state[r].validsize=nvsize;
1756     answer=n;
1757     }
1758     break;
1759     case 4:
1760     live.state[r].dirtysize=ndsize;
1761     live.state[r].validsize=nvsize;
1762     answer=n;
1763     break;
1764     default: abort();
1765     }
1766     if (answer<0)
1767     evict(r);
1768     }
1769     /* either the value was in memory to start with, or it was evicted and
1770     is in memory now */
1771     if (answer<0) {
1772     answer=alloc_reg_hinted(r,size,1,spec);
1773     }
1774     if (spec>=0 && spec!=answer) {
1775     mov_nregs(spec,answer);
1776     answer=spec;
1777     }
1778     if (live.state[r].status==UNDEF)
1779     live.state[r].validsize=4;
1780     live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1781     live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1782    
1783     live.nat[answer].locked++;
1784     live.nat[answer].touched=touchcnt++;
1785     if (size==4) {
1786     live.state[r].val=0;
1787     }
1788     else {
1789     Dif (live.state[r].val) {
1790     write_log("Problem with val\n");
1791     abort();
1792     }
1793     }
1794     set_status(r,DIRTY);
1795     return answer;
1796     }
1797    
1798     static int writereg(int r, int size)
1799     {
1800     return writereg_general(r,size,-1);
1801     }
1802    
1803     static int writereg_specific(int r, int size, int spec)
1804     {
1805     return writereg_general(r,size,spec);
1806     }
1807    
1808     static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1809     {
1810     int n;
1811     int answer=-1;
1812    
1813     if (live.state[r].status==UNDEF) {
1814     write_log("WARNING: Unexpected read of undefined register %d\n",r);
1815     }
1816     remove_offset(r,spec);
1817     make_exclusive(r,0,spec);
1818    
1819     Dif (wsize<rsize) {
1820     write_log("Cannot handle wsize<rsize in rmw_general()\n");
1821     abort();
1822     }
1823     if (isinreg(r) && live.state[r].validsize>=rsize) {
1824     n=live.state[r].realreg;
1825     Dif (live.nat[n].nholds!=1)
1826     abort();
1827    
1828     switch(rsize) {
1829     case 1:
1830     if (live.nat[n].canbyte || spec>=0) {
1831     answer=n;
1832     }
1833     break;
1834     case 2:
1835     if (live.nat[n].canword || spec>=0) {
1836     answer=n;
1837     }
1838     break;
1839     case 4:
1840     answer=n;
1841     break;
1842     default: abort();
1843     }
1844     if (answer<0)
1845     evict(r);
1846     }
1847     /* either the value was in memory to start with, or it was evicted and
1848     is in memory now */
1849     if (answer<0) {
1850     answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1851     }
1852    
1853     if (spec>=0 && spec!=answer) {
1854     /* Too bad */
1855     mov_nregs(spec,answer);
1856     answer=spec;
1857     }
1858     if (wsize>live.state[r].dirtysize)
1859     live.state[r].dirtysize=wsize;
1860     if (wsize>live.state[r].validsize)
1861     live.state[r].validsize=wsize;
1862     set_status(r,DIRTY);
1863    
1864     live.nat[answer].locked++;
1865     live.nat[answer].touched=touchcnt++;
1866    
1867     Dif (live.state[r].val) {
1868     write_log("Problem with val(rmw)\n");
1869     abort();
1870     }
1871     return answer;
1872     }
1873    
1874     static int rmw(int r, int wsize, int rsize)
1875     {
1876     return rmw_general(r,wsize,rsize,-1);
1877     }
1878    
1879     static int rmw_specific(int r, int wsize, int rsize, int spec)
1880     {
1881     return rmw_general(r,wsize,rsize,spec);
1882     }
1883    
1884    
1885     /* needed for restoring the carry flag on non-P6 cores */
1886     static void bt_l_ri_noclobber(R4 r, IMM i)
1887     {
1888     int size=4;
1889     if (i<16)
1890     size=2;
1891     r=readreg(r,size);
1892     raw_bt_l_ri(r,i);
1893     unlock2(r);
1894     }
1895    
1896     /********************************************************************
1897     * FPU register status handling. EMIT TIME! *
1898     ********************************************************************/
1899    
1900     static void f_tomem(int r)
1901     {
1902     if (live.fate[r].status==DIRTY) {
1903     #if USE_LONG_DOUBLE
1904     raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1905     #else
1906     raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1907     #endif
1908     live.fate[r].status=CLEAN;
1909     }
1910     }
1911    
1912     static void f_tomem_drop(int r)
1913     {
1914     if (live.fate[r].status==DIRTY) {
1915     #if USE_LONG_DOUBLE
1916     raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1917     #else
1918     raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1919     #endif
1920     live.fate[r].status=INMEM;
1921     }
1922     }
1923    
1924    
1925     static __inline__ int f_isinreg(int r)
1926     {
1927     return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1928     }
1929    
1930     static void f_evict(int r)
1931     {
1932     int rr;
1933    
1934     if (!f_isinreg(r))
1935     return;
1936     rr=live.fate[r].realreg;
1937     if (live.fat[rr].nholds==1)
1938     f_tomem_drop(r);
1939     else
1940     f_tomem(r);
1941    
1942     Dif (live.fat[rr].locked &&
1943     live.fat[rr].nholds==1) {
1944     write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
1945     abort();
1946     }
1947    
1948     live.fat[rr].nholds--;
1949     if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
1950     int topreg=live.fat[rr].holds[live.fat[rr].nholds];
1951     int thisind=live.fate[r].realind;
1952     live.fat[rr].holds[thisind]=topreg;
1953     live.fate[topreg].realind=thisind;
1954     }
1955     live.fate[r].status=INMEM;
1956     live.fate[r].realreg=-1;
1957     }
1958    
1959     static __inline__ void f_free_nreg(int r)
1960     {
1961     int i=live.fat[r].nholds;
1962    
1963     while (i) {
1964     int vr;
1965    
1966     --i;
1967     vr=live.fat[r].holds[i];
1968     f_evict(vr);
1969     }
1970     Dif (live.fat[r].nholds!=0) {
1971     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
1972     abort();
1973     }
1974     }
1975    
1976    
1977     /* Use with care! */
1978     static __inline__ void f_isclean(int r)
1979     {
1980     if (!f_isinreg(r))
1981     return;
1982     live.fate[r].status=CLEAN;
1983     }
1984    
1985     static __inline__ void f_disassociate(int r)
1986     {
1987     f_isclean(r);
1988     f_evict(r);
1989     }
1990    
1991    
1992    
1993     static int f_alloc_reg(int r, int willclobber)
1994     {
1995     int bestreg;
1996     uae_s32 when;
1997     int i;
1998     uae_s32 badness;
1999     bestreg=-1;
2000     when=2000000000;
2001     for (i=N_FREGS;i--;) {
2002     badness=live.fat[i].touched;
2003     if (live.fat[i].nholds==0)
2004     badness=0;
2005    
2006     if (!live.fat[i].locked && badness<when) {
2007     bestreg=i;
2008     when=badness;
2009     if (live.fat[i].nholds==0)
2010     break;
2011     }
2012     }
2013     Dif (bestreg==-1)
2014     abort();
2015    
2016     if (live.fat[bestreg].nholds>0) {
2017     f_free_nreg(bestreg);
2018     }
2019     if (f_isinreg(r)) {
2020     f_evict(r);
2021     }
2022    
2023     if (!willclobber) {
2024     if (live.fate[r].status!=UNDEF) {
2025     #if USE_LONG_DOUBLE
2026     raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
2027     #else
2028     raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
2029     #endif
2030     }
2031     live.fate[r].status=CLEAN;
2032     }
2033     else {
2034     live.fate[r].status=DIRTY;
2035     }
2036     live.fate[r].realreg=bestreg;
2037     live.fate[r].realind=live.fat[bestreg].nholds;
2038     live.fat[bestreg].touched=touchcnt++;
2039     live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2040     live.fat[bestreg].nholds++;
2041    
2042     return bestreg;
2043     }
2044    
2045     static void f_unlock(int r)
2046     {
2047     Dif (!live.fat[r].locked)
2048     abort();
2049     live.fat[r].locked--;
2050     }
2051    
2052     static void f_setlock(int r)
2053     {
2054     live.fat[r].locked++;
2055     }
2056    
2057     static __inline__ int f_readreg(int r)
2058     {
2059     int n;
2060     int answer=-1;
2061    
2062     if (f_isinreg(r)) {
2063     n=live.fate[r].realreg;
2064     answer=n;
2065     }
2066     /* either the value was in memory to start with, or it was evicted and
2067     is in memory now */
2068     if (answer<0)
2069     answer=f_alloc_reg(r,0);
2070    
2071     live.fat[answer].locked++;
2072     live.fat[answer].touched=touchcnt++;
2073     return answer;
2074     }
2075    
2076     static __inline__ void f_make_exclusive(int r, int clobber)
2077     {
2078     freg_status oldstate;
2079     int rr=live.fate[r].realreg;
2080     int nr;
2081     int nind;
2082     int ndirt=0;
2083     int i;
2084    
2085     if (!f_isinreg(r))
2086     return;
2087     if (live.fat[rr].nholds==1)
2088     return;
2089     for (i=0;i<live.fat[rr].nholds;i++) {
2090     int vr=live.fat[rr].holds[i];
2091     if (vr!=r && live.fate[vr].status==DIRTY)
2092     ndirt++;
2093     }
2094     if (!ndirt && !live.fat[rr].locked) {
2095     /* Everything else is clean, so let's keep this register */
2096     for (i=0;i<live.fat[rr].nholds;i++) {
2097     int vr=live.fat[rr].holds[i];
2098     if (vr!=r) {
2099     f_evict(vr);
2100     i--; /* Try that index again! */
2101     }
2102     }
2103     Dif (live.fat[rr].nholds!=1) {
2104     write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2105     for (i=0;i<live.fat[rr].nholds;i++) {
2106     write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2107     live.fate[live.fat[rr].holds[i]].realreg,
2108     live.fate[live.fat[rr].holds[i]].realind);
2109     }
2110     write_log("\n");
2111     abort();
2112     }
2113     return;
2114     }
2115    
2116     /* We have to split the register */
2117     oldstate=live.fate[r];
2118    
2119     f_setlock(rr); /* Make sure this doesn't go away */
2120     /* Forget about r being in the register rr */
2121     f_disassociate(r);
2122     /* Get a new register, that we will clobber completely */
2123     nr=f_alloc_reg(r,1);
2124     nind=live.fate[r].realind;
2125     if (!clobber)
2126     raw_fmov_rr(nr,rr); /* Make another copy */
2127     live.fate[r]=oldstate; /* Keep all the old state info */
2128     live.fate[r].realreg=nr;
2129     live.fate[r].realind=nind;
2130     f_unlock(rr);
2131     }
2132    
2133    
2134     static __inline__ int f_writereg(int r)
2135     {
2136     int n;
2137     int answer=-1;
2138    
2139     f_make_exclusive(r,1);
2140     if (f_isinreg(r)) {
2141     n=live.fate[r].realreg;
2142     answer=n;
2143     }
2144     if (answer<0) {
2145     answer=f_alloc_reg(r,1);
2146     }
2147     live.fate[r].status=DIRTY;
2148     live.fat[answer].locked++;
2149     live.fat[answer].touched=touchcnt++;
2150     return answer;
2151     }
2152    
2153     static int f_rmw(int r)
2154     {
2155     int n;
2156    
2157     f_make_exclusive(r,0);
2158     if (f_isinreg(r)) {
2159     n=live.fate[r].realreg;
2160     }
2161     else
2162     n=f_alloc_reg(r,0);
2163     live.fate[r].status=DIRTY;
2164     live.fat[n].locked++;
2165     live.fat[n].touched=touchcnt++;
2166     return n;
2167     }
2168    
2169     static void fflags_into_flags_internal(uae_u32 tmp)
2170     {
2171     int r;
2172    
2173     clobber_flags();
2174     r=f_readreg(FP_RESULT);
2175     if (FFLAG_NREG_CLOBBER_CONDITION) {
2176     int tmp2=tmp;
2177     tmp=writereg_specific(tmp,4,FFLAG_NREG);
2178     raw_fflags_into_flags(r);
2179     unlock2(tmp);
2180     forget_about(tmp2);
2181     }
2182     else
2183     raw_fflags_into_flags(r);
2184     f_unlock(r);
2185 gbeauche 1.19 live_flags();
2186 gbeauche 1.1 }
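/* On backends where raw_fflags_into_flags() clobbers a fixed integer
   register (the FFLAG_NREG_CLOBBER_CONDITION case above), the scratch
   register tmp is pinned to FFLAG_NREG for the duration of the transfer
   and forgotten afterwards, since its contents are trash by then. */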
2187    
2188    
2189    
2190    
2191     /********************************************************************
2192     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2193     ********************************************************************/
2194    
2195     /*
2196     * RULES FOR HANDLING REGISTERS:
2197     *
2198     * * In the function headers, order the parameters
2199     * - 1st registers written to
2200     * - 2nd read/modify/write registers
2201     * - 3rd registers read from
2202     * * Before calling raw_*, you must call readreg, writereg or rmw for
2203     * each register
2204     * * The order for this is
2205     * - 1st call remove_offset for all registers written to with size<4
2206     * - 2nd call readreg for all registers read without offset
2207     * - 3rd call rmw for all rmw registers
2208     * - 4th call readreg_offset for all registers that can handle offsets
2209     * - 5th call get_offset for all the registers from the previous step
2210     * - 6th call writereg for all written-to registers
2211     * - 7th call raw_*
2212     * - 8th unlock2 all registers that were locked
2213     */
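/* To illustrate the rules above, here is a minimal sketch -- a
   hypothetical midfunc and raw op, not part of this file -- of a simple
   two-operand function (real midfuncs below also declare their clobbers
   first via the CLOBBER_* macros):

   MIDFUNC(2,example_op,(W4 d, R4 s))
   {
   CLOBBER_MOV;
   s=readreg(s,4);       // 2nd: lock the plain source register
   d=writereg(d,4);      // 6th: lock the written-to register last
   raw_example_op(d,s);  // 7th: emit the actual native code
   unlock2(d);           // 8th: unlock everything that was locked
   unlock2(s);
   }
   MENDFUNC(2,example_op,(W4 d, R4 s))
*/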
2214    
2215     MIDFUNC(0,live_flags,(void))
2216     {
2217     live.flags_on_stack=TRASH;
2218     live.flags_in_flags=VALID;
2219     live.flags_are_important=1;
2220     }
2221     MENDFUNC(0,live_flags,(void))
2222    
2223     MIDFUNC(0,dont_care_flags,(void))
2224     {
2225     live.flags_are_important=0;
2226     }
2227     MENDFUNC(0,dont_care_flags,(void))
2228    
2229    
2230     MIDFUNC(0,duplicate_carry,(void))
2231     {
2232     evict(FLAGX);
2233     make_flags_live_internal();
2234     COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
2235     log_vwrite(FLAGX);
2236     }
2237     MENDFUNC(0,duplicate_carry,(void))
2238    
2239     MIDFUNC(0,restore_carry,(void))
2240     {
2241     if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2242     bt_l_ri_noclobber(FLAGX,0);
2243     }
2244     else { /* Avoid the stall the above creates.
2245     This is slow on non-P6, though.
2246     */
2247     COMPCALL(rol_b_ri(FLAGX,8));
2248     isclean(FLAGX);
2249     }
2250     }
2251     MENDFUNC(0,restore_carry,(void))
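/* Both branches above recover the native carry from bit 0 of the FLAGX
   byte that duplicate_carry() stored via setcc. The bt path reads that bit
   directly into C but incurs a partial-register stall on P6-family cores;
   the rotate-by-8 path leaves the byte value unchanged while the rotate
   deposits its low bit into C, which is why FLAGX can be marked clean
   afterwards. */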
2252    
2253     MIDFUNC(0,start_needflags,(void))
2254     {
2255     needflags=1;
2256     }
2257     MENDFUNC(0,start_needflags,(void))
2258    
2259     MIDFUNC(0,end_needflags,(void))
2260     {
2261     needflags=0;
2262     }
2263     MENDFUNC(0,end_needflags,(void))
2264    
2265     MIDFUNC(0,make_flags_live,(void))
2266     {
2267     make_flags_live_internal();
2268     }
2269     MENDFUNC(0,make_flags_live,(void))
2270    
2271     MIDFUNC(1,fflags_into_flags,(W2 tmp))
2272     {
2273     clobber_flags();
2274     fflags_into_flags_internal(tmp);
2275     }
2276     MENDFUNC(1,fflags_into_flags,(W2 tmp))
2277    
2278    
2279     MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2280     {
2281     int size=4;
2282     if (i<16)
2283     size=2;
2284     CLOBBER_BT;
2285     r=readreg(r,size);
2286     raw_bt_l_ri(r,i);
2287     unlock2(r);
2288     }
2289     MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2290    
2291     MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2292     {
2293     CLOBBER_BT;
2294     r=readreg(r,4);
2295     b=readreg(b,4);
2296     raw_bt_l_rr(r,b);
2297     unlock2(r);
2298     unlock2(b);
2299     }
2300     MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2301    
2302     MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2303     {
2304     int size=4;
2305     if (i<16)
2306     size=2;
2307     CLOBBER_BT;
2308     r=rmw(r,size,size);
2309     raw_btc_l_ri(r,i);
2310     unlock2(r);
2311     }
2312     MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2313    
2314     MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2315     {
2316     CLOBBER_BT;
2317     b=readreg(b,4);
2318     r=rmw(r,4,4);
2319     raw_btc_l_rr(r,b);
2320     unlock2(r);
2321     unlock2(b);
2322     }
2323     MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2324    
2325    
2326     MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2327     {
2328     int size=4;
2329     if (i<16)
2330     size=2;
2331     CLOBBER_BT;
2332     r=rmw(r,size,size);
2333     raw_btr_l_ri(r,i);
2334     unlock2(r);
2335     }
2336     MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2337    
2338     MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2339     {
2340     CLOBBER_BT;
2341     b=readreg(b,4);
2342     r=rmw(r,4,4);
2343     raw_btr_l_rr(r,b);
2344     unlock2(r);
2345     unlock2(b);
2346     }
2347     MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2348    
2349    
2350     MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2351     {
2352     int size=4;
2353     if (i<16)
2354     size=2;
2355     CLOBBER_BT;
2356     r=rmw(r,size,size);
2357     raw_bts_l_ri(r,i);
2358     unlock2(r);
2359     }
2360     MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2361    
2362     MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2363     {
2364     CLOBBER_BT;
2365     b=readreg(b,4);
2366     r=rmw(r,4,4);
2367     raw_bts_l_rr(r,b);
2368     unlock2(r);
2369     unlock2(b);
2370     }
2371     MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2372    
2373     MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2374     {
2375     CLOBBER_MOV;
2376     d=writereg(d,4);
2377     raw_mov_l_rm(d,s);
2378     unlock2(d);
2379     }
2380     MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2381    
2382    
2383     MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2384     {
2385     r=readreg(r,4);
2386     raw_call_r(r);
2387     unlock2(r);
2388     }
2389     MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2390    
2391     MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2392     {
2393     CLOBBER_SUB;
2394     raw_sub_l_mi(d,s) ;
2395     }
2396     MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2397    
2398     MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2399     {
2400     CLOBBER_MOV;
2401     raw_mov_l_mi(d,s) ;
2402     }
2403     MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2404    
2405     MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2406     {
2407     CLOBBER_MOV;
2408     raw_mov_w_mi(d,s) ;
2409     }
2410     MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2411    
2412     MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2413     {
2414     CLOBBER_MOV;
2415     raw_mov_b_mi(d,s) ;
2416     }
2417     MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2418    
2419     MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2420     {
2421     if (!i && !needflags)
2422     return;
2423     CLOBBER_ROL;
2424     r=rmw(r,1,1);
2425     raw_rol_b_ri(r,i);
2426     unlock2(r);
2427     }
2428     MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2429    
2430     MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2431     {
2432     if (!i && !needflags)
2433     return;
2434     CLOBBER_ROL;
2435     r=rmw(r,2,2);
2436     raw_rol_w_ri(r,i);
2437     unlock2(r);
2438     }
2439     MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2440    
2441     MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2442     {
2443     if (!i && !needflags)
2444     return;
2445     CLOBBER_ROL;
2446     r=rmw(r,4,4);
2447     raw_rol_l_ri(r,i);
2448     unlock2(r);
2449     }
2450     MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2451    
2452     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2453     {
2454     if (isconst(r)) {
2455     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2456     return;
2457     }
2458     CLOBBER_ROL;
2459     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2460     d=rmw(d,4,4);
2461     Dif (r!=1) {
2462     write_log("Illegal register %d in raw_rol_l\n",r);
2463     abort();
2464     }
2465     raw_rol_l_rr(d,r) ;
2466     unlock2(r);
2467     unlock2(d);
2468     }
2469     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2470    
2471     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2472     { /* Can only do this with r==1, i.e. cl */
2473    
2474     if (isconst(r)) {
2475     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2476     return;
2477     }
2478     CLOBBER_ROL;
2479     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2480     d=rmw(d,2,2);
2481     Dif (r!=1) {
2482     write_log("Illegal register %d in raw_rol_w\n",r);
2483     abort();
2484     }
2485     raw_rol_w_rr(d,r) ;
2486     unlock2(r);
2487     unlock2(d);
2488     }
2489     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2490    
2491     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2492     { /* Can only do this with r==1, i.e. cl */
2493    
2494     if (isconst(r)) {
2495     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2496     return;
2497     }
2498    
2499     CLOBBER_ROL;
2500     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2501     d=rmw(d,1,1);
2502     Dif (r!=1) {
2503     write_log("Illegal register %d in raw_rol_b\n",r);
2504     abort();
2505     }
2506     raw_rol_b_rr(d,r) ;
2507     unlock2(r);
2508     unlock2(d);
2509     }
2510     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
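/* In the *_rr shift and rotate midfuncs here, x86 accepts a variable
   count only in cl, so the count is forced into SHIFTCOUNT_NREG (native
   register 1, presumably ecx on this backend) via readreg_specific(),
   and the Dif(r!=1) checks assert that this actually happened. */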
2511    
2512    
2513     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2514     {
2515     if (isconst(r)) {
2516     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2517     return;
2518     }
2519     CLOBBER_SHLL;
2520     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2521     d=rmw(d,4,4);
2522     Dif (r!=1) {
2523     write_log("Illegal register %d in raw_shll_l\n",r);
2524     abort();
2525     }
2526     raw_shll_l_rr(d,r) ;
2527     unlock2(r);
2528     unlock2(d);
2529     }
2530     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2531    
2532     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2533     { /* Can only do this with r==1, i.e. cl */
2534    
2535     if (isconst(r)) {
2536     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2537     return;
2538     }
2539     CLOBBER_SHLL;
2540     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2541     d=rmw(d,2,2);
2542     Dif (r!=1) {
2543     write_log("Illegal register %d in raw_shll_w\n",r);
2544     abort();
2545     }
2546     raw_shll_w_rr(d,r) ;
2547     unlock2(r);
2548     unlock2(d);
2549     }
2550     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2551    
2552     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2553     { /* Can only do this with r==1, i.e. cl */
2554    
2555     if (isconst(r)) {
2556     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2557     return;
2558     }
2559    
2560     CLOBBER_SHLL;
2561     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2562     d=rmw(d,1,1);
2563     Dif (r!=1) {
2564     write_log("Illegal register %d in raw_shll_b\n",r);
2565     abort();
2566     }
2567     raw_shll_b_rr(d,r) ;
2568     unlock2(r);
2569     unlock2(d);
2570     }
2571     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2572    
2573    
2574     MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2575     {
2576     if (!i && !needflags)
2577     return;
2578     CLOBBER_ROR;
2579     r=rmw(r,1,1);
2580     raw_ror_b_ri(r,i);
2581     unlock2(r);
2582     }
2583     MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2584    
2585     MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2586     {
2587     if (!i && !needflags)
2588     return;
2589     CLOBBER_ROR;
2590     r=rmw(r,2,2);
2591     raw_ror_w_ri(r,i);
2592     unlock2(r);
2593     }
2594     MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2595    
2596     MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2597     {
2598     if (!i && !needflags)
2599     return;
2600     CLOBBER_ROR;
2601     r=rmw(r,4,4);
2602     raw_ror_l_ri(r,i);
2603     unlock2(r);
2604     }
2605     MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2606    
2607     MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2608     {
2609     if (isconst(r)) {
2610     COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2611     return;
2612     }
2613     CLOBBER_ROR;
2614     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2615     d=rmw(d,4,4);
2616     raw_ror_l_rr(d,r) ;
2617     unlock2(r);
2618     unlock2(d);
2619     }
2620     MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2621    
2622     MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2623     {
2624     if (isconst(r)) {
2625     COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2626     return;
2627     }
2628     CLOBBER_ROR;
2629     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2630     d=rmw(d,2,2);
2631     raw_ror_w_rr(d,r) ;
2632     unlock2(r);
2633     unlock2(d);
2634     }
2635     MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2636    
2637     MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2638     {
2639     if (isconst(r)) {
2640     COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2641     return;
2642     }
2643    
2644     CLOBBER_ROR;
2645     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2646     d=rmw(d,1,1);
2647     raw_ror_b_rr(d,r) ;
2648     unlock2(r);
2649     unlock2(d);
2650     }
2651     MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2652    
2653     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2654     {
2655     if (isconst(r)) {
2656     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2657     return;
2658     }
2659     CLOBBER_SHRL;
2660     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2661     d=rmw(d,4,4);
2662     Dif (r!=1) {
2663     write_log("Illegal register %d in raw_shrl_l\n",r);
2664     abort();
2665     }
2666     raw_shrl_l_rr(d,r) ;
2667     unlock2(r);
2668     unlock2(d);
2669     }
2670     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2671    
2672     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2673     { /* Can only do this with r==1, i.e. cl */
2674    
2675     if (isconst(r)) {
2676     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2677     return;
2678     }
2679     CLOBBER_SHRL;
2680     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2681     d=rmw(d,2,2);
2682     Dif (r!=1) {
2683     write_log("Illegal register %d in raw_shrl_w\n",r);
2684     abort();
2685     }
2686     raw_shrl_w_rr(d,r) ;
2687     unlock2(r);
2688     unlock2(d);
2689     }
2690     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2691    
2692     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2693     { /* Can only do this with r==1, i.e. cl */
2694    
2695     if (isconst(r)) {
2696     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2697     return;
2698     }
2699    
2700     CLOBBER_SHRL;
2701     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2702     d=rmw(d,1,1);
2703     Dif (r!=1) {
2704     write_log("Illegal register %d in raw_shrl_b\n",r);
2705     abort();
2706     }
2707     raw_shrl_b_rr(d,r) ;
2708     unlock2(r);
2709     unlock2(d);
2710     }
2711     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2712    
2713    
2714    
2715     MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2716     {
2717     if (!i && !needflags)
2718     return;
2719     if (isconst(r) && !needflags) {
2720     live.state[r].val<<=i;
2721     return;
2722     }
2723     CLOBBER_SHLL;
2724     r=rmw(r,4,4);
2725     raw_shll_l_ri(r,i);
2726     unlock2(r);
2727     }
2728     MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2729    
2730     MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2731     {
2732     if (!i && !needflags)
2733     return;
2734     CLOBBER_SHLL;
2735     r=rmw(r,2,2);
2736     raw_shll_w_ri(r,i);
2737     unlock2(r);
2738     }
2739     MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2740    
2741     MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2742     {
2743     if (!i && !needflags)
2744     return;
2745     CLOBBER_SHLL;
2746     r=rmw(r,1,1);
2747     raw_shll_b_ri(r,i);
2748     unlock2(r);
2749     }
2750     MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2751    
2752     MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2753     {
2754     if (!i && !needflags)
2755     return;
2756     if (isconst(r) && !needflags) {
2757     live.state[r].val>>=i;
2758     return;
2759     }
2760     CLOBBER_SHRL;
2761     r=rmw(r,4,4);
2762     raw_shrl_l_ri(r,i);
2763     unlock2(r);
2764     }
2765     MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2766    
2767     MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2768     {
2769     if (!i && !needflags)
2770     return;
2771     CLOBBER_SHRL;
2772     r=rmw(r,2,2);
2773     raw_shrl_w_ri(r,i);
2774     unlock2(r);
2775     }
2776     MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2777    
2778     MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2779     {
2780     if (!i && !needflags)
2781     return;
2782     CLOBBER_SHRL;
2783     r=rmw(r,1,1);
2784     raw_shrl_b_ri(r,i);
2785     unlock2(r);
2786     }
2787     MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2788    
2789     MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2790     {
2791     if (!i && !needflags)
2792     return;
2793     CLOBBER_SHRA;
2794     r=rmw(r,4,4);
2795     raw_shra_l_ri(r,i);
2796     unlock2(r);
2797     }
2798     MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2799    
2800     MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2801     {
2802     if (!i && !needflags)
2803     return;
2804     CLOBBER_SHRA;
2805     r=rmw(r,2,2);
2806     raw_shra_w_ri(r,i);
2807     unlock2(r);
2808     }
2809     MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2810    
2811     MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2812     {
2813     if (!i && !needflags)
2814     return;
2815     CLOBBER_SHRA;
2816     r=rmw(r,1,1);
2817     raw_shra_b_ri(r,i);
2818     unlock2(r);
2819     }
2820     MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2821    
2822     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2823     {
2824     if (isconst(r)) {
2825     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2826     return;
2827     }
2828     CLOBBER_SHRA;
2829     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2830     d=rmw(d,4,4);
2831     Dif (r!=1) {
2832     write_log("Illegal register %d in raw_shra_l\n",r);
2833     abort();
2834     }
2835     raw_shra_l_rr(d,r) ;
2836     unlock2(r);
2837     unlock2(d);
2838     }
2839     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2840    
2841     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2842     { /* Can only do this with r==1, i.e. cl */
2843    
2844     if (isconst(r)) {
2845     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2846     return;
2847     }
2848     CLOBBER_SHRA;
2849     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2850     d=rmw(d,2,2);
2851     Dif (r!=1) {
2852     write_log("Illegal register %d in raw_shra_w\n",r);
2853     abort();
2854     }
2855     raw_shra_w_rr(d,r) ;
2856     unlock2(r);
2857     unlock2(d);
2858     }
2859     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2860    
2861     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2862     { /* Can only do this with r==1, i.e. cl */
2863    
2864     if (isconst(r)) {
2865     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2866     return;
2867     }
2868    
2869     CLOBBER_SHRA;
2870     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2871     d=rmw(d,1,1);
2872     Dif (r!=1) {
2873     write_log("Illegal register %d in raw_shra_b\n",r);
2874     abort();
2875     }
2876     raw_shra_b_rr(d,r) ;
2877     unlock2(r);
2878     unlock2(d);
2879     }
2880     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2881    
2882    
2883     MIDFUNC(2,setcc,(W1 d, IMM cc))
2884     {
2885     CLOBBER_SETCC;
2886     d=writereg(d,1);
2887     raw_setcc(d,cc);
2888     unlock2(d);
2889     }
2890     MENDFUNC(2,setcc,(W1 d, IMM cc))
2891    
2892     MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2893     {
2894     CLOBBER_SETCC;
2895     raw_setcc_m(d,cc);
2896     }
2897     MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2898    
2899     MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2900     {
2901     if (d==s)
2902     return;
2903     CLOBBER_CMOV;
2904     s=readreg(s,4);
2905     d=rmw(d,4,4);
2906     raw_cmov_l_rr(d,s,cc);
2907     unlock2(s);
2908     unlock2(d);
2909     }
2910     MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2911    
2912     MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2913     {
2914     CLOBBER_CMOV;
2915     d=rmw(d,4,4);
2916     raw_cmov_l_rm(d,s,cc);
2917     unlock2(d);
2918     }
2919     MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2920    
2921 gbeauche 1.15 MIDFUNC(1,setzflg_l,(RW4 r))
2922 gbeauche 1.1 {
2923 gbeauche 1.15 if (setzflg_uses_bsf) {
2924     CLOBBER_BSF;
2925     r=rmw(r,4,4);
2926     raw_bsf_l_rr(r,r);
2927     unlock2(r);
2928     }
2929     else {
2930 gbeauche 1.16 Dif (live.flags_in_flags!=VALID) {
2931     write_log("setzflg() wanted flags in native flags, they are %d\n",
2932     live.flags_in_flags);
2933     abort();
2934     }
2935     r=readreg(r,4);
2936     int f=writereg(S11,4);
2937     int t=writereg(S12,4);
2938     raw_flags_set_zero(f,r,t);
2939     unlock2(f);
2940     unlock2(r);
2941     unlock2(t);
2942 gbeauche 1.15 }
2943 gbeauche 1.1 }
2944 gbeauche 1.15 MENDFUNC(1,setzflg_l,(RW4 r))
2945 gbeauche 1.1
2946     MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
2947     {
2948     CLOBBER_MUL;
2949     s=readreg(s,4);
2950     d=rmw(d,4,4);
2951     raw_imul_32_32(d,s);
2952     unlock2(s);
2953     unlock2(d);
2954     }
2955     MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
2956    
2957     MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2958     {
2959     CLOBBER_MUL;
2960     s=rmw_specific(s,4,4,MUL_NREG2);
2961     d=rmw_specific(d,4,4,MUL_NREG1);
2962     raw_imul_64_32(d,s);
2963     unlock2(s);
2964     unlock2(d);
2965     }
2966     MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2967    
2968     MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2969     {
2970     CLOBBER_MUL;
2971     s=rmw_specific(s,4,4,MUL_NREG2);
2972     d=rmw_specific(d,4,4,MUL_NREG1);
2973     raw_mul_64_32(d,s);
2974     unlock2(s);
2975     unlock2(d);
2976     }
2977     MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2978    
2979     MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
2980     {
2981     CLOBBER_MUL;
2982     s=readreg(s,4);
2983     d=rmw(d,4,4);
2984     raw_mul_32_32(d,s);
2985     unlock2(s);
2986     unlock2(d);
2987     }
2988     MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2989    
2990     MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
2991     {
2992     int isrmw;
2993    
2994     if (isconst(s)) {
2995     set_const(d,(uae_s32)(uae_s16)live.state[s].val);
2996     return;
2997     }
2998    
2999     CLOBBER_SE16;
3000     isrmw=(s==d);
3001     if (!isrmw) {
3002     s=readreg(s,2);
3003     d=writereg(d,4);
3004     }
3005     else { /* If we try to lock this twice, with different sizes, we
3006     are in trouble! */
3007     s=d=rmw(s,4,2);
3008     }
3009     raw_sign_extend_16_rr(d,s);
3010     if (!isrmw) {
3011     unlock2(d);
3012     unlock2(s);
3013     }
3014     else {
3015     unlock2(s);
3016     }
3017     }
3018     MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
3019    
3020     MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3021     {
3022     int isrmw;
3023    
3024     if (isconst(s)) {
3025     set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3026     return;
3027     }
3028    
3029     isrmw=(s==d);
3030     CLOBBER_SE8;
3031     if (!isrmw) {
3032     s=readreg(s,1);
3033     d=writereg(d,4);
3034     }
3035     else { /* If we try to lock this twice, with different sizes, we
3036     are in trouble! */
3037     s=d=rmw(s,4,1);
3038     }
3039    
3040     raw_sign_extend_8_rr(d,s);
3041    
3042     if (!isrmw) {
3043     unlock2(d);
3044     unlock2(s);
3045     }
3046     else {
3047     unlock2(s);
3048     }
3049     }
3050     MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3051    
3052    
3053     MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3054     {
3055     int isrmw;
3056    
3057     if (isconst(s)) {
3058     set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3059     return;
3060     }
3061    
3062     isrmw=(s==d);
3063     CLOBBER_ZE16;
3064     if (!isrmw) {
3065     s=readreg(s,2);
3066     d=writereg(d,4);
3067     }
3068     else { /* If we try to lock this twice, with different sizes, we
3069     are in trouble! */
3070     s=d=rmw(s,4,2);
3071     }
3072     raw_zero_extend_16_rr(d,s);
3073     if (!isrmw) {
3074     unlock2(d);
3075     unlock2(s);
3076     }
3077     else {
3078     unlock2(s);
3079     }
3080     }
3081     MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3082    
3083     MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3084     {
3085     int isrmw;
3086     if (isconst(s)) {
3087     set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3088     return;
3089     }
3090    
3091     isrmw=(s==d);
3092     CLOBBER_ZE8;
3093     if (!isrmw) {
3094     s=readreg(s,1);
3095     d=writereg(d,4);
3096     }
3097     else { /* If we try to lock this twice, with different sizes, we
3098     are in trouble! */
3099     s=d=rmw(s,4,1);
3100     }
3101    
3102     raw_zero_extend_8_rr(d,s);
3103    
3104     if (!isrmw) {
3105     unlock2(d);
3106     unlock2(s);
3107     }
3108     else {
3109     unlock2(s);
3110     }
3111     }
3112     MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3113    
3114     MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3115     {
3116     if (d==s)
3117     return;
3118     if (isconst(s)) {
3119     COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3120     return;
3121     }
3122    
3123     CLOBBER_MOV;
3124     s=readreg(s,1);
3125     d=writereg(d,1);
3126     raw_mov_b_rr(d,s);
3127     unlock2(d);
3128     unlock2(s);
3129     }
3130     MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3131    
3132     MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3133     {
3134     if (d==s)
3135     return;
3136     if (isconst(s)) {
3137     COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3138     return;
3139     }
3140    
3141     CLOBBER_MOV;
3142     s=readreg(s,2);
3143     d=writereg(d,2);
3144     raw_mov_w_rr(d,s);
3145     unlock2(d);
3146     unlock2(s);
3147     }
3148     MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3149    
3150    
3151     MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3152     {
3153     CLOBBER_MOV;
3154     baser=readreg(baser,4);
3155     index=readreg(index,4);
3156     d=writereg(d,4);
3157    
3158     raw_mov_l_rrm_indexed(d,baser,index,factor);
3159     unlock2(d);
3160     unlock2(baser);
3161     unlock2(index);
3162     }
3163     MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3164    
3165     MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3166     {
3167     CLOBBER_MOV;
3168     baser=readreg(baser,4);
3169     index=readreg(index,4);
3170     d=writereg(d,2);
3171    
3172     raw_mov_w_rrm_indexed(d,baser,index,factor);
3173     unlock2(d);
3174     unlock2(baser);
3175     unlock2(index);
3176     }
3177     MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3178    
3179     MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3180     {
3181     CLOBBER_MOV;
3182     baser=readreg(baser,4);
3183     index=readreg(index,4);
3184     d=writereg(d,1);
3185    
3186     raw_mov_b_rrm_indexed(d,baser,index,factor);
3187    
3188     unlock2(d);
3189     unlock2(baser);
3190     unlock2(index);
3191     }
3192     MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3193    
3194    
3195     MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3196     {
3197     CLOBBER_MOV;
3198     baser=readreg(baser,4);
3199     index=readreg(index,4);
3200     s=readreg(s,4);
3201    
3202     Dif (baser==s || index==s)
3203     abort();
3204    
3205    
3206     raw_mov_l_mrr_indexed(baser,index,factor,s);
3207     unlock2(s);
3208     unlock2(baser);
3209     unlock2(index);
3210     }
3211     MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3212    
3213     MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3214     {
3215     CLOBBER_MOV;
3216     baser=readreg(baser,4);
3217     index=readreg(index,4);
3218     s=readreg(s,2);
3219    
3220     raw_mov_w_mrr_indexed(baser,index,factor,s);
3221     unlock2(s);
3222     unlock2(baser);
3223     unlock2(index);
3224     }
3225     MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3226    
3227     MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3228     {
3229     CLOBBER_MOV;
3230     s=readreg(s,1);
3231     baser=readreg(baser,4);
3232     index=readreg(index,4);
3233    
3234     raw_mov_b_mrr_indexed(baser,index,factor,s);
3235     unlock2(s);
3236     unlock2(baser);
3237     unlock2(index);
3238     }
3239     MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3240    
3241    
3242     MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3243     {
3244     int basereg=baser;
3245     int indexreg=index;
3246    
3247     CLOBBER_MOV;
3248     s=readreg(s,4);
3249     baser=readreg_offset(baser,4);
3250     index=readreg_offset(index,4);
3251    
3252     base+=get_offset(basereg);
3253     base+=factor*get_offset(indexreg);
3254    
3255     raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3256     unlock2(s);
3257     unlock2(baser);
3258     unlock2(index);
3259     }
3260     MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3261    
3262     MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3263     {
3264     int basereg=baser;
3265     int indexreg=index;
3266    
3267     CLOBBER_MOV;
3268     s=readreg(s,2);
3269     baser=readreg_offset(baser,4);
3270     index=readreg_offset(index,4);
3271    
3272     base+=get_offset(basereg);
3273     base+=factor*get_offset(indexreg);
3274    
3275     raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3276     unlock2(s);
3277     unlock2(baser);
3278     unlock2(index);
3279     }
3280     MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3281    
3282     MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3283     {
3284     int basereg=baser;
3285     int indexreg=index;
3286    
3287     CLOBBER_MOV;
3288     s=readreg(s,1);
3289     baser=readreg_offset(baser,4);
3290     index=readreg_offset(index,4);
3291    
3292     base+=get_offset(basereg);
3293     base+=factor*get_offset(indexreg);
3294    
3295     raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3296     unlock2(s);
3297     unlock2(baser);
3298     unlock2(index);
3299     }
3300     MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3301    
3302    
3303    
3304     /* Read a long from base+baser+factor*index */
3305     MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3306     {
3307     int basereg=baser;
3308     int indexreg=index;
3309    
3310     CLOBBER_MOV;
3311     baser=readreg_offset(baser,4);
3312     index=readreg_offset(index,4);
3313     base+=get_offset(basereg);
3314     base+=factor*get_offset(indexreg);
3315     d=writereg(d,4);
3316     raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3317     unlock2(d);
3318     unlock2(baser);
3319     unlock2(index);
3320     }
3321     MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3322    
3323    
3324     MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3325     {
3326     int basereg=baser;
3327     int indexreg=index;
3328    
3329     CLOBBER_MOV;
3330     remove_offset(d,-1);
3331     baser=readreg_offset(baser,4);
3332     index=readreg_offset(index,4);
3333     base+=get_offset(basereg);
3334     base+=factor*get_offset(indexreg);
3335     d=writereg(d,2);
3336     raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3337     unlock2(d);
3338     unlock2(baser);
3339     unlock2(index);
3340     }
3341     MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3342    
3343    
3344     MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3345     {
3346     int basereg=baser;
3347     int indexreg=index;
3348    
3349     CLOBBER_MOV;
3350     remove_offset(d,-1);
3351     baser=readreg_offset(baser,4);
3352     index=readreg_offset(index,4);
3353     base+=get_offset(basereg);
3354     base+=factor*get_offset(indexreg);
3355     d=writereg(d,1);
3356     raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3357     unlock2(d);
3358     unlock2(baser);
3359     unlock2(index);
3360     }
3361     MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3362    
3363     /* Read a long from base+factor*index */
3364     MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3365     {
3366     int indexreg=index;
3367    
3368     if (isconst(index)) {
3369     COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3370     return;
3371     }
3372    
3373     CLOBBER_MOV;
3374     index=readreg_offset(index,4);
3375     base+=get_offset(indexreg)*factor;
3376     d=writereg(d,4);
3377    
3378     raw_mov_l_rm_indexed(d,base,index,factor);
3379     unlock2(index);
3380     unlock2(d);
3381     }
3382     MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
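/* Worked example of the constant folding above (illustrative values
   only): with base=0x1000, factor=4 and index known to hold the constant
   3, no indexed load is emitted at all -- the call degenerates into
   mov_l_rm(d,0x100c). */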
3383    
3384    
3385     /* read the long at the address contained in s+offset and store in d */
3386     MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3387     {
3388     if (isconst(s)) {
3389     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3390     return;
3391     }
3392     CLOBBER_MOV;
3393     s=readreg(s,4);
3394     d=writereg(d,4);
3395    
3396     raw_mov_l_rR(d,s,offset);
3397     unlock2(d);
3398     unlock2(s);
3399     }
3400     MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3401    
3402     /* read the word at the address contained in s+offset and store in d */
3403     MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3404     {
3405     if (isconst(s)) {
3406     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3407     return;
3408     }
3409     CLOBBER_MOV;
3410     s=readreg(s,4);
3411     d=writereg(d,2);
3412    
3413     raw_mov_w_rR(d,s,offset);
3414     unlock2(d);
3415     unlock2(s);
3416     }
3417     MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3418    
3419     /* read the byte at the address contained in s+offset and store in d */
3420     MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3421     {
3422     if (isconst(s)) {
3423     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3424     return;
3425     }
3426     CLOBBER_MOV;
3427     s=readreg(s,4);
3428     d=writereg(d,1);
3429    
3430     raw_mov_b_rR(d,s,offset);
3431     unlock2(d);
3432     unlock2(s);
3433     }
3434     MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3435    
3436     /* read the long at the address contained in s+offset and store in d */
3437     MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3438     {
3439     int sreg=s;
3440     if (isconst(s)) {
3441     COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3442     return;
3443     }
3444     CLOBBER_MOV;
3445     s=readreg_offset(s,4);
3446     offset+=get_offset(sreg);
3447     d=writereg(d,4);
3448    
3449     raw_mov_l_brR(d,s,offset);
3450     unlock2(d);
3451     unlock2(s);
3452     }
3453     MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3454    
3455     /* read the word at the address contained in s+offset and store in d */
3456     MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3457     {
3458     int sreg=s;
3459     if (isconst(s)) {
3460     COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3461     return;
3462     }
3463     CLOBBER_MOV;
3464     remove_offset(d,-1);
3465     s=readreg_offset(s,4);
3466     offset+=get_offset(sreg);
3467     d=writereg(d,2);
3468    
3469     raw_mov_w_brR(d,s,offset);
3470     unlock2(d);
3471     unlock2(s);
3472     }
3473     MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3474    
3475     /* read the byte at the address contained in s+offset and store in d */
3476     MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3477     {
3478     int sreg=s;
3479     if (isconst(s)) {
3480     COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3481     return;
3482     }
3483     CLOBBER_MOV;
3484     remove_offset(d,-1);
3485     s=readreg_offset(s,4);
3486     offset+=get_offset(sreg);
3487     d=writereg(d,1);
3488    
3489     raw_mov_b_brR(d,s,offset);
3490     unlock2(d);
3491     unlock2(s);
3492     }
3493     MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3494    
3495     MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3496     {
3497     int dreg=d;
3498     if (isconst(d)) {
3499     COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3500     return;
3501     }
3502    
3503     CLOBBER_MOV;
3504     d=readreg_offset(d,4);
3505     offset+=get_offset(dreg);
3506     raw_mov_l_Ri(d,i,offset);
3507     unlock2(d);
3508     }
3509     MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3510    
3511     MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3512     {
3513     int dreg=d;
3514     if (isconst(d)) {
3515     COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3516     return;
3517     }
3518    
3519     CLOBBER_MOV;
3520     d=readreg_offset(d,4);
3521     offset+=get_offset(dreg);
3522     raw_mov_w_Ri(d,i,offset);
3523     unlock2(d);
3524     }
3525     MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3526    
3527     MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3528     {
3529     int dreg=d;
3530     if (isconst(d)) {
3531     COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3532     return;
3533     }
3534    
3535     CLOBBER_MOV;
3536     d=readreg_offset(d,4);
3537     offset+=get_offset(dreg);
3538     raw_mov_b_Ri(d,i,offset);
3539     unlock2(d);
3540     }
3541     MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3542    
3543     /* Warning! OFFSET is byte sized only! */
3544     MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3545     {
3546     if (isconst(d)) {
3547     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3548     return;
3549     }
3550     if (isconst(s)) {
3551     COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3552     return;
3553     }
3554    
3555     CLOBBER_MOV;
3556     s=readreg(s,4);
3557     d=readreg(d,4);
3558    
3559     raw_mov_l_Rr(d,s,offset);
3560     unlock2(d);
3561     unlock2(s);
3562     }
3563     MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3564    
3565     MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3566     {
3567     if (isconst(d)) {
3568     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3569     return;
3570     }
3571     if (isconst(s)) {
3572     COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3573     return;
3574     }
3575    
3576     CLOBBER_MOV;
3577     s=readreg(s,2);
3578     d=readreg(d,4);
3579     raw_mov_w_Rr(d,s,offset);
3580     unlock2(d);
3581     unlock2(s);
3582     }
3583     MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3584    
3585     MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3586     {
3587     if (isconst(d)) {
3588     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3589     return;
3590     }
3591     if (isconst(s)) {
3592     COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3593     return;
3594     }
3595    
3596     CLOBBER_MOV;
3597     s=readreg(s,1);
3598     d=readreg(d,4);
3599     raw_mov_b_Rr(d,s,offset);
3600     unlock2(d);
3601     unlock2(s);
3602     }
3603     MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3604    
3605     MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3606     {
3607     if (isconst(s)) {
3608     COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3609     return;
3610     }
3611     #if USE_OFFSET
3612     if (d==s) {
3613     add_offset(d,offset);
3614     return;
3615     }
3616     #endif
3617     CLOBBER_LEA;
3618     s=readreg(s,4);
3619     d=writereg(d,4);
3620     raw_lea_l_brr(d,s,offset);
3621     unlock2(d);
3622     unlock2(s);
3623     }
3624     MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3625    
3626     MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3627     {
3628     if (!offset) {
3629     COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3630     return;
3631     }
3632     CLOBBER_LEA;
3633     s=readreg(s,4);
3634     index=readreg(index,4);
3635     d=writereg(d,4);
3636    
3637     raw_lea_l_brr_indexed(d,s,index,factor,offset);
3638     unlock2(d);
3639     unlock2(index);
3640     unlock2(s);
3641     }
3642     MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3643    
3644     MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3645     {
3646     CLOBBER_LEA;
3647     s=readreg(s,4);
3648     index=readreg(index,4);
3649     d=writereg(d,4);
3650    
3651     raw_lea_l_rr_indexed(d,s,index,factor);
3652     unlock2(d);
3653     unlock2(index);
3654     unlock2(s);
3655     }
3656     MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3657    
3658     /* write s to the long at the address contained in d+offset */
3659     MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3660     {
3661     int dreg=d;
3662     if (isconst(d)) {
3663     COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3664     return;
3665     }
3666    
3667     CLOBBER_MOV;
3668     s=readreg(s,4);
3669     d=readreg_offset(d,4);
3670     offset+=get_offset(dreg);
3671    
3672     raw_mov_l_bRr(d,s,offset);
3673     unlock2(d);
3674     unlock2(s);
3675     }
3676     MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3677    
3678     /* write s to the word at the address contained in d+offset */
3679     MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3680     {
3681     int dreg=d;
3682    
3683     if (isconst(d)) {
3684     COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3685     return;
3686     }
3687    
3688     CLOBBER_MOV;
3689     s=readreg(s,2);
3690     d=readreg_offset(d,4);
3691     offset+=get_offset(dreg);
3692     raw_mov_w_bRr(d,s,offset);
3693     unlock2(d);
3694     unlock2(s);
3695     }
3696     MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3697    
3698     MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3699     {
3700     int dreg=d;
3701     if (isconst(d)) {
3702     COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3703     return;
3704     }
3705    
3706     CLOBBER_MOV;
3707     s=readreg(s,1);
3708     d=readreg_offset(d,4);
3709     offset+=get_offset(dreg);
3710     raw_mov_b_bRr(d,s,offset);
3711     unlock2(d);
3712     unlock2(s);
3713     }
3714     MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3715    
3716     MIDFUNC(1,bswap_32,(RW4 r))
3717     {
3718     int reg=r;
3719    
3720     if (isconst(r)) {
3721     uae_u32 oldv=live.state[r].val;
3722     live.state[r].val=reverse32(oldv);
3723     return;
3724     }
3725    
3726     CLOBBER_SW32;
3727     r=rmw(r,4,4);
3728     raw_bswap_32(r);
3729     unlock2(r);
3730     }
3731     MENDFUNC(1,bswap_32,(RW4 r))
3732    
3733     MIDFUNC(1,bswap_16,(RW2 r))
3734     {
3735     if (isconst(r)) {
3736     uae_u32 oldv=live.state[r].val;
3737     live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3738     (oldv&0xffff0000);
3739     return;
3740     }
3741    
3742     CLOBBER_SW16;
3743     r=rmw(r,2,2);
3744    
3745     raw_bswap_16(r);
3746     unlock2(r);
3747     }
3748     MENDFUNC(1,bswap_16,(RW2 r))
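/* Worked example of the constant case: 0xdead1234 becomes 0xdead3412 --
   only the low word is byte-swapped, the upper word is preserved. */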
3749    
3750    
3751    
3752     MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3753     {
3754     int olds;
3755    
3756     if (d==s) { /* How pointless! */
3757     return;
3758     }
3759     if (isconst(s)) {
3760     COMPCALL(mov_l_ri)(d,live.state[s].val);
3761     return;
3762     }
3763     olds=s;
3764     disassociate(d);
3765     s=readreg_offset(s,4);
3766     live.state[d].realreg=s;
3767     live.state[d].realind=live.nat[s].nholds;
3768     live.state[d].val=live.state[olds].val;
3769     live.state[d].validsize=4;
3770     live.state[d].dirtysize=4;
3771     set_status(d,DIRTY);
3772    
3773     live.nat[s].holds[live.nat[s].nholds]=d;
3774     live.nat[s].nholds++;
3775     log_clobberreg(d);
3776     /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3777     d,s,live.state[d].realind,live.nat[s].nholds); */
3778     unlock2(s);
3779     }
3780     MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
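/* Note that mov_l_rr emits no native code: d is simply recorded as one
   more holder of s's native register (live.nat[s].holds[]). An actual
   move is only materialized later, if one of the aliases has to be
   evicted or made exclusive. */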
3781    
3782     MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3783     {
3784     if (isconst(s)) {
3785     COMPCALL(mov_l_mi)(d,live.state[s].val);
3786     return;
3787     }
3788     CLOBBER_MOV;
3789     s=readreg(s,4);
3790    
3791     raw_mov_l_mr(d,s);
3792     unlock2(s);
3793     }
3794     MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3795    
3796    
3797     MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3798     {
3799     if (isconst(s)) {
3800     COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3801     return;
3802     }
3803     CLOBBER_MOV;
3804     s=readreg(s,2);
3805    
3806     raw_mov_w_mr(d,s);
3807     unlock2(s);
3808     }
3809     MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3810    
3811     MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3812     {
3813     CLOBBER_MOV;
3814     d=writereg(d,2);
3815    
3816     raw_mov_w_rm(d,s);
3817     unlock2(d);
3818     }
3819     MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3820    
3821     MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3822     {
3823     if (isconst(s)) {
3824     COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3825     return;
3826     }
3827    
3828     CLOBBER_MOV;
3829     s=readreg(s,1);
3830    
3831     raw_mov_b_mr(d,s);
3832     unlock2(s);
3833     }
3834     MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3835    
3836     MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3837     {
3838     CLOBBER_MOV;
3839     d=writereg(d,1);
3840    
3841     raw_mov_b_rm(d,s);
3842     unlock2(d);
3843     }
3844     MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3845    
3846     MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3847     {
3848     set_const(d,s);
3849     return;
3850     }
3851     MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3852    
3853     MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3854     {
3855     CLOBBER_MOV;
3856     d=writereg(d,2);
3857    
3858     raw_mov_w_ri(d,s);
3859     unlock2(d);
3860     }
3861     MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3862    
3863     MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3864     {
3865     CLOBBER_MOV;
3866     d=writereg(d,1);
3867    
3868     raw_mov_b_ri(d,s);
3869     unlock2(d);
3870     }
3871     MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3872    
3873    
3874     MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3875     {
3876     CLOBBER_ADD;
3877     raw_add_l_mi(d,s) ;
3878     }
3879     MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3880    
3881     MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3882     {
3883     CLOBBER_ADD;
3884     raw_add_w_mi(d,s) ;
3885     }
3886     MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3887    
3888     MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3889     {
3890     CLOBBER_ADD;
3891     raw_add_b_mi(d,s) ;
3892     }
3893     MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3894    
3895    
3896     MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3897     {
3898     CLOBBER_TEST;
3899     d=readreg(d,4);
3900    
3901     raw_test_l_ri(d,i);
3902     unlock2(d);
3903     }
3904     MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3905    
3906     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3907     {
3908     CLOBBER_TEST;
3909     d=readreg(d,4);
3910     s=readreg(s,4);
3911    
3912     raw_test_l_rr(d,s);
3913     unlock2(d);
3914     unlock2(s);
3915     }
3916     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3917    
3918     MIDFUNC(2,test_w_rr,(R2 d, R2 s))
3919     {
3920     CLOBBER_TEST;
3921     d=readreg(d,2);
3922     s=readreg(s,2);
3923    
3924     raw_test_w_rr(d,s);
3925     unlock2(d);
3926     unlock2(s);
3927     }
3928     MENDFUNC(2,test_w_rr,(R2 d, R2 s))
3929    
3930     MIDFUNC(2,test_b_rr,(R1 d, R1 s))
3931     {
3932     CLOBBER_TEST;
3933     d=readreg(d,1);
3934     s=readreg(s,1);
3935    
3936     raw_test_b_rr(d,s);
3937     unlock2(d);
3938     unlock2(s);
3939     }
3940     MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3941    
3942    
3943     MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
3944     {
3945     if (isconst(d) && !needflags) {
3946     live.state[d].val &= i;
3947     return;
3948     }
3949    
3950     CLOBBER_AND;
3951     d=rmw(d,4,4);
3952    
3953     raw_and_l_ri(d,i);
3954     unlock2(d);
3955     }
3956     MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
3957    
3958     MIDFUNC(2,and_l,(RW4 d, R4 s))
3959     {
3960     CLOBBER_AND;
3961     s=readreg(s,4);
3962     d=rmw(d,4,4);
3963    
3964     raw_and_l(d,s);
3965     unlock2(d);
3966     unlock2(s);
3967     }
3968     MENDFUNC(2,and_l,(RW4 d, R4 s))
3969    
3970     MIDFUNC(2,and_w,(RW2 d, R2 s))
3971     {
3972     CLOBBER_AND;
3973     s=readreg(s,2);
3974     d=rmw(d,2,2);
3975    
3976     raw_and_w(d,s);
3977     unlock2(d);
3978     unlock2(s);
3979     }
3980     MENDFUNC(2,and_w,(RW2 d, R2 s))
3981    
3982     MIDFUNC(2,and_b,(RW1 d, R1 s))
3983     {
3984     CLOBBER_AND;
3985     s=readreg(s,1);
3986     d=rmw(d,1,1);
3987    
3988     raw_and_b(d,s);
3989     unlock2(d);
3990     unlock2(s);
3991     }
3992     MENDFUNC(2,and_b,(RW1 d, R1 s))
3993    
3994     // gb-- used for making an fpcr value in compemu_fpp.cpp
3995     MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
3996     {
3997     CLOBBER_OR;
3998     d=rmw(d,4,4);
3999    
4000     raw_or_l_rm(d,s);
4001     unlock2(d);
4002     }
4003     MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4004    
4005     MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4006     {
4007     if (isconst(d) && !needflags) {
4008     live.state[d].val|=i;
4009     return;
4010     }
4011     CLOBBER_OR;
4012     d=rmw(d,4,4);
4013    
4014     raw_or_l_ri(d,i);
4015     unlock2(d);
4016     }
4017     MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4018    
4019     MIDFUNC(2,or_l,(RW4 d, R4 s))
4020     {
4021     if (isconst(d) && isconst(s) && !needflags) {
4022     live.state[d].val|=live.state[s].val;
4023     return;
4024     }
4025     CLOBBER_OR;
4026     s=readreg(s,4);
4027     d=rmw(d,4,4);
4028    
4029     raw_or_l(d,s);
4030     unlock2(d);
4031     unlock2(s);
4032     }
4033     MENDFUNC(2,or_l,(RW4 d, R4 s))
4034    
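/* Note how the *_ri functions above fold operations on known-constant vregs
   at translation time instead of emitting code, provided no 68k flags are
   live.  A rough illustrative trace (vreg name invented):

	mov_l_ri(v, 0x0f);   // v becomes ISCONST, val = 0x0f
	or_l_ri(v, 0xf0);    // no code emitted: val |= 0xf0 -> 0xff
	and_l_ri(v, 0x3f);   // no code emitted: val &= 0x3f -> 0x3f
	// only a later read of v materializes 0x3f in a real register
*/
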
4035     MIDFUNC(2,or_w,(RW2 d, R2 s))
4036     {
4037     CLOBBER_OR;
4038     s=readreg(s,2);
4039     d=rmw(d,2,2);
4040    
4041     raw_or_w(d,s);
4042     unlock2(d);
4043     unlock2(s);
4044     }
4045     MENDFUNC(2,or_w,(RW2 d, R2 s))
4046    
4047     MIDFUNC(2,or_b,(RW1 d, R1 s))
4048     {
4049     CLOBBER_OR;
4050     s=readreg(s,1);
4051     d=rmw(d,1,1);
4052    
4053     raw_or_b(d,s);
4054     unlock2(d);
4055     unlock2(s);
4056     }
4057     MENDFUNC(2,or_b,(RW1 d, R1 s))
4058    
4059     MIDFUNC(2,adc_l,(RW4 d, R4 s))
4060     {
4061     CLOBBER_ADC;
4062     s=readreg(s,4);
4063     d=rmw(d,4,4);
4064    
4065     raw_adc_l(d,s);
4066    
4067     unlock2(d);
4068     unlock2(s);
4069     }
4070     MENDFUNC(2,adc_l,(RW4 d, R4 s))
4071    
4072     MIDFUNC(2,adc_w,(RW2 d, R2 s))
4073     {
4074     CLOBBER_ADC;
4075     s=readreg(s,2);
4076     d=rmw(d,2,2);
4077    
4078     raw_adc_w(d,s);
4079     unlock2(d);
4080     unlock2(s);
4081     }
4082     MENDFUNC(2,adc_w,(RW2 d, R2 s))
4083    
4084     MIDFUNC(2,adc_b,(RW1 d, R1 s))
4085     {
4086     CLOBBER_ADC;
4087     s=readreg(s,1);
4088     d=rmw(d,1,1);
4089    
4090     raw_adc_b(d,s);
4091     unlock2(d);
4092     unlock2(s);
4093     }
4094     MENDFUNC(2,adc_b,(RW1 d, R1 s))
4095    
4096     MIDFUNC(2,add_l,(RW4 d, R4 s))
4097     {
4098     if (isconst(s)) {
4099     COMPCALL(add_l_ri)(d,live.state[s].val);
4100     return;
4101     }
4102    
4103     CLOBBER_ADD;
4104     s=readreg(s,4);
4105     d=rmw(d,4,4);
4106    
4107     raw_add_l(d,s);
4108    
4109     unlock2(d);
4110     unlock2(s);
4111     }
4112     MENDFUNC(2,add_l,(RW4 d, R4 s))
4113    
4114     MIDFUNC(2,add_w,(RW2 d, R2 s))
4115     {
4116     if (isconst(s)) {
4117     COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4118     return;
4119     }
4120    
4121     CLOBBER_ADD;
4122     s=readreg(s,2);
4123     d=rmw(d,2,2);
4124    
4125     raw_add_w(d,s);
4126     unlock2(d);
4127     unlock2(s);
4128     }
4129     MENDFUNC(2,add_w,(RW2 d, R2 s))
4130    
4131     MIDFUNC(2,add_b,(RW1 d, R1 s))
4132     {
4133     if (isconst(s)) {
4134     COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4135     return;
4136     }
4137    
4138     CLOBBER_ADD;
4139     s=readreg(s,1);
4140     d=rmw(d,1,1);
4141    
4142     raw_add_b(d,s);
4143     unlock2(d);
4144     unlock2(s);
4145     }
4146     MENDFUNC(2,add_b,(RW1 d, R1 s))
4147    
4148     MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4149     {
4150     if (!i && !needflags)
4151     return;
4152     if (isconst(d) && !needflags) {
4153     live.state[d].val-=i;
4154     return;
4155     }
4156     #if USE_OFFSET
4157     if (!needflags) {
4158     add_offset(d,-i);
4159     return;
4160     }
4161     #endif
4162    
4163     CLOBBER_SUB;
4164     d=rmw(d,4,4);
4165    
4166     raw_sub_l_ri(d,i);
4167     unlock2(d);
4168     }
4169     MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4170    
4171     MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4172     {
4173     if (!i && !needflags)
4174     return;
4175    
4176     CLOBBER_SUB;
4177     d=rmw(d,2,2);
4178    
4179     raw_sub_w_ri(d,i);
4180     unlock2(d);
4181     }
4182     MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4183    
4184     MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4185     {
4186     if (!i && !needflags)
4187     return;
4188    
4189     CLOBBER_SUB;
4190     d=rmw(d,1,1);
4191    
4192     raw_sub_b_ri(d,i);
4193    
4194     unlock2(d);
4195     }
4196     MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4197    
4198     MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4199     {
4200     if (!i && !needflags)
4201     return;
4202     if (isconst(d) && !needflags) {
4203     live.state[d].val+=i;
4204     return;
4205     }
4206     #if USE_OFFSET
4207     if (!needflags) {
4208     add_offset(d,i);
4209     return;
4210     }
4211     #endif
4212     CLOBBER_ADD;
4213     d=rmw(d,4,4);
4214     raw_add_l_ri(d,i);
4215     unlock2(d);
4216     }
4217     MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
4218    
4219     MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4220     {
4221     if (!i && !needflags)
4222     return;
4223    
4224     CLOBBER_ADD;
4225     d=rmw(d,2,2);
4226    
4227     raw_add_w_ri(d,i);
4228     unlock2(d);
4229     }
4230     MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4231    
4232     MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4233     {
4234     if (!i && !needflags)
4235     return;
4236    
4237     CLOBBER_ADD;
4238     d=rmw(d,1,1);
4239    
4240     raw_add_b_ri(d,i);
4241    
4242     unlock2(d);
4243     }
4244     MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4245    
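/* With USE_OFFSET, adding a constant to a register emits no code at all
   when flags are not needed: add_offset() merely records the delta, which
   is applied in a single instruction once the register is actually read or
   flushed.  Sketch (assuming v is an address-register vreg):

	sub_l_ri(v, 4);    // deferred: pending offset is -4, no code
	sub_l_ri(v, 4);    // deferred: pending offset is -8
	readreg(v, 4);     // one add/lea now materializes the -8
*/
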
4246     MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4247     {
4248     CLOBBER_SBB;
4249     s=readreg(s,4);
4250     d=rmw(d,4,4);
4251    
4252     raw_sbb_l(d,s);
4253     unlock2(d);
4254     unlock2(s);
4255     }
4256     MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4257    
4258     MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4259     {
4260     CLOBBER_SBB;
4261     s=readreg(s,2);
4262     d=rmw(d,2,2);
4263    
4264     raw_sbb_w(d,s);
4265     unlock2(d);
4266     unlock2(s);
4267     }
4268     MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4269    
4270     MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4271     {
4272     CLOBBER_SBB;
4273     s=readreg(s,1);
4274     d=rmw(d,1,1);
4275    
4276     raw_sbb_b(d,s);
4277     unlock2(d);
4278     unlock2(s);
4279     }
4280     MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4281    
4282     MIDFUNC(2,sub_l,(RW4 d, R4 s))
4283     {
4284     if (isconst(s)) {
4285     COMPCALL(sub_l_ri)(d,live.state[s].val);
4286     return;
4287     }
4288    
4289     CLOBBER_SUB;
4290     s=readreg(s,4);
4291     d=rmw(d,4,4);
4292    
4293     raw_sub_l(d,s);
4294     unlock2(d);
4295     unlock2(s);
4296     }
4297     MENDFUNC(2,sub_l,(RW4 d, R4 s))
4298    
4299     MIDFUNC(2,sub_w,(RW2 d, R2 s))
4300     {
4301     if (isconst(s)) {
4302     COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4303     return;
4304     }
4305    
4306     CLOBBER_SUB;
4307     s=readreg(s,2);
4308     d=rmw(d,2,2);
4309    
4310     raw_sub_w(d,s);
4311     unlock2(d);
4312     unlock2(s);
4313     }
4314     MENDFUNC(2,sub_w,(RW2 d, R2 s))
4315    
4316     MIDFUNC(2,sub_b,(RW1 d, R1 s))
4317     {
4318     if (isconst(s)) {
4319     COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4320     return;
4321     }
4322    
4323     CLOBBER_SUB;
4324     s=readreg(s,1);
4325     d=rmw(d,1,1);
4326    
4327     raw_sub_b(d,s);
4328     unlock2(d);
4329     unlock2(s);
4330     }
4331     MENDFUNC(2,sub_b,(RW1 d, R1 s))
4332    
4333     MIDFUNC(2,cmp_l,(R4 d, R4 s))
4334     {
4335     CLOBBER_CMP;
4336     s=readreg(s,4);
4337     d=readreg(d,4);
4338    
4339     raw_cmp_l(d,s);
4340     unlock2(d);
4341     unlock2(s);
4342     }
4343     MENDFUNC(2,cmp_l,(R4 d, R4 s))
4344    
4345     MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4346     {
4347     CLOBBER_CMP;
4348     r=readreg(r,4);
4349    
4350     raw_cmp_l_ri(r,i);
4351     unlock2(r);
4352     }
4353     MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4354    
4355     MIDFUNC(2,cmp_w,(R2 d, R2 s))
4356     {
4357     CLOBBER_CMP;
4358     s=readreg(s,2);
4359     d=readreg(d,2);
4360    
4361     raw_cmp_w(d,s);
4362     unlock2(d);
4363     unlock2(s);
4364     }
4365     MENDFUNC(2,cmp_w,(R2 d, R2 s))
4366    
4367     MIDFUNC(2,cmp_b,(R1 d, R1 s))
4368     {
4369     CLOBBER_CMP;
4370     s=readreg(s,1);
4371     d=readreg(d,1);
4372    
4373     raw_cmp_b(d,s);
4374     unlock2(d);
4375     unlock2(s);
4376     }
4377     MENDFUNC(2,cmp_b,(R1 d, R1 s))
4378    
4379    
4380     MIDFUNC(2,xor_l,(RW4 d, R4 s))
4381     {
4382     CLOBBER_XOR;
4383     s=readreg(s,4);
4384     d=rmw(d,4,4);
4385    
4386     raw_xor_l(d,s);
4387     unlock2(d);
4388     unlock2(s);
4389     }
4390     MENDFUNC(2,xor_l,(RW4 d, R4 s))
4391    
4392     MIDFUNC(2,xor_w,(RW2 d, R2 s))
4393     {
4394     CLOBBER_XOR;
4395     s=readreg(s,2);
4396     d=rmw(d,2,2);
4397    
4398     raw_xor_w(d,s);
4399     unlock2(d);
4400     unlock2(s);
4401     }
4402     MENDFUNC(2,xor_w,(RW2 d, R2 s))
4403    
4404     MIDFUNC(2,xor_b,(RW1 d, R1 s))
4405     {
4406     CLOBBER_XOR;
4407     s=readreg(s,1);
4408     d=rmw(d,1,1);
4409    
4410     raw_xor_b(d,s);
4411     unlock2(d);
4412     unlock2(s);
4413     }
4414     MENDFUNC(2,xor_b,(RW1 d, R1 s))
4415    
4416     MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4417     {
4418     clobber_flags();
4419     remove_all_offsets();
4420     if (osize==4) {
4421     if (out1!=in1 && out1!=r) {
4422     COMPCALL(forget_about)(out1);
4423     }
4424     }
4425     else {
4426     tomem_c(out1);
4427     }
4428    
4429     in1=readreg_specific(in1,isize,REG_PAR1);
4430     r=readreg(r,4);
4431     prepare_for_call_1(); /* This should ensure that there won't be
4432     any need for swapping nregs in prepare_for_call_2
4433     */
4434     #if USE_NORMAL_CALLING_CONVENTION
4435     raw_push_l_r(in1);
4436     #endif
4437     unlock2(in1);
4438     unlock2(r);
4439    
4440     prepare_for_call_2();
4441     raw_call_r(r);
4442    
4443     #if USE_NORMAL_CALLING_CONVENTION
4444     raw_inc_sp(4);
4445     #endif
4446    
4447    
4448     live.nat[REG_RESULT].holds[0]=out1;
4449     live.nat[REG_RESULT].nholds=1;
4450     live.nat[REG_RESULT].touched=touchcnt++;
4451    
4452     live.state[out1].realreg=REG_RESULT;
4453     live.state[out1].realind=0;
4454     live.state[out1].val=0;
4455     live.state[out1].validsize=osize;
4456     live.state[out1].dirtysize=osize;
4457     set_status(out1,DIRTY);
4458     }
4459     MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4460    
4461     MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4462     {
4463     clobber_flags();
4464     remove_all_offsets();
4465     in1=readreg_specific(in1,isize1,REG_PAR1);
4466     in2=readreg_specific(in2,isize2,REG_PAR2);
4467     r=readreg(r,4);
4468     prepare_for_call_1(); /* This should ensure that there won't be
4469     any need for swapping nregs in prepare_for_call_2
4470     */
4471     #if USE_NORMAL_CALLING_CONVENTION
4472     raw_push_l_r(in2);
4473     raw_push_l_r(in1);
4474     #endif
4475     unlock2(r);
4476     unlock2(in1);
4477     unlock2(in2);
4478     prepare_for_call_2();
4479     raw_call_r(r);
4480     #if USE_NORMAL_CALLING_CONVENTION
4481     raw_inc_sp(8);
4482     #endif
4483     }
4484     MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4485    
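/* The call_r_* functions above are the bridge into C helpers.  The order of
   operations matters; condensed from call_r_11, it is:

	in1 = readreg_specific(in1, isize, REG_PAR1);  // pin arg in 1st param reg
	r   = readreg(r, 4);                           // function pointer
	prepare_for_call_1();   // flush everything call-clobbered to memory
	unlock2(in1); unlock2(r);
	prepare_for_call_2();   // disassociate all native regs
	raw_call_r(r);          // the return value lands in REG_RESULT
*/
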
4486     /* forget_about() takes a mid-layer register */
4487     MIDFUNC(1,forget_about,(W4 r))
4488     {
4489     if (isinreg(r))
4490     disassociate(r);
4491     live.state[r].val=0;
4492     set_status(r,UNDEF);
4493     }
4494     MENDFUNC(1,forget_about,(W4 r))
4495    
4496     MIDFUNC(0,nop,(void))
4497     {
4498     raw_nop();
4499     }
4500     MENDFUNC(0,nop,(void))
4501    
4502    
4503     MIDFUNC(1,f_forget_about,(FW r))
4504     {
4505     if (f_isinreg(r))
4506     f_disassociate(r);
4507     live.fate[r].status=UNDEF;
4508     }
4509     MENDFUNC(1,f_forget_about,(FW r))
4510    
4511     MIDFUNC(1,fmov_pi,(FW r))
4512     {
4513     r=f_writereg(r);
4514     raw_fmov_pi(r);
4515     f_unlock(r);
4516     }
4517     MENDFUNC(1,fmov_pi,(FW r))
4518    
4519     MIDFUNC(1,fmov_log10_2,(FW r))
4520     {
4521     r=f_writereg(r);
4522     raw_fmov_log10_2(r);
4523     f_unlock(r);
4524     }
4525     MENDFUNC(1,fmov_log10_2,(FW r))
4526    
4527     MIDFUNC(1,fmov_log2_e,(FW r))
4528     {
4529     r=f_writereg(r);
4530     raw_fmov_log2_e(r);
4531     f_unlock(r);
4532     }
4533     MENDFUNC(1,fmov_log2_e,(FW r))
4534    
4535     MIDFUNC(1,fmov_loge_2,(FW r))
4536     {
4537     r=f_writereg(r);
4538     raw_fmov_loge_2(r);
4539     f_unlock(r);
4540     }
4541     MENDFUNC(1,fmov_loge_2,(FW r))
4542    
4543     MIDFUNC(1,fmov_1,(FW r))
4544     {
4545     r=f_writereg(r);
4546     raw_fmov_1(r);
4547     f_unlock(r);
4548     }
4549     MENDFUNC(1,fmov_1,(FW r))
4550    
4551     MIDFUNC(1,fmov_0,(FW r))
4552     {
4553     r=f_writereg(r);
4554     raw_fmov_0(r);
4555     f_unlock(r);
4556     }
4557     MENDFUNC(1,fmov_0,(FW r))
4558    
4559     MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4560     {
4561     r=f_writereg(r);
4562     raw_fmov_rm(r,m);
4563     f_unlock(r);
4564     }
4565     MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4566    
4567     MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4568     {
4569     r=f_writereg(r);
4570     raw_fmovi_rm(r,m);
4571     f_unlock(r);
4572     }
4573     MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4574    
4575     MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4576     {
4577     r=f_readreg(r);
4578     raw_fmovi_mr(m,r);
4579     f_unlock(r);
4580     }
4581     MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4582    
4583     MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4584     {
4585     r=f_writereg(r);
4586     raw_fmovs_rm(r,m);
4587     f_unlock(r);
4588     }
4589     MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4590    
4591     MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4592     {
4593     r=f_readreg(r);
4594     raw_fmovs_mr(m,r);
4595     f_unlock(r);
4596     }
4597     MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4598    
4599     MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4600     {
4601     r=f_readreg(r);
4602     raw_fmov_ext_mr(m,r);
4603     f_unlock(r);
4604     }
4605     MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4606    
4607     MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4608     {
4609     r=f_readreg(r);
4610     raw_fmov_mr(m,r);
4611     f_unlock(r);
4612     }
4613     MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4614    
4615     MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4616     {
4617     r=f_writereg(r);
4618     raw_fmov_ext_rm(r,m);
4619     f_unlock(r);
4620     }
4621     MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4622    
4623     MIDFUNC(2,fmov_rr,(FW d, FR s))
4624     {
4625     if (d==s) { /* How pointless! */
4626     return;
4627     }
4628     #if USE_F_ALIAS
4629     f_disassociate(d);
4630     s=f_readreg(s);
4631     live.fate[d].realreg=s;
4632     live.fate[d].realind=live.fat[s].nholds;
4633     live.fate[d].status=DIRTY;
4634     live.fat[s].holds[live.fat[s].nholds]=d;
4635     live.fat[s].nholds++;
4636     f_unlock(s);
4637     #else
4638     s=f_readreg(s);
4639     d=f_writereg(d);
4640     raw_fmov_rr(d,s);
4641     f_unlock(s);
4642     f_unlock(d);
4643     #endif
4644     }
4645     MENDFUNC(2,fmov_rr,(FW d, FR s))
4646    
4647     MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4648     {
4649     index=readreg(index,4);
4650    
4651     raw_fldcw_m_indexed(index,base);
4652     unlock2(index);
4653     }
4654     MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4655    
4656     MIDFUNC(1,ftst_r,(FR r))
4657     {
4658     r=f_readreg(r);
4659     raw_ftst_r(r);
4660     f_unlock(r);
4661     }
4662     MENDFUNC(1,ftst_r,(FR r))
4663    
4664     MIDFUNC(0,dont_care_fflags,(void))
4665     {
4666     f_disassociate(FP_RESULT);
4667     }
4668     MENDFUNC(0,dont_care_fflags,(void))
4669    
4670     MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4671     {
4672     s=f_readreg(s);
4673     d=f_writereg(d);
4674     raw_fsqrt_rr(d,s);
4675     f_unlock(s);
4676     f_unlock(d);
4677     }
4678     MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4679    
4680     MIDFUNC(2,fabs_rr,(FW d, FR s))
4681     {
4682     s=f_readreg(s);
4683     d=f_writereg(d);
4684     raw_fabs_rr(d,s);
4685     f_unlock(s);
4686     f_unlock(d);
4687     }
4688     MENDFUNC(2,fabs_rr,(FW d, FR s))
4689    
4690     MIDFUNC(2,fsin_rr,(FW d, FR s))
4691     {
4692     s=f_readreg(s);
4693     d=f_writereg(d);
4694     raw_fsin_rr(d,s);
4695     f_unlock(s);
4696     f_unlock(d);
4697     }
4698     MENDFUNC(2,fsin_rr,(FW d, FR s))
4699    
4700     MIDFUNC(2,fcos_rr,(FW d, FR s))
4701     {
4702     s=f_readreg(s);
4703     d=f_writereg(d);
4704     raw_fcos_rr(d,s);
4705     f_unlock(s);
4706     f_unlock(d);
4707     }
4708     MENDFUNC(2,fcos_rr,(FW d, FR s))
4709    
4710     MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4711     {
4712     s=f_readreg(s);
4713     d=f_writereg(d);
4714     raw_ftwotox_rr(d,s);
4715     f_unlock(s);
4716     f_unlock(d);
4717     }
4718     MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4719    
4720     MIDFUNC(2,fetox_rr,(FW d, FR s))
4721     {
4722     s=f_readreg(s);
4723     d=f_writereg(d);
4724     raw_fetox_rr(d,s);
4725     f_unlock(s);
4726     f_unlock(d);
4727     }
4728     MENDFUNC(2,fetox_rr,(FW d, FR s))
4729    
4730     MIDFUNC(2,frndint_rr,(FW d, FR s))
4731     {
4732     s=f_readreg(s);
4733     d=f_writereg(d);
4734     raw_frndint_rr(d,s);
4735     f_unlock(s);
4736     f_unlock(d);
4737     }
4738     MENDFUNC(2,frndint_rr,(FW d, FR s))
4739    
4740     MIDFUNC(2,flog2_rr,(FW d, FR s))
4741     {
4742     s=f_readreg(s);
4743     d=f_writereg(d);
4744     raw_flog2_rr(d,s);
4745     f_unlock(s);
4746     f_unlock(d);
4747     }
4748     MENDFUNC(2,flog2_rr,(FW d, FR s))
4749    
4750     MIDFUNC(2,fneg_rr,(FW d, FR s))
4751     {
4752     s=f_readreg(s);
4753     d=f_writereg(d);
4754     raw_fneg_rr(d,s);
4755     f_unlock(s);
4756     f_unlock(d);
4757     }
4758     MENDFUNC(2,fneg_rr,(FW d, FR s))
4759    
4760     MIDFUNC(2,fadd_rr,(FRW d, FR s))
4761     {
4762     s=f_readreg(s);
4763     d=f_rmw(d);
4764     raw_fadd_rr(d,s);
4765     f_unlock(s);
4766     f_unlock(d);
4767     }
4768     MENDFUNC(2,fadd_rr,(FRW d, FR s))
4769    
4770     MIDFUNC(2,fsub_rr,(FRW d, FR s))
4771     {
4772     s=f_readreg(s);
4773     d=f_rmw(d);
4774     raw_fsub_rr(d,s);
4775     f_unlock(s);
4776     f_unlock(d);
4777     }
4778     MENDFUNC(2,fsub_rr,(FRW d, FR s))
4779    
4780     MIDFUNC(2,fcmp_rr,(FR d, FR s))
4781     {
4782     d=f_readreg(d);
4783     s=f_readreg(s);
4784     raw_fcmp_rr(d,s);
4785     f_unlock(s);
4786     f_unlock(d);
4787     }
4788     MENDFUNC(2,fcmp_rr,(FR d, FR s))
4789    
4790     MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4791     {
4792     s=f_readreg(s);
4793     d=f_rmw(d);
4794     raw_fdiv_rr(d,s);
4795     f_unlock(s);
4796     f_unlock(d);
4797     }
4798     MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4799    
4800     MIDFUNC(2,frem_rr,(FRW d, FR s))
4801     {
4802     s=f_readreg(s);
4803     d=f_rmw(d);
4804     raw_frem_rr(d,s);
4805     f_unlock(s);
4806     f_unlock(d);
4807     }
4808     MENDFUNC(2,frem_rr,(FRW d, FR s))
4809    
4810     MIDFUNC(2,frem1_rr,(FRW d, FR s))
4811     {
4812     s=f_readreg(s);
4813     d=f_rmw(d);
4814     raw_frem1_rr(d,s);
4815     f_unlock(s);
4816     f_unlock(d);
4817     }
4818     MENDFUNC(2,frem1_rr,(FRW d, FR s))
4819    
4820     MIDFUNC(2,fmul_rr,(FRW d, FR s))
4821     {
4822     s=f_readreg(s);
4823     d=f_rmw(d);
4824     raw_fmul_rr(d,s);
4825     f_unlock(s);
4826     f_unlock(d);
4827     }
4828     MENDFUNC(2,fmul_rr,(FRW d, FR s))
4829    
4830     /********************************************************************
4831     * Support functions exposed to gencomp. CREATE time *
4832     ********************************************************************/
4833    
4834     int kill_rodent(int r)
4835     {
4836     return KILLTHERAT &&
4837     have_rat_stall &&
4838     (live.state[r].status==INMEM ||
4839     live.state[r].status==CLEAN ||
4840     live.state[r].status==ISCONST ||
4841     live.state[r].dirtysize==4);
4842     }
4843    
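/* kill_rodent() decides whether a vreg is a candidate for working around a
   partial-register stall (the "rat" presumably being the register alias
   table of P6-class CPUs); it only matters when KILLTHERAT is configured
   and raw_init_cpu() reported have_rat_stall. */
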
4844     uae_u32 get_const(int r)
4845     {
4846     Dif (!isconst(r)) {
4847     write_log("Register %d should be constant, but isn't\n",r);
4848     abort();
4849     }
4850     return live.state[r].val;
4851     }
4852    
4853     void sync_m68k_pc(void)
4854     {
4855     if (m68k_pc_offset) {
4856     add_l_ri(PC_P,m68k_pc_offset);
4857     comp_pc_p+=m68k_pc_offset;
4858     m68k_pc_offset=0;
4859     }
4860     }
4861    
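/* m68k_pc_offset tracks how far the translator has advanced into the
   current block without updating the emulated PC.  Syncing is one add:

	// per translated instruction: m68k_pc_offset += size of its words
	sync_m68k_pc();  // emits a single add_l_ri(PC_P, offset), then resets it
*/
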
4862     /********************************************************************
4863     * Scratch registers management *
4864     ********************************************************************/
4865    
4866     struct scratch_t {
4867     uae_u32 regs[VREGS];
4868     fpu_register fregs[VFREGS];
4869     };
4870    
4871     static scratch_t scratch;
4872    
4873     /********************************************************************
4874     * Support functions exposed to newcpu *
4875     ********************************************************************/
4876    
4877     static inline const char *str_on_off(bool b)
4878     {
4879     return b ? "on" : "off";
4880     }
4881    
4882     void compiler_init(void)
4883     {
4884     static bool initialized = false;
4885     if (initialized)
4886     return;
4887    
4888     #ifndef WIN32
4889     // Open /dev/zero
4890     zero_fd = open("/dev/zero", O_RDWR);
4891     if (zero_fd < 0) {
4892     char str[200];
4893     sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
4894     ErrorAlert(str);
4895     QuitEmulator();
4896     }
4897     #endif
4898    
4899     #if JIT_DEBUG
4900     // JIT debug mode ?
4901     JITDebug = PrefsFindBool("jitdebug");
4902     #endif
4903     write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4904    
4905     #ifdef USE_JIT_FPU
4906     // Use JIT compiler for FPU instructions ?
4907     avoid_fpu = !PrefsFindBool("jitfpu");
4908     #else
4909     // JIT FPU is always disabled
4910     avoid_fpu = true;
4911     #endif
4912     write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4913    
4914     // Get size of the translation cache (in KB)
4915     cache_size = PrefsFindInt32("jitcachesize");
4916     write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
4917    
4918     // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4919     raw_init_cpu();
4920 gbeauche 1.15 setzflg_uses_bsf = target_check_bsf();
4921 gbeauche 1.1 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4922     write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4923 gbeauche 1.5 write_log("<JIT compiler> : alignments for loops and jumps are %d and %d\n", align_loops, align_jumps);
4924 gbeauche 1.1
4925     // Translation cache flush mechanism
4926     lazy_flush = PrefsFindBool("jitlazyflush");
4927     write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
4928     flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
4929    
4930     // Compiler features
4931     write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4932     write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4933     write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4934 gbeauche 1.8 write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
4935 gbeauche 1.1 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4936    
4937     // Build compiler tables
4938     build_comp();
4939    
4940     initialized = true;
4941    
4942 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
4943     write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
4944     #endif
4945    
4946 gbeauche 1.1 #if PROFILE_COMPILE_TIME
4947     write_log("<JIT compiler> : gather statistics on translation time\n");
4948     emul_start_time = clock();
4949     #endif
4950     }
4951    
4952     void compiler_exit(void)
4953     {
4954     #if PROFILE_COMPILE_TIME
4955     emul_end_time = clock();
4956     #endif
4957    
4958     // Deallocate translation cache
4959     if (compiled_code) {
4960 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
4961 gbeauche 1.1 compiled_code = 0;
4962     }
4963    
4964     #ifndef WIN32
4965     // Close /dev/zero
4966     if (zero_fd > 0)
4967     close(zero_fd);
4968     #endif
4969    
4970     #if PROFILE_COMPILE_TIME
4971     write_log("### Compile Block statistics\n");
4972     write_log("Number of calls to compile_block : %d\n", compile_count);
4973     uae_u32 emul_time = emul_end_time - emul_start_time;
4974     write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
4975     write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
4976     100.0*double(compile_time)/double(emul_time));
4977     write_log("\n");
4978     #endif
4979 gbeauche 1.9
4980     #if PROFILE_UNTRANSLATED_INSNS
4981     uae_u64 untranslated_count = 0;
4982     for (int i = 0; i < 65536; i++) {
4983     opcode_nums[i] = i;
4984     untranslated_count += raw_cputbl_count[i];
4985     }
4986     write_log("Sorting untranslated instruction counts...\n");
4987     qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
4988     write_log("\nRank Opc Count Name\n");
4989     for (int i = 0; i < untranslated_top_ten; i++) {
4990     uae_u32 count = raw_cputbl_count[opcode_nums[i]];
4991     struct instr *dp;
4992     struct mnemolookup *lookup;
4993     if (!count)
4994     break;
4995     dp = table68k + opcode_nums[i];
4996     for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
4997     ;
4998     write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
4999     }
5000     #endif
5001 gbeauche 1.1 }
5002    
5003     bool compiler_use_jit(void)
5004     {
5005     // Check for the "jit" prefs item
5006     if (!PrefsFindBool("jit"))
5007     return false;
5008    
5009     // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5010     if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5011     write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5012     return false;
5013     }
5014    
5015     // FIXME: there are currently problems with JIT compilation and anything below a 68040
5016     if (CPUType < 4) {
5017     write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
5018     return false;
5019     }
5020    
5021     return true;
5022     }
5023    
5024     void init_comp(void)
5025     {
5026     int i;
5027     uae_s8* cb=can_byte;
5028     uae_s8* cw=can_word;
5029     uae_s8* au=always_used;
5030    
5031     for (i=0;i<VREGS;i++) {
5032     live.state[i].realreg=-1;
5033     live.state[i].needflush=NF_SCRATCH;
5034     live.state[i].val=0;
5035     set_status(i,UNDEF);
5036     }
5037    
5038     for (i=0;i<VFREGS;i++) {
5039     live.fate[i].status=UNDEF;
5040     live.fate[i].realreg=-1;
5041     live.fate[i].needflush=NF_SCRATCH;
5042     }
5043    
5044     for (i=0;i<VREGS;i++) {
5045     if (i<16) { /* First 16 registers map to 68k registers */
5046     live.state[i].mem=((uae_u32*)&regs)+i;
5047     live.state[i].needflush=NF_TOMEM;
5048     set_status(i,INMEM);
5049     }
5050     else
5051     live.state[i].mem=scratch.regs+i;
5052     }
5053     live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5054     live.state[PC_P].needflush=NF_TOMEM;
5055     set_const(PC_P,(uae_u32)comp_pc_p);
5056    
5057     live.state[FLAGX].mem=&(regflags.x);
5058     live.state[FLAGX].needflush=NF_TOMEM;
5059     set_status(FLAGX,INMEM);
5060    
5061     live.state[FLAGTMP].mem=&(regflags.cznv);
5062     live.state[FLAGTMP].needflush=NF_TOMEM;
5063     set_status(FLAGTMP,INMEM);
5064    
5065     live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5066     set_status(NEXT_HANDLER,UNDEF);
5067    
5068     for (i=0;i<VFREGS;i++) {
5069     if (i<8) { /* First 8 registers map to 68k FPU registers */
5070     live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5071     live.fate[i].needflush=NF_TOMEM;
5072     live.fate[i].status=INMEM;
5073     }
5074     else if (i==FP_RESULT) {
5075     live.fate[i].mem=(uae_u32*)(&fpu.result);
5076     live.fate[i].needflush=NF_TOMEM;
5077     live.fate[i].status=INMEM;
5078     }
5079     else
5080     live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
5081     }
5082    
5083    
5084     for (i=0;i<N_REGS;i++) {
5085     live.nat[i].touched=0;
5086     live.nat[i].nholds=0;
5087     live.nat[i].locked=0;
5088     if (*cb==i) {
5089     live.nat[i].canbyte=1; cb++;
5090     } else live.nat[i].canbyte=0;
5091     if (*cw==i) {
5092     live.nat[i].canword=1; cw++;
5093     } else live.nat[i].canword=0;
5094     if (*au==i) {
5095     live.nat[i].locked=1; au++;
5096     }
5097     }
5098    
5099     for (i=0;i<N_FREGS;i++) {
5100     live.fat[i].touched=0;
5101     live.fat[i].nholds=0;
5102     live.fat[i].locked=0;
5103     }
5104    
5105     touchcnt=1;
5106     m68k_pc_offset=0;
5107     live.flags_in_flags=TRASH;
5108     live.flags_on_stack=VALID;
5109     live.flags_are_important=1;
5110    
5111     raw_fp_init();
5112     }
5113    
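/* Summary of the virtual register map set up above: vregs 0-15 shadow the
   68k D0-D7/A0-A7 slots in regs, PC_P tracks the translated program
   counter, FLAGX and FLAGTMP shadow the emulated X flag and the CZNV
   block, and the remaining vregs are backed by the static scratch area. */
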
5114     /* Only do this if you really mean it! The next call should be to init! */
5115     void flush(int save_regs)
5116     {
5117     int i;
5118    
5119     log_flush();
5120     flush_flags(); /* low level */
5121     sync_m68k_pc(); /* mid level */
5122    
5123     if (save_regs) {
5124     for (i=0;i<VFREGS;i++) {
5125     if (live.fate[i].needflush==NF_SCRATCH ||
5126     live.fate[i].status==CLEAN) {
5127     f_disassociate(i);
5128     }
5129     }
5130     for (i=0;i<VREGS;i++) {
5131     if (live.state[i].needflush==NF_TOMEM) {
5132     switch(live.state[i].status) {
5133     case INMEM:
5134     if (live.state[i].val) {
5135     raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
5136     log_vwrite(i);
5137     live.state[i].val=0;
5138     }
5139     break;
5140     case CLEAN:
5141     case DIRTY:
5142     remove_offset(i,-1); tomem(i); break;
5143     case ISCONST:
5144     if (i!=PC_P)
5145     writeback_const(i);
5146     break;
5147     default: break;
5148     }
5149     Dif (live.state[i].val && i!=PC_P) {
5150     write_log("Register %d still has val %x\n",
5151     i,live.state[i].val);
5152     }
5153     }
5154     }
5155     for (i=0;i<VFREGS;i++) {
5156     if (live.fate[i].needflush==NF_TOMEM &&
5157     live.fate[i].status==DIRTY) {
5158     f_evict(i);
5159     }
5160     }
5161     raw_fp_cleanup_drop();
5162     }
5163     if (needflags) {
5164     write_log("Warning! flush with needflags=1!\n");
5165     }
5166     }
5167    
5168     static void flush_keepflags(void)
5169     {
5170     int i;
5171    
5172     for (i=0;i<VFREGS;i++) {
5173     if (live.fate[i].needflush==NF_SCRATCH ||
5174     live.fate[i].status==CLEAN) {
5175     f_disassociate(i);
5176     }
5177     }
5178     for (i=0;i<VREGS;i++) {
5179     if (live.state[i].needflush==NF_TOMEM) {
5180     switch(live.state[i].status) {
5181     case INMEM:
5182     /* Can't adjust the offset here --- that needs "add" */
5183     break;
5184     case CLEAN:
5185     case DIRTY:
5186     remove_offset(i,-1); tomem(i); break;
5187     case ISCONST:
5188     if (i!=PC_P)
5189     writeback_const(i);
5190     break;
5191     default: break;
5192     }
5193     }
5194     }
5195     for (i=0;i<VFREGS;i++) {
5196     if (live.fate[i].needflush==NF_TOMEM &&
5197     live.fate[i].status==DIRTY) {
5198     f_evict(i);
5199     }
5200     }
5201     raw_fp_cleanup_drop();
5202     }
5203    
5204     void freescratch(void)
5205     {
5206     int i;
5207     for (i=0;i<N_REGS;i++)
5208     if (live.nat[i].locked && i!=4)
5209     write_log("Warning! %d is locked\n",i);
5210    
5211     for (i=0;i<VREGS;i++)
5212     if (live.state[i].needflush==NF_SCRATCH) {
5213     forget_about(i);
5214     }
5215    
5216     for (i=0;i<VFREGS;i++)
5217     if (live.fate[i].needflush==NF_SCRATCH) {
5218     f_forget_about(i);
5219     }
5220     }
5221    
5222     /********************************************************************
5223     * Support functions, internal *
5224     ********************************************************************/
5225    
5226    
5227     static void align_target(uae_u32 a)
5228     {
5229 gbeauche 1.14 if (!a)
5230     return;
5231    
5232 gbeauche 1.12 if (tune_nop_fillers)
5233     raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
5234     else {
5235     /* Fill with NOPs --- makes debugging with gdb easier */
5236     while ((uae_u32)target&(a-1))
5237     *target++=0x90;
5238     }
5239 gbeauche 1.1 }
5240    
5241     static __inline__ int isinrom(uintptr addr)
5242     {
5243     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5244     }
5245    
5246     static void flush_all(void)
5247     {
5248     int i;
5249    
5250     log_flush();
5251     for (i=0;i<VREGS;i++)
5252     if (live.state[i].status==DIRTY) {
5253     if (!call_saved[live.state[i].realreg]) {
5254     tomem(i);
5255     }
5256     }
5257     for (i=0;i<VFREGS;i++)
5258     if (f_isinreg(i))
5259     f_evict(i);
5260     raw_fp_cleanup_drop();
5261     }
5262    
5263     /* Make sure all registers that will get clobbered by a call are
5264     safe and sound in memory */
5265     static void prepare_for_call_1(void)
5266     {
5267     flush_all(); /* If there are registers that don't get clobbered,
5268     * we should be a bit more selective here */
5269     }
5270    
5271     /* We will call a C routine in a moment. That will clobber all registers,
5272     so we need to disassociate everything */
5273     static void prepare_for_call_2(void)
5274     {
5275     int i;
5276     for (i=0;i<N_REGS;i++)
5277     if (!call_saved[i] && live.nat[i].nholds>0)
5278     free_nreg(i);
5279    
5280     for (i=0;i<N_FREGS;i++)
5281     if (live.fat[i].nholds>0)
5282     f_free_nreg(i);
5283    
5284     live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5285     flags at the very start of the call_r
5286     functions! */
5287     }
5288    
5289     /********************************************************************
5290     * Memory access and related functions, CREATE time *
5291     ********************************************************************/
5292    
5293     void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5294     {
5295     next_pc_p=not_taken;
5296     taken_pc_p=taken;
5297     branch_cc=cond;
5298     }
5299    
5300    
5301     static uae_u32 get_handler_address(uae_u32 addr)
5302     {
5304     blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5305     return (uae_u32)&(bi->direct_handler_to_use);
5306     }
5307    
5308     static uae_u32 get_handler(uae_u32 addr)
5309     {
5311     blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5312     return (uae_u32)bi->direct_handler_to_use;
5313     }
5314    
5315     static void load_handler(int reg, uae_u32 addr)
5316     {
5317     mov_l_rm(reg,get_handler_address(addr));
5318     }
5319    
5320     /* This version assumes that it is writing *real* memory, and *will* fail
5321     * if that assumption is wrong! No branches, no second chances, just
5322     * straight go-for-it attitude */
5323    
5324     static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
5325     {
5326     int f=tmp;
5327    
5328     if (clobber)
5329     f=source;
5330     switch(size) {
5331     case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5332     case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5333     case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5334     }
5335     forget_about(tmp);
5336     forget_about(f);
5337     }
5338    
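/* Under direct addressing the long store above boils down to the following
   C, written out here only for illustration (byte-swap helper hypothetical,
   since 68k memory is big-endian and the x86 host is little-endian):

	static inline void write_long_direct(uae_u32 addr, uae_u32 value)
	{
	    uae_u32 *host = (uae_u32 *)(addr + MEMBaseDiff);  // guest -> host
	    *host = host_bswap_32(value);                     // hypothetical helper
	}
*/
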
5339     void writebyte(int address, int source, int tmp)
5340     {
5341     writemem_real(address,source,20,1,tmp,0);
5342     }
5343    
5344     static __inline__ void writeword_general(int address, int source, int tmp,
5345     int clobber)
5346     {
5347     writemem_real(address,source,16,2,tmp,clobber);
5348     }
5349    
5350     void writeword_clobber(int address, int source, int tmp)
5351     {
5352     writeword_general(address,source,tmp,1);
5353     }
5354    
5355     void writeword(int address, int source, int tmp)
5356     {
5357     writeword_general(address,source,tmp,0);
5358     }
5359    
5360     static __inline__ void writelong_general(int address, int source, int tmp,
5361     int clobber)
5362     {
5363     writemem_real(address,source,12,4,tmp,clobber);
5364     }
5365    
5366     void writelong_clobber(int address, int source, int tmp)
5367     {
5368     writelong_general(address,source,tmp,1);
5369     }
5370    
5371     void writelong(int address, int source, int tmp)
5372     {
5373     writelong_general(address,source,tmp,0);
5374     }
5375    
5376    
5377    
5378     /* This version assumes that it is reading *real* memory, and *will* fail
5379     * if that assumption is wrong! No branches, no second chances, just
5380     * straight go-for-it attitude */
5381    
5382     static void readmem_real(int address, int dest, int offset, int size, int tmp)
5383     {
5384     int f=tmp;
5385    
5386     if (size==4 && address!=dest)
5387     f=dest;
5388    
5389     switch(size) {
5390     case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5391     case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5392     case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5393     }
5394     forget_about(tmp);
5395     }
5396    
5397     void readbyte(int address, int dest, int tmp)
5398     {
5399     readmem_real(address,dest,8,1,tmp);
5400     }
5401    
5402     void readword(int address, int dest, int tmp)
5403     {
5404     readmem_real(address,dest,4,2,tmp);
5405     }
5406    
5407     void readlong(int address, int dest, int tmp)
5408     {
5409     readmem_real(address,dest,0,4,tmp);
5410     }
5411    
5412     void get_n_addr(int address, int dest, int tmp)
5413     {
5414     // a is the register containing the virtual address
5415     // after the offset had been fetched
5416     int a=tmp;
5417    
5418     // f is the register that will contain the offset
5419     int f=tmp;
5420    
5421     // a == f == tmp if (address == dest)
5422     if (address!=dest) {
5423     a=address;
5424     f=dest;
5425     }
5426    
5427     #if REAL_ADDRESSING
5428     mov_l_rr(dest, address);
5429     #elif DIRECT_ADDRESSING
5430     lea_l_brr(dest,address,MEMBaseDiff);
5431     #endif
5432     forget_about(tmp);
5433     }
5434    
5435     void get_n_addr_jmp(int address, int dest, int tmp)
5436     {
5437     /* For this, we need to get the same address as the rest of UAE
5438     would --- otherwise we end up translating everything twice */
5439     get_n_addr(address,dest,tmp);
5440     }
5441    
5442    
5443     /* base is a register, but dp is an actual value.
5444     target is a register, as is tmp */
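/* Full-format extension word layout decoded below, for reference:
     bits 15-12 : index register number      (dp >> 12) & 15
     bit  11    : index size, 0 = sign-extended word, 1 = long
     bits 10-9  : index scale (shift count)  (dp >> 9) & 3
     bit  8     : 1 = full format (this path)
     bit  7     : base register suppress     (dp & 0x80)
     bit  6     : index suppress             (dp & 0x40)
     bits 5-4   : base displacement size     (dp & 0x30): 0x20 word, 0x30 long
     bits 2-0   : indirection/outer size     (dp & 0x07): bit 2 selects
                  post-indexing, low bits 0x02 word / 0x03 long outer disp. */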
5445     void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5446     {
5447     int reg = (dp >> 12) & 15;
5448     int regd_shift=(dp >> 9) & 3;
5449    
5450     if (dp & 0x100) {
5451     int ignorebase=(dp&0x80);
5452     int ignorereg=(dp&0x40);
5453     int addbase=0;
5454     int outer=0;
5455    
5456     if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5457     if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5458    
5459     if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5460     if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5461    
5462     if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5463     if (!ignorereg) {
5464     if ((dp & 0x800) == 0)
5465     sign_extend_16_rr(target,reg);
5466     else
5467     mov_l_rr(target,reg);
5468     shll_l_ri(target,regd_shift);
5469     }
5470     else
5471     mov_l_ri(target,0);
5472    
5473     /* target is now regd */
5474     if (!ignorebase)
5475     add_l(target,base);
5476     add_l_ri(target,addbase);
5477     if (dp&0x03) readlong(target,target,tmp);
5478     } else { /* do the getlong first, then add regd */
5479     if (!ignorebase) {
5480     mov_l_rr(target,base);
5481     add_l_ri(target,addbase);
5482     }
5483     else
5484     mov_l_ri(target,addbase);
5485     if (dp&0x03) readlong(target,target,tmp);
5486    
5487     if (!ignorereg) {
5488     if ((dp & 0x800) == 0)
5489     sign_extend_16_rr(tmp,reg);
5490     else
5491     mov_l_rr(tmp,reg);
5492     shll_l_ri(tmp,regd_shift);
5493     /* tmp is now regd */
5494     add_l(target,tmp);
5495     }
5496     }
5497     add_l_ri(target,outer);
5498     }
5499     else { /* 68000 version */
5500     if ((dp & 0x800) == 0) { /* Sign extend */
5501     sign_extend_16_rr(target,reg);
5502     lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5503     }
5504     else {
5505     lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5506     }
5507     }
5508     forget_about(tmp);
5509     }
5510    
5511    
5512    
5513    
5514    
5515     void set_cache_state(int enabled)
5516     {
5517     if (enabled!=letit)
5518     flush_icache_hard(77);
5519     letit=enabled;
5520     }
5521    
5522     int get_cache_state(void)
5523     {
5524     return letit;
5525     }
5526    
5527     uae_u32 get_jitted_size(void)
5528     {
5529     if (compiled_code)
5530     return current_compile_p-compiled_code;
5531     return 0;
5532     }
5533    
5534 gbeauche 1.20 const int CODE_ALLOC_MAX_ATTEMPTS = 10;
5535     const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5536    
5537     static uint8 *do_alloc_code(uint32 size, int depth)
5538     {
5539     #if defined(__linux__) && 0
5540     /*
5541     This is a really awful hack that is known to work on Linux at
5542     least.
5543    
5544     The trick here is to make sure the allocated cache is near the
5545     code segment, and more precisely in the positive half of a
5546     32-bit address space, i.e. addr < 0x80000000. It turned out
5547     that a 32-bit binary run on AMD64 yields a cache allocated
5548     around 0xa0000000, thus causing trouble when translating
5549     addresses from m68k to x86.
5550     */
5551     static uint8 * code_base = NULL;
5552     if (code_base == NULL) {
5553     uintptr page_size = getpagesize();
5554     uintptr boundaries = CODE_ALLOC_BOUNDARIES;
5555     if (boundaries < page_size)
5556     boundaries = page_size;
5557     code_base = (uint8 *)sbrk(0);
5558     for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
5559     if (vm_acquire_fixed(code_base, size) == 0) {
5560     uint8 *code = code_base;
5561     code_base += size;
5562     return code;
5563     }
5564     code_base += boundaries;
5565     }
5566     return NULL;
5567     }
5568    
5569     if (vm_acquire_fixed(code_base, size) == 0) {
5570     uint8 *code = code_base;
5571     code_base += size;
5572     return code;
5573     }
5574    
5575     if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
5576     return NULL;
5577    
5578     return do_alloc_code(size, depth + 1);
5579     #else
5580     uint8 *code = (uint8 *)vm_acquire(size);
5581     return code == VM_MAP_FAILED ? NULL : code;
5582     #endif
5583     }
5584    
5585     static inline uint8 *alloc_code(uint32 size)
5586     {
5587     return do_alloc_code(size, 0);
5588     }
5589    
5590 gbeauche 1.1 void alloc_cache(void)
5591     {
5592     if (compiled_code) {
5593     flush_icache_hard(6);
5594 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5595 gbeauche 1.1 compiled_code = 0;
5596     }
5597    
5598     if (cache_size == 0)
5599     return;
5600    
5601     while (!compiled_code && cache_size) {
5602 gbeauche 1.20 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5603 gbeauche 1.1 compiled_code = 0;
5604     cache_size /= 2;
5605     }
5606     }
5607 gbeauche 1.2 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5608 gbeauche 1.1
5609     if (compiled_code) {
5610     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5611     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5612     current_compile_p = compiled_code;
5613     current_cache_size = 0;
5614     }
5615     }
5616    
5617    
5618    
5619 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5620 gbeauche 1.1
5621 gbeauche 1.8 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5622 gbeauche 1.1 {
5623 gbeauche 1.8 uae_u32 k1 = 0;
5624     uae_u32 k2 = 0;
5625    
5626     #if USE_CHECKSUM_INFO
5627     checksum_info *csi = bi->csi;
5628     Dif(!csi) abort();
5629     while (csi) {
5630     uae_s32 len = csi->length;
5631     uae_u32 tmp = (uae_u32)csi->start_p;
5632     #else
5633     uae_s32 len = bi->len;
5634     uae_u32 tmp = (uae_u32)bi->min_pcp;
5635     #endif
5636     uae_u32*pos;
5637 gbeauche 1.1
5638 gbeauche 1.8 len += (tmp & 3);
5639     tmp &= ~3;
5640     pos = (uae_u32 *)tmp;
5641    
5642     if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5643     while (len > 0) {
5644     k1 += *pos;
5645     k2 ^= *pos;
5646     pos++;
5647     len -= 4;
5648     }
5649     }
5650 gbeauche 1.1
5651 gbeauche 1.8 #if USE_CHECKSUM_INFO
5652     csi = csi->next;
5653 gbeauche 1.1 }
5654 gbeauche 1.8 #endif
5655    
5656     *c1 = k1;
5657     *c2 = k2;
5658 gbeauche 1.1 }
5659    
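/* The block checksum is deliberately cheap: a 32-bit sum plus a 32-bit xor
   over the underlying 68k code, later recomputed to catch self-modifying
   code.  Equivalent stand-alone C (hypothetical helper name):

	static void checksum_range(const uae_u32 *p, uae_s32 len, uae_u32 *c1, uae_u32 *c2)
	{
	    uae_u32 k1 = 0, k2 = 0;
	    for (; len > 0; len -= 4, p++) {
	        k1 += *p;   // two independent 32-bit mixes
	        k2 ^= *p;
	    }
	    *c1 = k1; *c2 = k2;
	}
*/
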
5660 gbeauche 1.8 #if 0
5661 gbeauche 1.7 static void show_checksum(CSI_TYPE* csi)
5662 gbeauche 1.1 {
5663     uae_u32 k1=0;
5664     uae_u32 k2=0;
5665 gbeauche 1.7 uae_s32 len=CSI_LENGTH(csi);
5666     uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5667 gbeauche 1.1 uae_u32* pos;
5668    
5669     len+=(tmp&3);
5670     tmp&=(~3);
5671     pos=(uae_u32*)tmp;
5672    
5673     if (len<0 || len>MAX_CHECKSUM_LEN) {
5674     return;
5675     }
5676     else {
5677     while (len>0) {
5678     write_log("%08x ",*pos);
5679     pos++;
5680     len-=4;
5681     }
5682     write_log(" bla\n");
5683     }
5684     }
5685 gbeauche 1.8 #endif
5686 gbeauche 1.1
5687    
5688     int check_for_cache_miss(void)
5689     {
5690     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5691    
5692     if (bi) {
5693     int cl=cacheline(regs.pc_p);
5694     if (bi!=cache_tags[cl+1].bi) {
5695     raise_in_cl_list(bi);
5696     return 1;
5697     }
5698     }
5699     return 0;
5700     }
5701    
5702    
5703     static void recompile_block(void)
5704     {
5705     /* An existing block's countdown code has expired. We need to make
5706     sure that execute_normal doesn't refuse to recompile due to a
5707     perceived cache miss... */
5708     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5709    
5710     Dif (!bi)
5711     abort();
5712     raise_in_cl_list(bi);
5713     execute_normal();
5714     return;
5715     }
5716     static void cache_miss(void)
5717     {
5718     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5719     uae_u32 cl=cacheline(regs.pc_p);
5720     blockinfo* bi2=get_blockinfo(cl);
5721    
5722     if (!bi) {
5723     execute_normal(); /* Compile this block now */
5724     return;
5725     }
5726     Dif (!bi2 || bi==bi2) {
5727     write_log("Unexplained cache miss %p %p\n",bi,bi2);
5728     abort();
5729     }
5730     raise_in_cl_list(bi);
5731     return;
5732     }
5733    
5734     static int called_check_checksum(blockinfo* bi);
5735    
5736     static inline int block_check_checksum(blockinfo* bi)
5737     {
5738     uae_u32 c1,c2;
5739 gbeauche 1.7 bool isgood;
5740 gbeauche 1.1
5741     if (bi->status!=BI_NEED_CHECK)
5742     return 1; /* This block is in a checked state */
5743    
5744     checksum_count++;
5745 gbeauche 1.7
5746 gbeauche 1.1 if (bi->c1 || bi->c2)
5747     calc_checksum(bi,&c1,&c2);
5748     else {
5749     c1=c2=1; /* Make sure it doesn't match */
5750 gbeauche 1.7 }
5751 gbeauche 1.1
5752     isgood=(c1==bi->c1 && c2==bi->c2);
5753 gbeauche 1.7
5754 gbeauche 1.1 if (isgood) {
5755     /* This block is still OK. So we reactivate. Of course, that
5756     means we have to move it into the needs-to-be-flushed list */
5757     bi->handler_to_use=bi->handler;
5758     set_dhtu(bi,bi->direct_handler);
5759     bi->status=BI_CHECKING;
5760     isgood=called_check_checksum(bi);
5761     }
5762     if (isgood) {
5763     /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5764     c1,c2,bi->c1,bi->c2);*/
5765     remove_from_list(bi);
5766     add_to_active(bi);
5767     raise_in_cl_list(bi);
5768     bi->status=BI_ACTIVE;
5769     }
5770     else {
5771     /* This block actually changed. We need to invalidate it,
5772     and set it up to be recompiled */
5773     /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5774     c1,c2,bi->c1,bi->c2); */
5775     invalidate_block(bi);
5776     raise_in_cl_list(bi);
5777     }
5778     return isgood;
5779     }
5780    
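/* Block lifecycle under lazy flushing, in short:
     BI_ACTIVE      --lazy flush-->        BI_NEED_CHECK
     BI_NEED_CHECK  --checksums match-->   BI_ACTIVE   (reactivated above)
     BI_NEED_CHECK  --checksums differ-->  BI_INVALID  (recompiled on next hit) */
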
5781     static int called_check_checksum(blockinfo* bi)
5782     {
5784     int isgood=1;
5785     int i;
5786    
5787     for (i=0;i<2 && isgood;i++) {
5788     if (bi->dep[i].jmp_off) {
5789     isgood=block_check_checksum(bi->dep[i].target);
5790     }
5791     }
5792     return isgood;
5793     }
5794    
5795     static void check_checksum(void)
5796     {
5797     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5798     uae_u32 cl=cacheline(regs.pc_p);
5799     blockinfo* bi2=get_blockinfo(cl);
5800    
5801     /* These are not the droids you are looking for... */
5802     if (!bi) {
5803     /* Whoever is the primary target is in a dormant state, but
5804     calling it was accidental, and we should just compile this
5805     new block */
5806     execute_normal();
5807     return;
5808     }
5809     if (bi!=bi2) {
5810     /* The block was hit accidentally, but it does exist. Cache miss */
5811     cache_miss();
5812     return;
5813     }
5814    
5815     if (!block_check_checksum(bi))
5816     execute_normal();
5817     }
5818    
5819     static __inline__ void match_states(blockinfo* bi)
5820     {
5821     int i;
5822     smallstate* s=&(bi->env);
5823    
5824     if (bi->status==BI_NEED_CHECK) {
5825     block_check_checksum(bi);
5826     }
5827     if (bi->status==BI_ACTIVE ||
5828     bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5829     block makes (about not using
5830     certain vregs) */
5831     for (i=0;i<16;i++) {
5832     if (s->virt[i]==L_UNNEEDED) {
5833     // write_log("unneeded reg %d at %p\n",i,target);
5834     COMPCALL(forget_about)(i); // FIXME
5835     }
5836     }
5837     }
5838     flush(1);
5839    
5840     /* And now deal with the *demands* the block makes */
5841     for (i=0;i<N_REGS;i++) {
5842     int v=s->nat[i];
5843     if (v>=0) {
5844     // printf("Loading reg %d into %d at %p\n",v,i,target);
5845     readreg_specific(v,4,i);
5846     // do_load_reg(i,v);
5847     // setlock(i);
5848     }
5849     }
5850     for (i=0;i<N_REGS;i++) {
5851     int v=s->nat[i];
5852     if (v>=0) {
5853     unlock2(i);
5854     }
5855     }
5856     }
5857    
5858     static uae_u8 popallspace[1024]; /* That should be enough space */
5859    
5860     static __inline__ void create_popalls(void)
5861     {
5862     int i,r;
5863    
5864     current_compile_p=popallspace;
5865     set_target(current_compile_p);
5866     #if USE_PUSH_POP
5867     /* If we can't use gcc inline assembly, we need to pop some
5868     registers before jumping back to the various get-out routines.
5869     This generates the code for it.
5870     */
5871 gbeauche 1.5 align_target(align_jumps);
5872     popall_do_nothing=get_target();
5873 gbeauche 1.1 for (i=0;i<N_REGS;i++) {
5874     if (need_to_preserve[i])
5875     raw_pop_l_r(i);
5876     }
5877     raw_jmp((uae_u32)do_nothing);
5878    
5879 gbeauche 1.5 align_target(align_jumps);
5880 gbeauche 1.1 popall_execute_normal=get_target();
5881     for (i=0;i<N_REGS;i++) {
5882     if (need_to_preserve[i])
5883     raw_pop_l_r(i);
5884     }
5885     raw_jmp((uae_u32)execute_normal);
5886    
5887 gbeauche 1.5 align_target(align_jumps);
5888 gbeauche 1.1 popall_cache_miss=get_target();
5889     for (i=0;i<N_REGS;i++) {
5890     if (need_to_preserve[i])
5891     raw_pop_l_r(i);
5892     }
5893     raw_jmp((uae_u32)cache_miss);
5894    
5895 gbeauche 1.5 align_target(align_jumps);
5896 gbeauche 1.1 popall_recompile_block=get_target();
5897     for (i=0;i<N_REGS;i++) {
5898     if (need_to_preserve[i])
5899     raw_pop_l_r(i);
5900     }
5901     raw_jmp((uae_u32)recompile_block);
5902 gbeauche 1.5
5903     align_target(align_jumps);
5904 gbeauche 1.1 popall_exec_nostats=get_target();
5905     for (i=0;i<N_REGS;i++) {
5906     if (need_to_preserve[i])
5907     raw_pop_l_r(i);
5908     }
5909     raw_jmp((uae_u32)exec_nostats);
5910 gbeauche 1.5
5911     align_target(align_jumps);
5912 gbeauche 1.1 popall_check_checksum=get_target();
5913     for (i=0;i<N_REGS;i++) {
5914     if (need_to_preserve[i])
5915     raw_pop_l_r(i);
5916     }
5917     raw_jmp((uae_u32)check_checksum);
5918 gbeauche 1.5
5919     align_target(align_jumps);
5920 gbeauche 1.1 current_compile_p=get_target();
5921     #else
5922     popall_exec_nostats=(void *)exec_nostats;
5923     popall_execute_normal=(void *)execute_normal;
5924     popall_cache_miss=(void *)cache_miss;
5925     popall_recompile_block=(void *)recompile_block;
5926     popall_do_nothing=(void *)do_nothing;
5927     popall_check_checksum=(void *)check_checksum;
5928     #endif
5929    
5930     /* And now, the code to do the matching pushes and then jump
5931     into a handler routine */
5932     pushall_call_handler=get_target();
5933     #if USE_PUSH_POP
5934     for (i=N_REGS;i--;) {
5935     if (need_to_preserve[i])
5936     raw_push_l_r(i);
5937     }
5938     #endif
5939     r=REG_PC_TMP;
5940     raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5941     raw_and_l_ri(r,TAGMASK);
5942     raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5943 gbeauche 1.6
5944     #ifdef X86_ASSEMBLY
5945     align_target(align_jumps);
5946     m68k_compile_execute = (void (*)(void))get_target();
5947     for (i=N_REGS;i--;) {
5948     if (need_to_preserve[i])
5949     raw_push_l_r(i);
5950     }
5951     align_target(align_loops);
5952     uae_u32 dispatch_loop = (uae_u32)get_target();
5953     r=REG_PC_TMP;
5954     raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5955     raw_and_l_ri(r,TAGMASK);
5956     raw_call_m_indexed((uae_u32)cache_tags,r,4);
5957     raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5958     raw_jcc_b_oponly(NATIVE_CC_EQ);
5959     emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5960     raw_call((uae_u32)m68k_do_specialties);
5961     raw_test_l_rr(REG_RESULT,REG_RESULT);
5962     raw_jcc_b_oponly(NATIVE_CC_EQ);
5963     emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5964     raw_cmp_b_mi((uae_u32)&quit_program,0);
5965     raw_jcc_b_oponly(NATIVE_CC_EQ);
5966     emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5967     for (i=0;i<N_REGS;i++) {
5968     if (need_to_preserve[i])
5969     raw_pop_l_r(i);
5970     }
5971     raw_ret();
5972     #endif
5973 gbeauche 1.1 }
5974    
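/* In C terms, the dispatcher generated above is roughly the following loop
   (hand-written here only to document the control flow):

	for (;;) {
	    cache_tags[cacheline(regs.pc_p)].handler();  // run one translated block
	    if (regs.spcflags == 0)
	        continue;
	    if (m68k_do_specialties() == 0)
	        continue;
	    if (quit_program)
	        break;
	}
*/
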
5975     static __inline__ void reset_lists(void)
5976     {
5977     int i;
5978    
5979     for (i=0;i<MAX_HOLD_BI;i++)
5980     hold_bi[i]=NULL;
5981     active=NULL;
5982     dormant=NULL;
5983     }
5984    
5985     static void prepare_block(blockinfo* bi)
5986     {
5987     int i;
5988    
5989     set_target(current_compile_p);
5990 gbeauche 1.5 align_target(align_jumps);
5991 gbeauche 1.1 bi->direct_pen=(cpuop_func *)get_target();
5992     raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5993     raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5994     raw_jmp((uae_u32)popall_execute_normal);
5995    
5996 gbeauche 1.5 align_target(align_jumps);
5997 gbeauche 1.1 bi->direct_pcc=(cpuop_func *)get_target();
5998     raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5999     raw_mov_l_mr((uae_u32)&regs.pc_p,0);
6000     raw_jmp((uae_u32)popall_check_checksum);
6001     current_compile_p=get_target();
6002    
6003     bi->deplist=NULL;
6004     for (i=0;i<2;i++) {
6005     bi->dep[i].prev_p=NULL;
6006     bi->dep[i].next=NULL;
6007     }
6008     bi->env=default_ss;
6009     bi->status=BI_INVALID;
6010     bi->havestate=0;
6011     //bi->env=empty_ss;
6012     }
6013    
6014 gbeauche 1.17 static bool avoid_opcode(uae_u32 opcode)
6015     {
6016     #if JIT_DEBUG
6017     struct instr *dp = &table68k[opcode];
6018     // filter opcodes per type, integral value, or whatever
6019     #endif
6020     return false;
6021     }
6022    
6023 gbeauche 1.1 void build_comp(void)
6024     {
6025     int i;
6026     int jumpcount=0;
6027     unsigned long opcode;
6028     struct comptbl* tbl=op_smalltbl_0_comp_ff;
6029     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6030     int count;
6031     int cpu_level = 0; // 68000 (default)
6032     if (CPUType == 4)
6033     cpu_level = 4; // 68040 with FPU
6034     else {
6035     if (FPUType)
6036     cpu_level = 3; // 68020 with FPU
6037     else if (CPUType >= 2)
6038     cpu_level = 2; // 68020
6039     else if (CPUType == 1)
6040     cpu_level = 1; // 68010
6041     }
6042     struct cputbl *nfctbl = (
6043     cpu_level == 4 ? op_smalltbl_0_nf
6044     : cpu_level == 3 ? op_smalltbl_1_nf
6045     : cpu_level == 2 ? op_smalltbl_2_nf
6046     : cpu_level == 1 ? op_smalltbl_3_nf
6047     : op_smalltbl_4_nf);
6048    
6049     write_log ("<JIT compiler> : building compiler function tables\n");
6050    
6051     for (opcode = 0; opcode < 65536; opcode++) {
6052     nfcpufunctbl[opcode] = op_illg_1;
6053     compfunctbl[opcode] = NULL;
6054     nfcompfunctbl[opcode] = NULL;
6055     prop[opcode].use_flags = 0x1f;
6056     prop[opcode].set_flags = 0x1f;
6057     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6058     }
6059    
6060     for (i = 0; tbl[i].opcode < 65536; i++) {
6061     int cflow = table68k[tbl[i].opcode].cflow;
6062 gbeauche 1.10 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6063     cflow = fl_const_jump;
6064 gbeauche 1.8 else
6065 gbeauche 1.10 cflow &= ~fl_const_jump;
6066     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6067 gbeauche 1.1
6068     int uses_fpu = tbl[i].specific & 32;
6069 gbeauche 1.17 if ((uses_fpu && avoid_fpu) || avoid_opcode(tbl[i].opcode))
6070 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6071     else
6072     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6073     }
6074 gbeauche 1.8
6075 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6076     int uses_fpu = nftbl[i].specific & 32;
6077 gbeauche 1.17 if ((uses_fpu && avoid_fpu) || avoid_opcode(nftbl[i].opcode))
6078 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6079     else
6080     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6081    
6082     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6083     }
6084    
6085     for (i = 0; nfctbl[i].handler; i++) {
6086     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6087     }
6088    
6089     for (opcode = 0; opcode < 65536; opcode++) {
6090     compop_func *f;
6091     compop_func *nff;
6092     cpuop_func *nfcf;
6093     int isaddx,cflow;
6094    
6095     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6096     continue;
6097    
6098     if (table68k[opcode].handler != -1) {
6099     f = compfunctbl[cft_map(table68k[opcode].handler)];
6100     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6101     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6102     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6103     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6104     prop[cft_map(opcode)].cflow = cflow;
6105     prop[cft_map(opcode)].is_addx = isaddx;
6106     compfunctbl[cft_map(opcode)] = f;
6107     nfcompfunctbl[cft_map(opcode)] = nff;
6108     Dif (nfcf == op_illg_1)
6109     abort();
6110     nfcpufunctbl[cft_map(opcode)] = nfcf;
6111     }
6112     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6113     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6114     }
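         /* Note on the loop above: table68k[opcode].handler is not a code
            pointer here but the number of the "canonical" opcode this one
            shares an implementation with; the loop copies that sibling's
            table entries and flag properties onto each variant opcode. */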
6115     for (i = 0; nfctbl[i].handler != NULL; i++) {
6116     if (nfctbl[i].specific)
6117     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6118     }
6119    
6120     count=0;
6121     for (opcode = 0; opcode < 65536; opcode++) {
6122     if (compfunctbl[cft_map(opcode)])
6123     count++;
6124     }
6125     write_log("<JIT compiler> : supposedly %d compilable opcodes!\n",count);
6126    
6127     /* Initialise state */
6128     create_popalls();
6129     alloc_cache();
6130     reset_lists();
6131    
6132     for (i=0;i<TAGSIZE;i+=2) {
6133     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6134     cache_tags[i+1].bi=NULL;
6135     }
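         /* cache_tags[] is laid out in pairs -- even slots hold the handler
            that gets jumped through, odd slots the owning blockinfo -- hence
            the step of 2 in the loop above. */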
6136    
6137     #if 0
6138     for (i=0;i<N_REGS;i++) {
6139     empty_ss.nat[i].holds=-1;
6140     empty_ss.nat[i].validsize=0;
6141     empty_ss.nat[i].dirtysize=0;
6142     }
6143     #endif
6144     for (i=0;i<VREGS;i++) {
6145     empty_ss.virt[i]=L_NEEDED;
6146     }
6147     for (i=0;i<N_REGS;i++) {
6148     empty_ss.nat[i]=L_UNKNOWN;
6149     }
6150     default_ss=empty_ss;
6151     }
6152    
6153    
6154     static void flush_icache_none(int n)
6155     {
6156     /* Nothing to do. */
6157     }
6158    
6159     static void flush_icache_hard(int n)
6160     {
6162     blockinfo* bi, *dbi;
6163    
6164     hard_flush_count++;
6165     #if 0
6166     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6167     n,regs.pc,regs.pc_p,current_cache_size/1024);
6168     current_cache_size = 0;
6169     #endif
6170     bi=active;
6171     while(bi) {
6172     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6173     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6174     dbi=bi; bi=bi->next;
6175     free_blockinfo(dbi);
6176     }
6177     bi=dormant;
6178     while(bi) {
6179     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6180     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6181     dbi=bi; bi=bi->next;
6182     free_blockinfo(dbi);
6183     }
6184    
6185     reset_lists();
6186     if (!compiled_code)
6187     return;
6188     current_compile_p=compiled_code;
6189     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6190     }
6191    
6192    
6193     /* "Soft flushing" --- instead of actually throwing everything away,
6194     we simply mark everything as "needs to be checked".
6195     */
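         /* Concretely: blocks that are still presumed valid are re-routed to
            popall_check_checksum (see below), so their code range gets
            checksummed before their next execution; only blocks already
            marked BI_INVALID or BI_NEED_RECOMP lose their handler outright.
            Afterwards the whole active list is moved onto the dormant list. */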
6196    
6197     static inline void flush_icache_lazy(int n)
6198     {
6200     blockinfo* bi;
6201     blockinfo* bi2;
6202    
6203     soft_flush_count++;
6204     if (!active)
6205     return;
6206    
6207     bi=active;
6208     while (bi) {
6209     uae_u32 cl=cacheline(bi->pc_p);
6210     if (bi->status==BI_INVALID ||
6211     bi->status==BI_NEED_RECOMP) {
6212     if (bi==cache_tags[cl+1].bi)
6213     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6214     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6215     set_dhtu(bi,bi->direct_pen);
6216     bi->status=BI_INVALID;
6217     }
6218     else {
6219     if (bi==cache_tags[cl+1].bi)
6220     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6221     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6222     set_dhtu(bi,bi->direct_pcc);
6223     bi->status=BI_NEED_CHECK;
6224     }
6225     bi2=bi;
6226     bi=bi->next;
6227     }
6228     /* bi2 is now the last entry in the active list */
6229     bi2->next=dormant;
6230     if (dormant)
6231     dormant->prev_p=&(bi2->next);
6232    
6233     dormant=active;
6234     active->prev_p=&dormant;
6235     active=NULL;
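         /* The entire active list has now been spliced onto the front of the
            dormant list in O(1); blocks leave it again via the checksum check
            on their next dispatch. */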
6236     }
6237    
6238     static void catastrophe(void)
6239     {
6240     abort();
6241     }
6242    
6243     int failure;
6244    
6245     #define TARGET_M68K 0
6246     #define TARGET_POWERPC 1
6247     #define TARGET_X86 2
6248     #if defined(i386) || defined(__i386__)
6249     #define TARGET_NATIVE TARGET_X86
6250     #endif
6251     #if defined(powerpc) || defined(__powerpc__)
6252     #define TARGET_NATIVE TARGET_POWERPC
6253     #endif
6254    
6255     #ifdef ENABLE_MON
6256     static uae_u32 mon_read_byte_jit(uae_u32 addr)
6257     {
6258     uae_u8 *m = (uae_u8 *)addr;
6259     return (uae_u32)(*m);
6260     }
6261    
6262     static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
6263     {
6264     uae_u8 *m = (uae_u8 *)addr;
6265     *m = b;
6266     }
6267     #endif
6268    
6269     void disasm_block(int target, uint8 * start, size_t length)
6270     {
6271     if (!JITDebug)
6272     return;
6273    
6274     #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6275     char disasm_str[200];
6276     sprintf(disasm_str, "%s $%x $%x",
6277     target == TARGET_M68K ? "d68" :
6278     target == TARGET_X86 ? "d86" :
6279     target == TARGET_POWERPC ? "d" : "x",
6280     (uae_u32)start, (uae_u32)(start + length - 1));
6281    
6282     uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
6283     void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;
6284    
6285     mon_read_byte = mon_read_byte_jit;
6286     mon_write_byte = mon_write_byte_jit;
6287    
6288     char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6289     mon(4, arg);
6290    
6291     mon_read_byte = old_mon_read_byte;
6292     mon_write_byte = old_mon_write_byte;
6293     #endif
6294     }
6295    
6296     static inline void disasm_native_block(uint8 *start, size_t length)
6297     {
6298     disasm_block(TARGET_NATIVE, start, length);
6299     }
6300    
6301     static inline void disasm_m68k_block(uint8 *start, size_t length)
6302     {
6303     disasm_block(TARGET_M68K, start, length);
6304     }
6305    
6306     #ifdef HAVE_GET_WORD_UNSWAPPED
6307     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6308     #else
6309     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6310     #endif
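         /* With HAVE_GET_WORD_UNSWAPPED, opcodes are fetched from memory
            without byte-swapping on little-endian hosts; presumably cft_map()
            applies the matching swap to the table indices, which is why every
            table lookup above goes through it. */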
6311    
6312     #if JIT_DEBUG
6313     static uae_u8 *last_regs_pc_p = 0;
6314     static uae_u8 *last_compiled_block_addr = 0;
6315    
6316     void compiler_dumpstate(void)
6317     {
6318     if (!JITDebug)
6319     return;
6320    
6321     write_log("### Host addresses\n");
6322     write_log("MEM_BASE : %x\n", MEMBaseDiff);
6323     write_log("PC_P : %p\n", &regs.pc_p);
6324     write_log("SPCFLAGS : %p\n", &regs.spcflags);
6325     write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6326     write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6327     write_log("\n");
6328    
6329     write_log("### M68k processor state\n");
6330     m68k_dumpstate(0);
6331     write_log("\n");
6332    
6333     write_log("### Block in Mac address space\n");
6334     write_log("M68K block : %p\n",
6335     (void *)get_virtual_address(last_regs_pc_p));
6336     write_log("Native block : %p (%d bytes)\n",
6337     (void *)get_virtual_address(last_compiled_block_addr),
6338     get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6339     write_log("\n");
6340     }
6341     #endif
6342    
6343     static void compile_block(cpu_history* pc_hist, int blocklen)
6344     {
6345     if (letit && compiled_code) {
6346     #if PROFILE_COMPILE_TIME
6347     compile_count++;
6348     clock_t start_time = clock();
6349     #endif
6350     #if JIT_DEBUG
6351     bool disasm_this_block = false;
6352     #endif
6353    
6354     /* OK, here we need to 'compile' a block */
6355     int i;
6356     int r;
6357     int was_comp=0;
6358     uae_u8 liveflags[MAXRUN+1];
6359 gbeauche 1.8 #if USE_CHECKSUM_INFO
6360     bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6361     uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
6362     uae_u32 min_pcp=max_pcp;
6363     #else
6364 gbeauche 1.1 uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
6365     uae_u32 min_pcp=max_pcp;
6366 gbeauche 1.8 #endif
6367 gbeauche 1.1 uae_u32 cl=cacheline(pc_hist[0].location);
6368     void* specflags=(void*)&regs.spcflags;
6369     blockinfo* bi=NULL;
6370     blockinfo* bi2;
6371     int extra_len=0;
6372    
6373     redo_current_block=0;
6374     if (current_compile_p>=max_compile_start)
6375     flush_icache_hard(7);
6376    
6377     alloc_blockinfos();
6378    
6379     bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6380     bi2=get_blockinfo(cl);
6381    
6382     optlev=bi->optlevel;
6383     if (bi->status!=BI_INVALID) {
6384     Dif (bi!=bi2) {
6385     /* I don't think it can happen anymore. Shouldn't, in
6386     any case. So let's make sure... */
6387     write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6388     bi->count,bi->optlevel,bi->handler_to_use,
6389     cache_tags[cl].handler);
6390     abort();
6391     }
6392    
6393     Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6394     write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6395     /* What the heck? We are not supposed to be here! */
6396     abort();
6397     }
6398     }
6399     if (bi->count==-1) {
6400     optlev++;
6401     while (!optcount[optlev])
6402     optlev++;
6403     bi->count=optcount[optlev]-1;
6404     }
6405     current_block_pc_p=(uae_u32)pc_hist[0].location;
6406    
6407     remove_deps(bi); /* We are about to create new code */
6408     bi->optlevel=optlev;
6409     bi->pc_p=(uae_u8*)pc_hist[0].location;
6410 gbeauche 1.8 #if USE_CHECKSUM_INFO
6411     free_checksum_info_chain(bi->csi);
6412     bi->csi = NULL;
6413     #endif
6414 gbeauche 1.1
6415     liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6416     i=blocklen;
6417     while (i--) {
6418     uae_u16* currpcp=pc_hist[i].location;
6419     uae_u32 op=DO_GET_OPCODE(currpcp);
6420    
6421 gbeauche 1.8 #if USE_CHECKSUM_INFO
6422     trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6423     #if USE_INLINING
6424     if (is_const_jump(op)) {
6425     checksum_info *csi = alloc_checksum_info();
6426     csi->start_p = (uae_u8 *)min_pcp;
6427     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6428     csi->next = bi->csi;
6429     bi->csi = csi;
6430     max_pcp = (uae_u32)currpcp;
6431     }
6432     #endif
6433     min_pcp = (uae_u32)currpcp;
6434     #else
6435 gbeauche 1.1 if ((uae_u32)currpcp<min_pcp)
6436     min_pcp=(uae_u32)currpcp;
6437     if ((uae_u32)currpcp>max_pcp)
6438     max_pcp=(uae_u32)currpcp;
6439 gbeauche 1.8 #endif
6440 gbeauche 1.1
6441     liveflags[i]=((liveflags[i+1]&
6442     (~prop[op].set_flags))|
6443     prop[op].use_flags);
6444     if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6445     liveflags[i]&= ~FLAG_Z; /* ADDX/SUBX read Z only to compute the sticky Z, so a dead Z stays dead */
6446     }
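         /* The loop above is a standard backward liveness pass over the CCR:
            liveflags[i] = (liveflags[i+1] & ~set_flags(op)) | use_flags(op).
            E.g. if the next instruction sets N and Z without reading them,
            N and Z are dead at this point even if they are live after it. */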
6447    
6448 gbeauche 1.8 #if USE_CHECKSUM_INFO
6449     checksum_info *csi = alloc_checksum_info();
6450     csi->start_p = (uae_u8 *)min_pcp;
6451     csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6452     csi->next = bi->csi;
6453     bi->csi = csi;
6454     #endif
6455    
6456 gbeauche 1.1 bi->needed_flags=liveflags[0];
6457    
6458 gbeauche 1.5 align_target(align_loops);
6459 gbeauche 1.1 was_comp=0;
6460    
6461     bi->direct_handler=(cpuop_func *)get_target();
6462     set_dhtu(bi,bi->direct_handler);
6463     bi->status=BI_COMPILING;
6464     current_block_start_target=(uae_u32)get_target();
6465    
6466     log_startblock();
6467    
6468     if (bi->count>=0) { /* Need to generate countdown code */
6469     raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6470     raw_sub_l_mi((uae_u32)&(bi->count),1);
6471     raw_jl((uae_u32)popall_recompile_block);
6472     }
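         /* Blocks start out at a low optimization level: the countdown code
            just emitted decrements bi->count on every entry, and once it
            drops below zero the block escapes to popall_recompile_block and
            is recompiled at the next level (cf. the optcount[] logic above). */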
6473     if (optlev==0) { /* No need to actually translate */
6474     /* Execute normally without keeping stats */
6475     raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6476     raw_jmp((uae_u32)popall_exec_nostats);
6477     }
6478     else {
6479     reg_alloc_run=0;
6480     next_pc_p=0;
6481     taken_pc_p=0;
6482     branch_cc=0;
6483    
6484     comp_pc_p=(uae_u8*)pc_hist[0].location;
6485     init_comp();
6486     was_comp=1;
6487    
6488     #if JIT_DEBUG
6489     if (JITDebug) {
6490     raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
6491     raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
6492     }
6493     #endif
6494    
6495     for (i=0;i<blocklen &&
6496     get_target_noopt()<max_compile_start;i++) {
6497     cpuop_func **cputbl;
6498     compop_func **comptbl;
6499     uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6500     needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6501     if (!needed_flags) {
6502     cputbl=nfcpufunctbl;
6503     comptbl=nfcompfunctbl;
6504     }
6505     else {
6506     cputbl=cpufunctbl;
6507     comptbl=compfunctbl;
6508     }
6509    
6510     failure = 1; // gb-- defaults to failure state
6511     if (comptbl[opcode] && optlev>1) {
6512     failure=0;
6513     if (!was_comp) {
6514     comp_pc_p=(uae_u8*)pc_hist[i].location;
6515     init_comp();
6516     }
6517 gbeauche 1.18 was_comp=1;
6518 gbeauche 1.1
6519     comptbl[opcode](opcode);
6520     freescratch();
6521     if (!(liveflags[i+1] & FLAG_CZNV)) {
6522     /* We can forget about flags */
6523     dont_care_flags();
6524     }
6525     #if INDIVIDUAL_INST
6526     flush(1);
6527     nop();
6528     flush(1);
6529     was_comp=0;
6530     #endif
6531     }
6532    
6533     if (failure) {
6534     if (was_comp) {
6535     flush(1);
6536     was_comp=0;
6537     }
6538     raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6539     #if USE_NORMAL_CALLING_CONVENTION
6540     raw_push_l_r(REG_PAR1);
6541     #endif
6542     raw_mov_l_mi((uae_u32)&regs.pc_p,
6543     (uae_u32)pc_hist[i].location);
6544     raw_call((uae_u32)cputbl[opcode]);
6545 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
6546     // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6547     raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
6548     #endif
6549 gbeauche 1.1 #if USE_NORMAL_CALLING_CONVENTION
6550     raw_inc_sp(4);
6551     #endif
6552    
6553     if (i < blocklen - 1) {
6554     uae_s8* branchadd;
6555    
6556     raw_mov_l_rm(0,(uae_u32)specflags);
6557     raw_test_l_rr(0,0);
6558     raw_jz_b_oponly();
6559     branchadd=(uae_s8 *)get_target();
6560     emit_byte(0);
6561     raw_jmp((uae_u32)popall_do_nothing);
6562     *branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
6563     }
6564     }
6565     }
6566     #if 1 /* This isn't completely kosher yet; it really needs to be
6567     integrated into a general inter-block-dependency scheme */
6568     if (next_pc_p && taken_pc_p &&
6569     was_comp && taken_pc_p==current_block_pc_p) {
6570     blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6571     blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6572     uae_u8 x=bi1->needed_flags;
6573    
6574     if (x==0xff || 1) { /* To be on the safe side */
6575     uae_u16* next=(uae_u16*)next_pc_p;
6576     uae_u32 op=DO_GET_OPCODE(next);
6577    
6578     x=0x1f;
6579     x&=(~prop[op].set_flags);
6580     x|=prop[op].use_flags;
6581     }
6582    
6583     x|=bi2->needed_flags;
6584     if (!(x & FLAG_CZNV)) {
6585     /* We can forget about flags */
6586     dont_care_flags();
6587     extra_len+=2; /* The next instruction now is part of this
6588     block */
6589     }
6590    
6591     }
6592     #endif
6593     log_flush();
6594    
6595     if (next_pc_p) { /* A branch was registered */
6596     uae_u32 t1=next_pc_p;
6597     uae_u32 t2=taken_pc_p;
6598     int cc=branch_cc;
6599    
6600     uae_u32* branchadd;
6601     uae_u32* tba;
6602     bigstate tmp;
6603     blockinfo* tbi;
6604    
6605     if (taken_pc_p<next_pc_p) {
6606     /* backward branch. Optimize for the "taken" case ---
6607     which means the raw_jcc should fall through when
6608     the 68k branch is taken. */
6609     t1=taken_pc_p;
6610     t2=next_pc_p;
6611     cc=branch_cc^1;
6612     }
6613    
6614     tmp=live; /* ouch! This is big... */
6615     raw_jcc_l_oponly(cc);
6616     branchadd=(uae_u32*)get_target();
6617     emit_long(0);
6618    
6619     /* predicted outcome */
6620     tbi=get_blockinfo_addr_new((void*)t1,1);
6621     match_states(tbi);
6622     raw_cmp_l_mi((uae_u32)specflags,0);
6623     raw_jcc_l_oponly(4);
6624     tba=(uae_u32*)get_target();
6625     emit_long(get_handler(t1)-((uae_u32)tba+4));
6626     raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
6627     raw_jmp((uae_u32)popall_do_nothing);
6628     create_jmpdep(bi,0,tba,t1);
6629    
6630 gbeauche 1.5 align_target(align_jumps);
6631 gbeauche 1.1 /* not-predicted outcome */
6632     *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6633     live=tmp; /* Ouch again */
6634     tbi=get_blockinfo_addr_new((void*)t2,1);
6635     match_states(tbi);
6636    
6637     //flush(1); /* Can only get here if was_comp==1 */
6638     raw_cmp_l_mi((uae_u32)specflags,0);
6639     raw_jcc_l_oponly(4);
6640     tba=(uae_u32*)get_target();
6641     emit_long(get_handler(t2)-((uae_u32)tba+4));
6642     raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
6643     raw_jmp((uae_u32)popall_do_nothing);
6644     create_jmpdep(bi,1,tba,t2);
6645     }
6646     else
6647     {
6648     if (was_comp) {
6649     flush(1);
6650     }
6651    
6652     /* Let's find out where next_handler is... */
6653     if (was_comp && isinreg(PC_P)) {
6654     r=live.state[PC_P].realreg;
6655     raw_and_l_ri(r,TAGMASK);
6656     int r2 = (r==0) ? 1 : 0;
6657     raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6658     raw_cmp_l_mi((uae_u32)specflags,0);
6659     raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6660     raw_jmp_r(r2);
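         /* Branchless dispatch, roughly: r2 = regs.spcflags ?
            popall_do_nothing : cache_tags[pc & TAGMASK].handler; jump r2.
            (The non-register fallback further down emits the same pattern.) */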
6661     }
6662     else if (was_comp && isconst(PC_P)) {
6663     uae_u32 v=live.state[PC_P].val;
6664     uae_u32* tba;
6665     blockinfo* tbi;
6666    
6667     tbi=get_blockinfo_addr_new((void*)v,1);
6668     match_states(tbi);
6669    
6670     raw_cmp_l_mi((uae_u32)specflags,0);
6671     raw_jcc_l_oponly(4);
6672     tba=(uae_u32*)get_target();
6673     emit_long(get_handler(v)-((uae_u32)tba+4));
6674     raw_mov_l_mi((uae_u32)&regs.pc_p,v);
6675     raw_jmp((uae_u32)popall_do_nothing);
6676     create_jmpdep(bi,0,tba,v);
6677     }
6678     else {
6679     r=REG_PC_TMP;
6680     raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
6681     raw_and_l_ri(r,TAGMASK);
6682     int r2 = (r==0) ? 1 : 0;
6683     raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6684     raw_cmp_l_mi((uae_u32)specflags,0);
6685     raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6686     raw_jmp_r(r2);
6687     }
6688     }
6689     }
6690    
6691     #if USE_MATCH
6692     if (callers_need_recompile(&live,&(bi->env))) {
6693     mark_callers_recompile(bi);
6694     }
6695    
6696     big_to_small_state(&live,&(bi->env));
6697     #endif
6698    
6699 gbeauche 1.8 #if USE_CHECKSUM_INFO
6700     remove_from_list(bi);
6701     if (trace_in_rom) {
6702     // No need to checksum that block trace on cache invalidation
6703     free_checksum_info_chain(bi->csi);
6704     bi->csi = NULL;
6705     add_to_dormant(bi);
6706     }
6707     else {
6708     calc_checksum(bi,&(bi->c1),&(bi->c2));
6709     add_to_active(bi);
6710     }
6711     #else
6712 gbeauche 1.1 if (next_pc_p+extra_len>=max_pcp &&
6713     next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6714     max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6715     else
6716     max_pcp+=LONGEST_68K_INST;
6717 gbeauche 1.7
6718 gbeauche 1.1 bi->len=max_pcp-min_pcp;
6719     bi->min_pcp=min_pcp;
6720 gbeauche 1.7
6721 gbeauche 1.1 remove_from_list(bi);
6722     if (isinrom(min_pcp) && isinrom(max_pcp)) {
6723     add_to_dormant(bi); /* No need to checksum it on cache flush.
6724     Please don't start changing ROMs in
6725     flight! */
6726     }
6727     else {
6728     calc_checksum(bi,&(bi->c1),&(bi->c2));
6729     add_to_active(bi);
6730     }
6731 gbeauche 1.8 #endif
6732 gbeauche 1.1
6733     current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6734    
6735     #if JIT_DEBUG
6736     if (JITDebug)
6737     bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6738    
6739     if (JITDebug && disasm_this_block) {
6740     uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6741     D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6742     uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6743     disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6744     D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6745     disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6746     getchar();
6747     }
6748     #endif
6749    
6750     log_dump();
6751 gbeauche 1.5 align_target(align_jumps);
6752 gbeauche 1.1
6753     /* This is the non-direct handler: several blocks may collide on one cache line, so verify that regs.pc_p really belongs to this block before jumping into its direct handler */
6754     bi->handler=
6755     bi->handler_to_use=(cpuop_func *)get_target();
6756     raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6757     raw_jnz((uae_u32)popall_cache_miss);
6758     comp_pc_p=(uae_u8*)pc_hist[0].location;
6759    
6760     bi->status=BI_FINALIZING;
6761     init_comp();
6762     match_states(bi);
6763     flush(1);
6764    
6765     raw_jmp((uae_u32)bi->direct_handler);
6766    
6767     current_compile_p=get_target();
6768     raise_in_cl_list(bi);
6769    
6770     /* We will flush soon, anyway, so let's do it now */
6771     if (current_compile_p>=max_compile_start)
6772     flush_icache_hard(7);
6773    
6774     bi->status=BI_ACTIVE;
6775     if (redo_current_block)
6776     block_need_recompile(bi);
6777    
6778     #if PROFILE_COMPILE_TIME
6779     compile_time += (clock() - start_time);
6780     #endif
6781     }
6782     }
6783    
6784     void do_nothing(void)
6785     {
6786     /* What did you expect this to do? */
6787     }
6788    
6789     void exec_nostats(void)
6790     {
6791     for (;;) {
6792     uae_u32 opcode = GET_OPCODE;
6793     (*cpufunctbl[opcode])(opcode);
6794     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6795     return; /* We will deal with the spcflags in the caller */
6796     }
6797     }
6798     }
6799    
6800     void execute_normal(void)
6801     {
6802     if (!check_for_cache_miss()) {
6803     cpu_history pc_hist[MAXRUN];
6804     int blocklen = 0;
6805     #if REAL_ADDRESSING || DIRECT_ADDRESSING
6806     start_pc_p = regs.pc_p;
6807     start_pc = get_virtual_address(regs.pc_p);
6808     #else
6809     start_pc_p = regs.pc_oldp;
6810     start_pc = regs.pc;
6811     #endif
6812     for (;;) { /* Take note: This is the do-it-normal loop */
6813     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6814     uae_u32 opcode = GET_OPCODE;
6815     #if FLIGHT_RECORDER
6816     m68k_record_step(m68k_getpc());
6817     #endif
6818     (*cpufunctbl[opcode])(opcode);
6819     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6820     compile_block(pc_hist, blocklen);
6821     return; /* We will deal with the spcflags in the caller */
6822     }
6823     /* No need to check regs.spcflags, because if they were set,
6824     we'd have ended up inside that "if" */
6825     }
6826     }
6827     }
6828    
6829     typedef void (*compiled_handler)(void);
6830    
6831 gbeauche 1.6 #ifdef X86_ASSEMBLY
6832     void (*m68k_compile_execute)(void) = NULL;
6833     #else
6834 gbeauche 1.1 void m68k_do_compile_execute(void)
6835     {
6836     for (;;) {
6837     ((compiled_handler)(pushall_call_handler))();
6838     /* Whenever we return from that, we should check spcflags */
6839     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6840     if (m68k_do_specialties ())
6841     return;
6842     }
6843     }
6844     }
6845 gbeauche 1.6 #endif