ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.18
Committed: 2003-03-21T19:12:44Z (21 years, 6 months ago) by gbeauche
Branch: MAIN
CVS Tags: nigel-build-12, nigel-build-13
Changes since 1.17: +250 -21 lines
Log Message:
Remove some dead code. Start implementation of optimized calls to interpretive
fallbacks for untranslatable instruction handlers. Disabled for now since
call_m_01() is not correctly implemented yet.

File Contents

# User Rev Content
1 gbeauche 1.11 /*
2     * compiler/compemu_support.cpp - Core dynamic translation engine
3     *
4     * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5     *
6     * Adaptation for Basilisk II and improvements, copyright 2000-2002
7     * Gwenole Beauchesne
8     *
9     * Basilisk II (C) 1997-2002 Christian Bauer
10     *
11     * This program is free software; you can redistribute it and/or modify
12     * it under the terms of the GNU General Public License as published by
13     * the Free Software Foundation; either version 2 of the License, or
14     * (at your option) any later version.
15     *
16     * This program is distributed in the hope that it will be useful,
17     * but WITHOUT ANY WARRANTY; without even the implied warranty of
18     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19     * GNU General Public License for more details.
20     *
21     * You should have received a copy of the GNU General Public License
22     * along with this program; if not, write to the Free Software
23     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24     */
25    
26 gbeauche 1.1 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27     #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28     #endif
29    
30 gbeauche 1.4 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31     #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32     #endif
33    
34 gbeauche 1.1 #define USE_MATCH 0
35    
36     /* kludge for Brian, so he can compile under MSVC++ */
37     #define USE_NORMAL_CALLING_CONVENTION 0
38    
39     #ifndef WIN32
40     #include <sys/types.h>
41     #include <sys/mman.h>
42     #endif
43    
44     #include <stdlib.h>
45     #include <fcntl.h>
46     #include <errno.h>
47    
48     #include "sysdeps.h"
49     #include "cpu_emulation.h"
50     #include "main.h"
51     #include "prefs.h"
52     #include "user_strings.h"
53 gbeauche 1.2 #include "vm_alloc.h"
54 gbeauche 1.1
55     #include "m68k.h"
56     #include "memory.h"
57     #include "readcpu.h"
58     #include "newcpu.h"
59     #include "comptbl.h"
60     #include "compiler/compemu.h"
61     #include "fpu/fpu.h"
62     #include "fpu/flags.h"
63    
64     #define DEBUG 1
65     #include "debug.h"
66    
67     #ifdef ENABLE_MON
68     #include "mon.h"
69     #endif
70    
71     #ifndef WIN32
72 gbeauche 1.9 #define PROFILE_COMPILE_TIME 1
73     #define PROFILE_UNTRANSLATED_INSNS 1
74 gbeauche 1.1 #endif
75    
76     #ifdef WIN32
77     #undef write_log
78     #define write_log dummy_write_log
79     static void dummy_write_log(const char *, ...) { }
80     #endif
81    
82     #if JIT_DEBUG
83     #undef abort
84     #define abort() do { \
85     fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
86     exit(EXIT_FAILURE); \
87     } while (0)
88     #endif
89    
90     #if PROFILE_COMPILE_TIME
91     #include <time.h>
92     static uae_u32 compile_count = 0;
93     static clock_t compile_time = 0;
94     static clock_t emul_start_time = 0;
95     static clock_t emul_end_time = 0;
96     #endif
97    
98 gbeauche 1.9 #if PROFILE_UNTRANSLATED_INSNS
99     const int untranslated_top_ten = 20;
100     static uae_u32 raw_cputbl_count[65536] = { 0, };
101     static uae_u16 opcode_nums[65536];
102    
103     static int untranslated_compfn(const void *e1, const void *e2)
104     {
105     return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
106     }
107     #endif
108    
109 gbeauche 1.1 compop_func *compfunctbl[65536];
110     compop_func *nfcompfunctbl[65536];
111     cpuop_func *nfcpufunctbl[65536];
112     uae_u8* comp_pc_p;
113    
114 gbeauche 1.6 // From newcpu.cpp
115     extern bool quit_program;
116    
117 gbeauche 1.1 // gb-- Extra data for Basilisk II/JIT
118     #if JIT_DEBUG
119     static bool JITDebug = false; // Enable runtime disassemblers through mon?
120     #else
121     const bool JITDebug = false; // Don't use JIT debug mode at all
122     #endif
123    
124     const uae_u32 MIN_CACHE_SIZE = 2048; // Minimal translation cache size (2048 KB)
125     static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
126 gbeauche 1.3 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
127 gbeauche 1.1 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
128     static bool avoid_fpu = true; // Flag: compile FPU instructions ?
129     static bool have_cmov = false; // target has CMOV instructions ?
130     static bool have_rat_stall = true; // target has partial register stalls ?
131 gbeauche 1.12 const bool tune_alignment = true; // Tune code alignments for running CPU ?
132     const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
133 gbeauche 1.15 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
134 gbeauche 1.5 static int align_loops = 32; // Align the start of loops
135     static int align_jumps = 32; // Align the start of jumps
136 gbeauche 1.1 static int zero_fd = -1;
137     static int optcount[10] = {
138     10, // How often a block has to be executed before it is translated
139     0, // How often to use naive translation
140     0, 0, 0, 0,
141     -1, -1, -1, -1
142     };
143    
/* Static per-opcode properties, consulted during translation. */
struct op_properties {
	uae_u8 use_flags;	/* flags consumed -- presumably a CC mask; TODO confirm against table builder */
	uae_u8 set_flags;	/* flags produced -- presumably a CC mask; TODO confirm against table builder */
	uae_u8 is_addx;		/* non-zero for ADDX-style instructions -- TODO confirm */
	uae_u8 cflow;		/* control-flow class, fl_* bits (see end_block/is_const_jump/may_trap) */
};
static op_properties prop[65536];	/* indexed by raw 68k opcode */
151    
152     static inline int end_block(uae_u32 opcode)
153     {
154     return (prop[opcode].cflow & fl_end_block);
155     }
156    
157 gbeauche 1.8 static inline bool is_const_jump(uae_u32 opcode)
158     {
159     return (prop[opcode].cflow == fl_const_jump);
160     }
161    
162 gbeauche 1.18 static inline bool may_trap(uae_u32 opcode)
163     {
164     return (prop[opcode].cflow & fl_trap);
165     }
166    
/* Map an opcode to the index used by the function tables.  When
 * HAVE_GET_WORD_UNSWAPPED is defined, opcodes are fetched byte-swapped,
 * so swap the two bytes back; otherwise this is the identity. */
static inline unsigned int cft_map (unsigned int f)
{
#ifdef HAVE_GET_WORD_UNSWAPPED
	return ((f & 0xff) << 8) | ((f >> 8) & 0xff);
#else
	return f;
#endif
}
175    
176 gbeauche 1.1 uae_u8* start_pc_p;
177     uae_u32 start_pc;
178     uae_u32 current_block_pc_p;
179     uae_u32 current_block_start_target;
180     uae_u32 needed_flags;
181     static uae_u32 next_pc_p;
182     static uae_u32 taken_pc_p;
183     static int branch_cc;
184     static int redo_current_block;
185    
186     int segvcount=0;
187     int soft_flush_count=0;
188     int hard_flush_count=0;
189     int checksum_count=0;
190     static uae_u8* current_compile_p=NULL;
191     static uae_u8* max_compile_start;
192     static uae_u8* compiled_code=NULL;
193     static uae_s32 reg_alloc_run;
194    
195     void* pushall_call_handler=NULL;
196     static void* popall_do_nothing=NULL;
197     static void* popall_exec_nostats=NULL;
198     static void* popall_execute_normal=NULL;
199     static void* popall_cache_miss=NULL;
200     static void* popall_recompile_block=NULL;
201     static void* popall_check_checksum=NULL;
202    
203     /* The 68k only ever executes from even addresses. So right now, we
204     * waste half the entries in this array
205     * UPDATE: We now use those entries to store the start of the linked
206     * lists that we maintain for each hash result.
207     */
208     cacheline cache_tags[TAGSIZE];
209     int letit=0;
210     blockinfo* hold_bi[MAX_HOLD_BI];
211     blockinfo* active;
212     blockinfo* dormant;
213    
214     /* 68040 */
215     extern struct cputbl op_smalltbl_0_nf[];
216     extern struct comptbl op_smalltbl_0_comp_nf[];
217     extern struct comptbl op_smalltbl_0_comp_ff[];
218    
219     /* 68020 + 68881 */
220     extern struct cputbl op_smalltbl_1_nf[];
221    
222     /* 68020 */
223     extern struct cputbl op_smalltbl_2_nf[];
224    
225     /* 68010 */
226     extern struct cputbl op_smalltbl_3_nf[];
227    
228     /* 68000 */
229     extern struct cputbl op_smalltbl_4_nf[];
230    
231     /* 68000 slow but compatible. */
232     extern struct cputbl op_smalltbl_5_nf[];
233    
234     static void flush_icache_hard(int n);
235     static void flush_icache_lazy(int n);
236     static void flush_icache_none(int n);
237     void (*flush_icache)(int n) = flush_icache_none;
238    
239    
240    
241     bigstate live;
242     smallstate empty_ss;
243     smallstate default_ss;
244     static int optlev;
245    
246     static int writereg(int r, int size);
247     static void unlock2(int r);
248     static void setlock(int r);
249     static int readreg_specific(int r, int size, int spec);
250     static int writereg_specific(int r, int size, int spec);
251     static void prepare_for_call_1(void);
252     static void prepare_for_call_2(void);
253     static void align_target(uae_u32 a);
254    
255     static uae_s32 nextused[VREGS];
256    
257     uae_u32 m68k_pc_offset;
258    
259     /* Some arithmetic operations can be optimized away if the operands
260     * are known to be constant. But that's only a good idea when the
261     * side effects they would have on the flags are not important. This
262     * variable indicates whether we need the side effects or not
263     */
264     uae_u32 needflags=0;
265    
266     /* Flag handling is complicated.
267     *
268     * x86 instructions create flags, which quite often are exactly what we
269     * want. So at times, the "68k" flags are actually in the x86 flags.
270     *
271     * Then again, sometimes we do x86 instructions that clobber the x86
272     * flags, but don't represent a corresponding m68k instruction. In that
273     * case, we have to save them.
274     *
275     * We used to save them to the stack, but now store them back directly
276     * into the regflags.cznv of the traditional emulation. Thus some odd
277     * names.
278     *
279     * So flags can be in either of two places (used to be three; boy were
280     * things complicated back then!); And either place can contain either
281     * valid flags or invalid trash (and on the stack, there was also the
282     * option of "nothing at all", now gone). A couple of variables keep
283     * track of the respective states.
284     *
285     * To make things worse, we might or might not be interested in the flags.
286     * by default, we are, but a call to dont_care_flags can change that
287     * until the next call to live_flags. If we are not, pretty much whatever
288     * is in the register and/or the native flags is seen as valid.
289     */
290    
291     static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
292     {
293     return cache_tags[cl+1].bi;
294     }
295    
296     static __inline__ blockinfo* get_blockinfo_addr(void* addr)
297     {
298     blockinfo* bi=get_blockinfo(cacheline(addr));
299    
300     while (bi) {
301     if (bi->pc_p==addr)
302     return bi;
303     bi=bi->next_same_cl;
304     }
305     return NULL;
306     }
307    
308    
309     /*******************************************************************
310     * All sorts of list related functions for all of the lists *
311     *******************************************************************/
312    
/* Unlink `bi' from the doubly-linked list of blocks hashing to the same
   cache line, then refresh the line's dispatch handler: the next block
   on the line takes over, or the line falls back to normal execution. */
static __inline__ void remove_from_cl_list(blockinfo* bi)
{
    uae_u32 cl=cacheline(bi->pc_p);

    if (bi->prev_same_cl_p)
	*(bi->prev_same_cl_p)=bi->next_same_cl;
    if (bi->next_same_cl)
	bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
    /* cache_tags[cl] is the dispatched handler; cache_tags[cl+1] the list head */
    if (cache_tags[cl+1].bi)
	cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
    else
	cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
}
326    
327     static __inline__ void remove_from_list(blockinfo* bi)
328     {
329     if (bi->prev_p)
330     *(bi->prev_p)=bi->next;
331     if (bi->next)
332     bi->next->prev_p=bi->prev_p;
333     }
334    
/* Take `bi' off both bookkeeping structures: the active/dormant list
   and its cache-line hash list. */
static __inline__ void remove_from_lists(blockinfo* bi)
{
    remove_from_list(bi);
    remove_from_cl_list(bi);
}
340    
/* Push `bi' at the head of its cache line's block list and make it the
   line's dispatch target. */
static __inline__ void add_to_cl_list(blockinfo* bi)
{
    uae_u32 cl=cacheline(bi->pc_p);

    if (cache_tags[cl+1].bi)
	cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
    bi->next_same_cl=cache_tags[cl+1].bi;

    cache_tags[cl+1].bi=bi;
    bi->prev_same_cl_p=&(cache_tags[cl+1].bi);

    /* the newest block always wins the cache line's handler slot */
    cache_tags[cl].handler=bi->handler_to_use;
}
354    
/* Move `bi' to the front of its cache line list (MRU position), so it
   is found first on the next dispatch.  Order matters: must unlink
   before re-inserting. */
static __inline__ void raise_in_cl_list(blockinfo* bi)
{
    remove_from_cl_list(bi);
    add_to_cl_list(bi);
}
360    
361     static __inline__ void add_to_active(blockinfo* bi)
362     {
363     if (active)
364     active->prev_p=&(bi->next);
365     bi->next=active;
366    
367     active=bi;
368     bi->prev_p=&active;
369     }
370    
371     static __inline__ void add_to_dormant(blockinfo* bi)
372     {
373     if (dormant)
374     dormant->prev_p=&(bi->next);
375     bi->next=dormant;
376    
377     dormant=bi;
378     bi->prev_p=&dormant;
379     }
380    
381     static __inline__ void remove_dep(dependency* d)
382     {
383     if (d->prev_p)
384     *(d->prev_p)=d->next;
385     if (d->next)
386     d->next->prev_p=d->prev_p;
387     d->prev_p=NULL;
388     d->next=NULL;
389     }
390    
391     /* This block's code is about to be thrown away, so it no longer
392     depends on anything else */
393     static __inline__ void remove_deps(blockinfo* bi)
394     {
395     remove_dep(&(bi->dep[0]));
396     remove_dep(&(bi->dep[1]));
397     }
398    
/* Re-patch the 32-bit relative displacement of the jump recorded in `d'
   so it targets handler `a'.  The displacement is relative to the end
   of the 4-byte offset field (x86 near-jump encoding). */
static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
{
    *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
}
403    
404     /********************************************************************
405     * Soft flush handling support functions *
406     ********************************************************************/
407    
/* Change the direct handler that callers of this block should jump to.
   Every jump recorded in the block's dependency list is re-patched in
   place, so already-emitted callers immediately target `dh'. */
static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
{
    //write_log("bi is %p\n",bi);
    if (dh!=bi->direct_handler_to_use) {
	dependency* x=bi->deplist;
	//write_log("bi->deplist=%p\n",bi->deplist);
	while (x) {
	    //write_log("x is %p\n",x);
	    //write_log("x->next is %p\n",x->next);
	    //write_log("x->prev_p is %p\n",x->prev_p);

	    /* only patch dependencies that have an emitted jump offset */
	    if (x->jmp_off) {
		adjust_jmpdep(x,dh);
	    }
	    x=x->next;
	}
	bi->direct_handler_to_use=dh;
    }
}
427    
/* Reset a blockinfo to the "not compiled" state: execution falls back
   to popall_execute_normal, the execution countdown is re-armed, and
   any outgoing jump dependencies are severed. */
static __inline__ void invalidate_block(blockinfo* bi)
{
    int i;

    bi->optlevel=0;
    bi->count=optcount[0]-1;	/* executions left before (re)compilation */
    bi->handler=NULL;
    bi->handler_to_use=(cpuop_func *)popall_execute_normal;
    bi->direct_handler=NULL;
    set_dhtu(bi,bi->direct_pen);
    bi->needed_flags=0xff;	/* conservatively assume all flags are needed */
    bi->status=BI_INVALID;
    for (i=0;i<2;i++) {
	bi->dep[i].jmp_off=NULL;
	bi->dep[i].target=NULL;
    }
    remove_deps(bi);
}
446    
/* Record that block `bi' contains a direct jump (dependency slot `i',
   patch location `jmpaddr') into the block starting at 68k address
   `target', and link the dependency into the target's dependency list
   so the jump can be re-patched when the target's handler changes.
   Aborts if no blockinfo exists for `target'. */
static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
{
    blockinfo* tbi=get_blockinfo_addr((void*)target);

    Dif(!tbi) {
	write_log("Could not create jmpdep!\n");
	abort();
    }
    bi->dep[i].jmp_off=jmpaddr;
    bi->dep[i].source=bi;
    bi->dep[i].target=tbi;
    bi->dep[i].next=tbi->deplist;
    if (bi->dep[i].next)
	bi->dep[i].next->prev_p=&(bi->dep[i].next);
    bi->dep[i].prev_p=&(tbi->deplist);
    tbi->deplist=&(bi->dep[i]);
}
464    
/* Demote a block to the "needs recompilation" state: all entry paths
   (direct handler, generic handler, cache-line dispatch) are routed
   back to the normal-execution/compile path. */
static __inline__ void block_need_recompile(blockinfo * bi)
{
  uae_u32 cl = cacheline(bi->pc_p);

  set_dhtu(bi, bi->direct_pen);
  bi->direct_handler = bi->direct_pen;

  bi->handler_to_use = (cpuop_func *)popall_execute_normal;
  bi->handler = (cpuop_func *)popall_execute_normal;
  /* only clear the dispatch slot if this block currently owns it */
  if (bi == cache_tags[cl + 1].bi)
	cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
  bi->status = BI_NEED_RECOMP;
}
478    
/* Walk the dependency list of `bi' and force every block that jumps
   directly into it to be recompiled too, recursively.  A caller that
   is itself being compiled right now cannot be recompiled from here,
   so the current compilation is flagged for a redo instead. */
static __inline__ void mark_callers_recompile(blockinfo * bi)
{
  dependency *x = bi->deplist;

  while (x)	{
	dependency *next = x->next;	/* This disappears when we mark for
					 * recompilation and thus remove the
					 * blocks from the lists */
	if (x->jmp_off) {
	  blockinfo *cbi = x->source;

	  Dif(cbi->status == BI_INVALID) {
		// write_log("invalid block in dependency list\n"); // FIXME?
		// abort();
	  }
	  if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
		block_need_recompile(cbi);
		mark_callers_recompile(cbi);
	  }
	  else if (cbi->status == BI_COMPILING) {
		redo_current_block = 1;
	  }
	  else if (cbi->status == BI_NEED_RECOMP) {
		/* nothing */
	  }
	  else {
		//write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
	  }
	}
	x = next;
  }
}
511    
/* Look up the blockinfo for `addr', or bind one of the pre-allocated
   hold_bi[] entries to that address if none exists yet.  Aborts when
   the pre-allocated stock is exhausted (alloc_blockinfos() must have
   been called beforehand).  `setstate' is currently unused. */
static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
{
    blockinfo* bi=get_blockinfo_addr(addr);
    int i;

    if (!bi) {
	for (i=0;i<MAX_HOLD_BI && !bi;i++) {
	    if (hold_bi[i]) {
		uae_u32 cl=cacheline(addr);

		bi=hold_bi[i];
		hold_bi[i]=NULL;
		bi->pc_p=(uae_u8 *)addr;
		invalidate_block(bi);
		add_to_active(bi);
		add_to_cl_list(bi);

	    }
	}
    }
    if (!bi) {
	write_log("Looking for blockinfo, can't find free one\n");
	abort();
    }
    return bi;
}
538    
539     static void prepare_block(blockinfo* bi);
540    
541     /* Management of blockinfos.
542    
543     A blockinfo struct is allocated whenever a new block has to be
544     compiled. If the list of free blockinfos is empty, we allocate a new
545     pool of blockinfos and link the newly created blockinfos altogether
546     into the list of free blockinfos. Otherwise, we simply pop a structure
547 gbeauche 1.7 off the free list.
548 gbeauche 1.1
549     Blockinfo are lazily deallocated, i.e. chained altogether in the
550     list of free blockinfos whenever a translation cache flush (hard or
551     soft) request occurs.
552     */
553    
554 gbeauche 1.7 template< class T >
555     class LazyBlockAllocator
556     {
557     enum {
558     kPoolSize = 1 + 4096 / sizeof(T)
559     };
560     struct Pool {
561     T chunk[kPoolSize];
562     Pool * next;
563     };
564     Pool * mPools;
565     T * mChunks;
566     public:
567     LazyBlockAllocator() : mPools(0), mChunks(0) { }
568     ~LazyBlockAllocator();
569     T * acquire();
570     void release(T * const);
571 gbeauche 1.1 };
572    
573 gbeauche 1.7 template< class T >
574     LazyBlockAllocator<T>::~LazyBlockAllocator()
575 gbeauche 1.1 {
576 gbeauche 1.7 Pool * currentPool = mPools;
577     while (currentPool) {
578     Pool * deadPool = currentPool;
579     currentPool = currentPool->next;
580     free(deadPool);
581     }
582     }
583    
584     template< class T >
585     T * LazyBlockAllocator<T>::acquire()
586     {
587     if (!mChunks) {
588     // There is no chunk left, allocate a new pool and link the
589     // chunks into the free list
590     Pool * newPool = (Pool *)malloc(sizeof(Pool));
591     for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
592     chunk->next = mChunks;
593     mChunks = chunk;
594 gbeauche 1.1 }
595 gbeauche 1.7 newPool->next = mPools;
596     mPools = newPool;
597     }
598     T * chunk = mChunks;
599     mChunks = chunk->next;
600     return chunk;
601     }
602    
/* Return a chunk to the free list; the memory is retained for reuse
   until the allocator itself is destroyed. */
template< class T >
void LazyBlockAllocator<T>::release(T * const chunk)
{
	chunk->next = mChunks;
	mChunks = chunk;
}
609    
/* Trivial allocator that carves objects straight out of the translation
   cache (current_compile_p).  Used when USE_SEPARATE_BIA is off, so the
   records live alongside the generated code and are reclaimed wholesale
   when the cache is invalidated -- release() is therefore a no-op. */
template< class T >
class HardBlockAllocator
{
public:
	T * acquire() {
		T * data = (T *)current_compile_p;
		current_compile_p += sizeof(T);
		return data;
	}

	void release(T * const chunk) {
		// Deallocated on invalidation
	}
};
624    
625     #if USE_SEPARATE_BIA
626     static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
627     static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
628 gbeauche 1.1 #else
629 gbeauche 1.7 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
630     static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
631 gbeauche 1.1 #endif
632    
633 gbeauche 1.8 static __inline__ checksum_info *alloc_checksum_info(void)
634     {
635     checksum_info *csi = ChecksumInfoAllocator.acquire();
636     csi->next = NULL;
637     return csi;
638     }
639    
640     static __inline__ void free_checksum_info(checksum_info *csi)
641     {
642     csi->next = NULL;
643     ChecksumInfoAllocator.release(csi);
644     }
645    
646     static __inline__ void free_checksum_info_chain(checksum_info *csi)
647     {
648     while (csi != NULL) {
649     checksum_info *csi2 = csi->next;
650     free_checksum_info(csi);
651     csi = csi2;
652     }
653     }
654 gbeauche 1.7
655     static __inline__ blockinfo *alloc_blockinfo(void)
656 gbeauche 1.1 {
657 gbeauche 1.7 blockinfo *bi = BlockInfoAllocator.acquire();
658     #if USE_CHECKSUM_INFO
659     bi->csi = NULL;
660 gbeauche 1.1 #endif
661 gbeauche 1.7 return bi;
662 gbeauche 1.1 }
663    
664 gbeauche 1.7 static __inline__ void free_blockinfo(blockinfo *bi)
665 gbeauche 1.1 {
666 gbeauche 1.7 #if USE_CHECKSUM_INFO
667 gbeauche 1.8 free_checksum_info_chain(bi->csi);
668     bi->csi = NULL;
669 gbeauche 1.1 #endif
670 gbeauche 1.7 BlockInfoAllocator.release(bi);
671 gbeauche 1.1 }
672    
673     static __inline__ void alloc_blockinfos(void)
674     {
675     int i;
676     blockinfo* bi;
677    
678     for (i=0;i<MAX_HOLD_BI;i++) {
679     if (hold_bi[i])
680     return;
681     bi=hold_bi[i]=alloc_blockinfo();
682     prepare_block(bi);
683     }
684     }
685    
686     /********************************************************************
687     * Functions to emit data into memory, and other general support *
688     ********************************************************************/
689    
690     static uae_u8* target;
691    
/* Hook for one-time initialization of the code emitter (currently a
   no-op). */
static void emit_init(void)
{
}
695    
696     static __inline__ void emit_byte(uae_u8 x)
697     {
698     *target++=x;
699     }
700    
/* Append a 16-bit value to the code output stream.
   NOTE(review): potentially unaligned store through a cast pointer --
   fine on x86 (the only backend here, see codegen_x86.cpp) but not
   portable to strict-alignment targets. */
static __inline__ void emit_word(uae_u16 x)
{
    *((uae_u16*)target)=x;
    target+=2;
}
706    
/* Append a 32-bit value to the code output stream.
   NOTE(review): unaligned store via cast -- x86-specific, see the note
   on emit_word. */
static __inline__ void emit_long(uae_u32 x)
{
    *((uae_u32*)target)=x;
    target+=4;
}
712    
713 gbeauche 1.12 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
714     {
715     memcpy((uae_u8 *)target,block,blocklen);
716     target+=blocklen;
717     }
718    
719 gbeauche 1.1 static __inline__ uae_u32 reverse32(uae_u32 v)
720     {
721     #if 1
722     // gb-- We have specialized byteswapping functions, just use them
723     return do_byteswap_32(v);
724     #else
725     return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
726     #endif
727     }
728    
729     /********************************************************************
730     * Getting the information about the target CPU *
731     ********************************************************************/
732    
733     #include "codegen_x86.cpp"
734    
/* Point the code emitter at a new output position. */
void set_target(uae_u8* t)
{
	target=t;
}
739    
/* Current emitter output position (raw, no optimization hook). */
static __inline__ uae_u8* get_target_noopt(void)
{
	return target;
}
744    
/* Current emitter output position. */
__inline__ uae_u8* get_target(void)
{
	return get_target_noopt();
}
749    
750    
751     /********************************************************************
752     * Flags status handling. EMIT TIME! *
753     ********************************************************************/
754    
755     static void bt_l_ri_noclobber(R4 r, IMM i);
756    
/* Ensure the emulated 68k flags are materialized in the native x86
   flags register.  If they currently live in regflags.cznv ("on the
   stack", historically), emit code to load them back.  Aborts if
   neither location holds valid flags. */
static void make_flags_live_internal(void)
{
    if (live.flags_in_flags==VALID)
	return;
    Dif (live.flags_on_stack==TRASH) {
	write_log("Want flags, got something on stack, but it is TRASH\n");
	abort();
    }
    if (live.flags_on_stack==VALID) {
	int tmp;
	tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
	raw_reg_to_flags(tmp);
	unlock2(tmp);

	live.flags_in_flags=VALID;
	return;
    }
    write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
	      live.flags_in_flags,live.flags_on_stack);
    abort();
}
778    
/* Spill the native x86 flags into regflags.cznv (called "the stack"
   for historical reasons).  Does nothing if they are already there,
   and merely marks them valid when nobody cares about flag values. */
static void flags_to_stack(void)
{
    if (live.flags_on_stack==VALID)
	return;
    if (!live.flags_are_important) {
	live.flags_on_stack=VALID;
	return;
    }
    Dif (live.flags_in_flags!=VALID)
	abort();
    else  {
	int tmp;
	tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
	raw_flags_to_reg(tmp);
	unlock2(tmp);
    }
    live.flags_on_stack=VALID;
}
797    
/* The next emitted instruction will trash the x86 flags: save them to
   regflags.cznv first if they are currently the only valid copy. */
static __inline__ void clobber_flags(void)
{
    if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
	flags_to_stack();
    live.flags_in_flags=TRASH;
}
804    
805     /* Prepare for leaving the compiled stuff */
806     static __inline__ void flush_flags(void)
807     {
808     flags_to_stack();
809     return;
810     }
811    
812     int touchcnt;
813    
814     /********************************************************************
815 gbeauche 1.18 * Partial register flushing for optimized calls *
816     ********************************************************************/
817    
/* Register usage summary for one 68k instruction: bit N of each mask
   corresponds to D0-D7 (bits 0-7) and A0-A7 (bits 8-15). */
struct regusage {
	uae_u16 rmask;	/* registers the instruction reads */
	uae_u16 wmask;	/* registers the instruction writes */
};
822    
/* Record register `reg' in `mask'.  Compiles to a no-op unless the
   optimized-call machinery is enabled. */
static inline void ru_set(uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	*mask |= 1 << reg;
#endif
}
829    
/* Test whether register `reg' is recorded in `mask'.  Without the
   optimized-call machinery this conservatively reports every register
   as both read and written. */
static inline bool ru_get(const uae_u16 *mask, int reg)
{
#if USE_OPTIMIZED_CALLS
	return (*mask & (1 << reg));
#else
	/* Default: instruction reads & write to register */
	return true;
#endif
}
839    
/* Mark `reg' as read by the instruction. */
static inline void ru_set_read(regusage *ru, int reg)
{
	ru_set(&ru->rmask, reg);
}

/* Mark `reg' as written by the instruction. */
static inline void ru_set_write(regusage *ru, int reg)
{
	ru_set(&ru->wmask, reg);
}

/* Does the instruction read `reg'? */
static inline bool ru_read_p(const regusage *ru, int reg)
{
	return ru_get(&ru->rmask, reg);
}

/* Does the instruction write `reg'? */
static inline bool ru_write_p(const regusage *ru, int reg)
{
	return ru_get(&ru->wmask, reg);
}
859    
/* Record the register usage implied by one effective address and
   advance m68k_pc_offset past its extension words.  `write_mode'
   selects whether a register operand counts as a write; the base
   register of a memory EA is always a read. */
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
					   wordsizes size, int write_mode)
{
	switch (mode) {
	case Areg:
		reg += 8;	/* address registers occupy mask bits 8-15 */
		/* fall through */
	case Dreg:
		ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
		break;
	case Ad16:
		/* skip displacement */
		m68k_pc_offset += 2;
		/* fall through */
	case Aind:
	case Aipi:
	case Apdi:
		ru_set_read(ru, reg+8);
		break;
	case Ad8r:
		ru_set_read(ru, reg+8);
		/* fall through */
	case PC8r: {
		/* brief/full extension word: index register in bits 12-15 */
		uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
		reg = (dp >> 12) & 15;
		ru_set_read(ru, reg);
		if (dp & 0x100)
			m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
		break;
	}
	case PC16:
	case absw:
	case imm0:
	case imm1:
		m68k_pc_offset += 2;
		break;
	case absl:
	case imm2:
		m68k_pc_offset += 4;
		break;
	case immi:
		m68k_pc_offset += (size == sz_long) ? 4 : 2;
		break;
	}
}
904    
905     /* TODO: split into a static initialization part and a dynamic one
906     (instructions depending on extension words) */
907     static void ru_fill(regusage *ru, uae_u32 opcode)
908     {
909     m68k_pc_offset += 2;
910    
911     /* Default: no register is used or written to */
912     ru->rmask = 0;
913     ru->wmask = 0;
914    
915     uae_u32 real_opcode = cft_map(opcode);
916     struct instr *dp = &table68k[real_opcode];
917    
918     bool rw_dest = true;
919     bool handled = false;
920    
921     /* Handle some instructions specifically */
922     uae_u16 reg, ext;
923     switch (dp->mnemo) {
924     case i_BFCHG:
925     case i_BFCLR:
926     case i_BFEXTS:
927     case i_BFEXTU:
928     case i_BFFFO:
929     case i_BFINS:
930     case i_BFSET:
931     case i_BFTST:
932     ext = comp_get_iword((m68k_pc_offset+=2)-2);
933     if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
934     if (ext & 0x020) ru_set_read(ru, ext & 7);
935     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
936     if (dp->dmode == Dreg)
937     ru_set_read(ru, dp->dreg);
938     switch (dp->mnemo) {
939     case i_BFEXTS:
940     case i_BFEXTU:
941     case i_BFFFO:
942     ru_set_write(ru, (ext >> 12) & 7);
943     break;
944     case i_BFINS:
945     ru_set_read(ru, (ext >> 12) & 7);
946     /* fall through */
947     case i_BFCHG:
948     case i_BFCLR:
949     case i_BSET:
950     if (dp->dmode == Dreg)
951     ru_set_write(ru, dp->dreg);
952     break;
953     }
954     handled = true;
955     rw_dest = false;
956     break;
957    
958     case i_BTST:
959     rw_dest = false;
960     break;
961    
962     case i_CAS:
963     {
964     ext = comp_get_iword((m68k_pc_offset+=2)-2);
965     int Du = ext & 7;
966     ru_set_read(ru, Du);
967     int Dc = (ext >> 6) & 7;
968     ru_set_read(ru, Dc);
969     ru_set_write(ru, Dc);
970     break;
971     }
972     case i_CAS2:
973     {
974     int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
975     ext = comp_get_iword((m68k_pc_offset+=2)-2);
976     Rn1 = (ext >> 12) & 15;
977     Du1 = (ext >> 6) & 7;
978     Dc1 = ext & 7;
979     ru_set_read(ru, Rn1);
980     ru_set_read(ru, Du1);
981     ru_set_read(ru, Dc1);
982     ru_set_write(ru, Dc1);
983     ext = comp_get_iword((m68k_pc_offset+=2)-2);
984     Rn2 = (ext >> 12) & 15;
985     Du2 = (ext >> 6) & 7;
986     Dc2 = ext & 7;
987     ru_set_read(ru, Rn2);
988     ru_set_read(ru, Du2);
989     ru_set_write(ru, Dc2);
990     break;
991     }
992     case i_DIVL: case i_MULL:
993     m68k_pc_offset += 2;
994     break;
995     case i_LEA:
996     case i_MOVE: case i_MOVEA: case i_MOVE16:
997     rw_dest = false;
998     break;
999     case i_PACK: case i_UNPK:
1000     rw_dest = false;
1001     m68k_pc_offset += 2;
1002     break;
1003     case i_TRAPcc:
1004     m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1005     break;
1006     case i_RTR:
1007     /* do nothing, just for coverage debugging */
1008     break;
1009     /* TODO: handle EXG instruction */
1010     }
1011    
1012     /* Handle A-Traps better */
1013     if ((real_opcode & 0xf000) == 0xa000) {
1014     handled = true;
1015     }
1016    
1017     /* Handle EmulOps better */
1018     if ((real_opcode & 0xff00) == 0x7100) {
1019     handled = true;
1020     ru->rmask = 0xffff;
1021     ru->wmask = 0;
1022     }
1023    
1024     if (dp->suse && !handled)
1025     ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1026    
1027     if (dp->duse && !handled)
1028     ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1029    
1030     if (rw_dest)
1031     ru->rmask |= ru->wmask;
1032    
1033     handled = handled || dp->suse || dp->duse;
1034    
1035     /* Mark all registers as used/written if the instruction may trap */
1036     if (may_trap(opcode)) {
1037     handled = true;
1038     ru->rmask = 0xffff;
1039     ru->wmask = 0xffff;
1040     }
1041    
1042     if (!handled) {
1043     write_log("ru_fill: %04x = { %04x, %04x }\n",
1044     real_opcode, ru->rmask, ru->wmask);
1045     abort();
1046     }
1047     }
1048    
1049     /********************************************************************
1050 gbeauche 1.1 * register allocation per block logging *
1051     ********************************************************************/
1052    
/* Per-block register-usage log, consulted by the block-matching logic
   (USE_MATCH) to decide whether callers need recompilation. */
static uae_s8 vstate[VREGS];    /* log state of each virtual register */
static uae_s8 vwritten[VREGS];  /* nonzero once the vreg was written back to memory */
static uae_s8 nstate[N_REGS];   /* vreg cached in each native reg, or an L_* code */

#define L_UNKNOWN -127  /* not yet observed in this block */
#define L_UNAVAIL -1    /* native reg clobbered as a temporary */
#define L_NEEDED -2     /* vreg value is read before being overwritten */
#define L_UNNEEDED -3   /* vreg value is clobbered before any read */
1061    
1062     static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1063     {
1064     int i;
1065    
1066     for (i = 0; i < VREGS; i++)
1067     s->virt[i] = vstate[i];
1068     for (i = 0; i < N_REGS; i++)
1069     s->nat[i] = nstate[i];
1070     }
1071    
1072     static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1073     {
1074     int i;
1075     int reverse = 0;
1076    
1077     for (i = 0; i < VREGS; i++) {
1078     if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1079     return 1;
1080     if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1081     reverse++;
1082     }
1083     for (i = 0; i < N_REGS; i++) {
1084     if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1085     return 1;
1086     if (nstate[i] < 0 && s->nat[i] >= 0)
1087     reverse++;
1088     }
1089     if (reverse >= 2 && USE_MATCH)
1090     return 1; /* In this case, it might be worth recompiling the
1091     * callers */
1092     return 0;
1093     }
1094    
1095     static __inline__ void log_startblock(void)
1096     {
1097     int i;
1098    
1099     for (i = 0; i < VREGS; i++) {
1100     vstate[i] = L_UNKNOWN;
1101     vwritten[i] = 0;
1102     }
1103     for (i = 0; i < N_REGS; i++)
1104     nstate[i] = L_UNKNOWN;
1105     }
1106    
1107     /* Using an n-reg for a temp variable */
1108     static __inline__ void log_isused(int n)
1109     {
1110     if (nstate[n] == L_UNKNOWN)
1111     nstate[n] = L_UNAVAIL;
1112     }
1113    
1114     static __inline__ void log_visused(int r)
1115     {
1116     if (vstate[r] == L_UNKNOWN)
1117     vstate[r] = L_NEEDED;
1118     }
1119    
1120     static __inline__ void do_load_reg(int n, int r)
1121     {
1122     if (r == FLAGTMP)
1123     raw_load_flagreg(n, r);
1124     else if (r == FLAGX)
1125     raw_load_flagx(n, r);
1126     else
1127     raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1128     }
1129    
/* Reload r from its memory slot into native register n unconditionally.
   Unlike do_load_reg, this does NOT special-case FLAGTMP/FLAGX. */
static __inline__ void check_load_reg(int n, int r)
{
    raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
}
1134    
/* Record that virtual register r has been stored back to memory. */
static __inline__ void log_vwrite(int r)
{
    vwritten[r] = 1;
}
1139    
1140     /* Using an n-reg to hold a v-reg */
1141     static __inline__ void log_isreg(int n, int r)
1142     {
1143     static int count = 0;
1144    
1145     if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1146     nstate[n] = r;
1147     else {
1148     do_load_reg(n, r);
1149     if (nstate[n] == L_UNKNOWN)
1150     nstate[n] = L_UNAVAIL;
1151     }
1152     if (vstate[r] == L_UNKNOWN)
1153     vstate[r] = L_NEEDED;
1154     }
1155    
1156     static __inline__ void log_clobberreg(int r)
1157     {
1158     if (vstate[r] == L_UNKNOWN)
1159     vstate[r] = L_UNNEEDED;
1160     }
1161    
1162     /* This ends all possibility of clever register allocation */
1163    
1164     static __inline__ void log_flush(void)
1165     {
1166     int i;
1167    
1168     for (i = 0; i < VREGS; i++)
1169     if (vstate[i] == L_UNKNOWN)
1170     vstate[i] = L_NEEDED;
1171     for (i = 0; i < N_REGS; i++)
1172     if (nstate[i] == L_UNKNOWN)
1173     nstate[i] = L_UNAVAIL;
1174     }
1175    
1176     static __inline__ void log_dump(void)
1177     {
1178     int i;
1179    
1180     return;
1181    
1182     write_log("----------------------\n");
1183     for (i = 0; i < N_REGS; i++) {
1184     switch (nstate[i]) {
1185     case L_UNKNOWN:
1186     write_log("Nat %d : UNKNOWN\n", i);
1187     break;
1188     case L_UNAVAIL:
1189     write_log("Nat %d : UNAVAIL\n", i);
1190     break;
1191     default:
1192     write_log("Nat %d : %d\n", i, nstate[i]);
1193     break;
1194     }
1195     }
1196     for (i = 0; i < VREGS; i++) {
1197     if (vstate[i] == L_UNNEEDED)
1198     write_log("Virt %d: UNNEEDED\n", i);
1199     }
1200     }
1201    
1202     /********************************************************************
1203     * register status handling. EMIT TIME! *
1204     ********************************************************************/
1205    
/* Update r's cache status.  A transition to ISCONST means the old memory
   value becomes irrelevant, so it is logged as clobbered for the block
   matcher. */
static __inline__ void set_status(int r, int status)
{
    if (status == ISCONST)
        log_clobberreg(r);
    live.state[r].status=status;
}
1212    
/* Nonzero if vreg r currently lives in a native register (clean or dirty). */
static __inline__ int isinreg(int r)
{
    return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
}
1217    
1218     static __inline__ void adjust_nreg(int r, uae_u32 val)
1219     {
1220     if (!val)
1221     return;
1222     raw_lea_l_brr(r,r,val);
1223     }
1224    
/* Write vreg r back to its memory slot if dirty.  If r carries a pending
   constant offset and exclusively owns an unlocked native register, the
   offset is folded in first (which itself makes r dirty).  Emits code. */
static void tomem(int r)
{
    int rr=live.state[r].realreg;

    if (isinreg(r)) {
        /* Fold a pending offset into the native reg before storing. */
        if (live.state[r].val && live.nat[rr].nholds==1
            && !live.nat[rr].locked) {
            // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
            //        live.state[r].val,r,rr,target);
            adjust_nreg(rr,live.state[r].val);
            live.state[r].val=0;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
        }
    }

    if (live.state[r].status==DIRTY) {
        /* Store only the bytes that are actually dirty. */
        switch (live.state[r].dirtysize) {
        case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
        case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
        case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
        default: abort();
        }
        log_vwrite(r);
        set_status(r,CLEAN);
        live.state[r].dirtysize=0;
    }
}
1253    
/* Nonzero if vreg r currently holds a compile-time constant. */
static __inline__ int isconst(int r)
{
    return live.state[r].status==ISCONST;
}
1258    
/* Non-static wrapper around isconst() for use outside this file. */
int is_const(int r)
{
    return isconst(r);
}
1263    
/* Store a constant-valued vreg directly to its memory slot (no native
   register needed) and drop the constant.  No-op unless r is ISCONST. */
static __inline__ void writeback_const(int r)
{
    if (!isconst(r))
        return;
    /* NF_HANDLER registers must never be materialized this way. */
    Dif (live.state[r].needflush==NF_HANDLER) {
        write_log("Trying to write back constant NF_HANDLER!\n");
        abort();
    }

    raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
    log_vwrite(r);
    live.state[r].val=0;
    set_status(r,INMEM);
}
1278    
1279     static __inline__ void tomem_c(int r)
1280     {
1281     if (isconst(r)) {
1282     writeback_const(r);
1283     }
1284     else
1285     tomem(r);
1286     }
1287    
/* Remove vreg r from its native register: spill it if dirty, then unlink
   it from the holder list (swap-with-last) and mark it INMEM. */
static void evict(int r)
{
    int rr;

    if (!isinreg(r))
        return;
    tomem(r);
    rr=live.state[r].realreg;

    /* A locked register holding only r must not be evicted. */
    Dif (live.nat[rr].locked &&
        live.nat[rr].nholds==1) {
        write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
        abort();
    }

    live.nat[rr].nholds--;
    if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
        /* Move the former top entry into r's vacated slot to keep the
           holds[] array dense. */
        int topreg=live.nat[rr].holds[live.nat[rr].nholds];
        int thisind=live.state[r].realind;

        live.nat[rr].holds[thisind]=topreg;
        live.state[topreg].realind=thisind;
    }
    live.state[r].realreg=-1;
    set_status(r,INMEM);
}
1314    
1315     static __inline__ void free_nreg(int r)
1316     {
1317     int i=live.nat[r].nholds;
1318    
1319     while (i) {
1320     int vr;
1321    
1322     --i;
1323     vr=live.nat[r].holds[i];
1324     evict(vr);
1325     }
1326     Dif (live.nat[r].nholds!=0) {
1327     write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1328     abort();
1329     }
1330     }
1331    
1332     /* Use with care! */
1333     static __inline__ void isclean(int r)
1334     {
1335     if (!isinreg(r))
1336     return;
1337     live.state[r].validsize=4;
1338     live.state[r].dirtysize=0;
1339     live.state[r].val=0;
1340     set_status(r,CLEAN);
1341     }
1342    
/* Detach r from its native register, discarding (not spilling) any dirty
   data: mark clean first, then evict. */
static __inline__ void disassociate(int r)
{
    isclean(r);
    evict(r);
}
1348    
/* Make vreg r a compile-time constant with value val, discarding whatever
   it held before (the previous contents are NOT written back). */
static __inline__ void set_const(int r, uae_u32 val)
{
    disassociate(r);
    live.state[r].val=val;
    set_status(r,ISCONST);
}
1355    
/* Return r's pending constant offset (or its constant value if ISCONST). */
static __inline__ uae_u32 get_offset(int r)
{
    return live.state[r].val;
}
1360    
/* Allocate a native register to back vreg r and return its index.
 * size        : access width needed (1/2/4 bytes)
 * willclobber : nonzero if the caller overwrites the value completely,
 *               so no load from memory is required
 * hint        : preferred native register, or -1 for "any"
 * Emits whatever loads/merges are needed; may evict other vregs. */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness=0; /* to shut up gcc */
    bestreg=-1;
    when=2000000000;

    /* Pick the cheapest candidate: free regs cost 0, the hint gets a huge
       bonus, otherwise least-recently-touched wins.  Only registers that
       can express the requested access size qualify (x86 byte/word regs). */
    for (i=N_REGS;i--;) {
        badness=live.nat[i].touched;
        if (live.nat[i].nholds==0)
            badness=0;
        if (i==hint)
            badness-=200000000;
        if (!live.nat[i].locked && badness<when) {
            if ((size==1 && live.nat[i].canbyte) ||
                (size==2 && live.nat[i].canword) ||
                (size==4)) {
                bestreg=i;
                when=badness;
                if (live.nat[i].nholds==0 && hint<0)
                    break;
                if (i==hint)
                    break;
            }
        }
    }
    Dif (bestreg==-1)
        abort();

    if (live.nat[bestreg].nholds>0) {
        free_nreg(bestreg);
    }
    if (isinreg(r)) {
        int rr=live.state[r].realreg;
        /* This will happen if we read a partially dirty register at a
           bigger size */
        Dif (willclobber || live.state[r].validsize>=size)
            abort();
        Dif (live.nat[rr].nholds!=1)
            abort();
        if (size==4 && live.state[r].validsize==2) {
            /* Merge: low 16 bits are valid in rr, fetch the high 16 bits
               from memory via bestreg and combine them with an LEA. */
            log_isused(bestreg);
            log_visused(r);
            raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
            raw_bswap_32(bestreg);
            raw_zero_extend_16_rr(rr,rr);
            raw_zero_extend_16_rr(bestreg,bestreg);
            raw_bswap_32(bestreg);
            raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
            live.state[r].validsize=4;
            live.nat[rr].touched=touchcnt++;
            return rr;
        }
        if (live.state[r].validsize==1) {
            /* Nothing yet */
        }
        evict(r);
    }

    if (!willclobber) {
        if (live.state[r].status!=UNDEF) {
            if (isconst(r)) {
                /* Materialize the constant; the memory copy is now stale. */
                raw_mov_l_ri(bestreg,live.state[r].val);
                live.state[r].val=0;
                live.state[r].dirtysize=4;
                set_status(r,DIRTY);
                log_isused(bestreg);
            }
            else {
                log_isreg(bestreg, r); /* This will also load it! */
                live.state[r].dirtysize=0;
                set_status(r,CLEAN);
            }
        }
        else {
            live.state[r].val=0;
            live.state[r].dirtysize=0;
            set_status(r,CLEAN);
            log_isused(bestreg);
        }
        live.state[r].validsize=4;
    }
    else { /* this is the easiest way, but not optimal. FIXME! */
        /* Now it's trickier, but hopefully still OK */
        if (!isconst(r) || size==4) {
            live.state[r].validsize=size;
            live.state[r].dirtysize=size;
            live.state[r].val=0;
            set_status(r,DIRTY);
            if (size == 4) {
                log_clobberreg(r);
                log_isused(bestreg);
            }
            else {
                /* Partial write: the untouched bytes must survive. */
                log_visused(r);
                log_isused(bestreg);
            }
        }
        else {
            if (live.state[r].status!=UNDEF)
                raw_mov_l_ri(bestreg,live.state[r].val);
            live.state[r].val=0;
            live.state[r].validsize=4;
            live.state[r].dirtysize=4;
            set_status(r,DIRTY);
            log_isused(bestreg);
        }
    }
    /* Link r into bestreg's holder list. */
    live.state[r].realreg=bestreg;
    live.state[r].realind=live.nat[bestreg].nholds;
    live.nat[bestreg].touched=touchcnt++;
    live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
    live.nat[bestreg].nholds++;

    return bestreg;
}
1479    
/* alloc_reg_hinted() without a placement preference. */
static int alloc_reg(int r, int size, int willclobber)
{
    return alloc_reg_hinted(r,size,willclobber,-1);
}
1484    
/* Drop one lock reference from native register r (must be locked). */
static void unlock2(int r)
{
    Dif (!live.nat[r].locked)
        abort();
    live.nat[r].locked--;
}
1491    
/* Add a lock reference to native register r, pinning it against eviction. */
static void setlock(int r)
{
    live.nat[r].locked++;
}
1496    
1497    
1498     static void mov_nregs(int d, int s)
1499     {
1500     int ns=live.nat[s].nholds;
1501     int nd=live.nat[d].nholds;
1502     int i;
1503    
1504     if (s==d)
1505     return;
1506    
1507     if (nd>0)
1508     free_nreg(d);
1509    
1510     log_isused(d);
1511     raw_mov_l_rr(d,s);
1512    
1513     for (i=0;i<live.nat[s].nholds;i++) {
1514     int vs=live.nat[s].holds[i];
1515    
1516     live.state[vs].realreg=d;
1517     live.state[vs].realind=i;
1518     live.nat[d].holds[i]=vs;
1519     }
1520     live.nat[d].nholds=live.nat[s].nholds;
1521    
1522     live.nat[s].nholds=0;
1523     }
1524    
1525    
1526     static __inline__ void make_exclusive(int r, int size, int spec)
1527     {
1528     int clobber;
1529     reg_status oldstate;
1530     int rr=live.state[r].realreg;
1531     int nr;
1532     int nind;
1533     int ndirt=0;
1534     int i;
1535    
1536     if (!isinreg(r))
1537     return;
1538     if (live.nat[rr].nholds==1)
1539     return;
1540     for (i=0;i<live.nat[rr].nholds;i++) {
1541     int vr=live.nat[rr].holds[i];
1542     if (vr!=r &&
1543     (live.state[vr].status==DIRTY || live.state[vr].val))
1544     ndirt++;
1545     }
1546     if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1547     /* Everything else is clean, so let's keep this register */
1548     for (i=0;i<live.nat[rr].nholds;i++) {
1549     int vr=live.nat[rr].holds[i];
1550     if (vr!=r) {
1551     evict(vr);
1552     i--; /* Try that index again! */
1553     }
1554     }
1555     Dif (live.nat[rr].nholds!=1) {
1556     write_log("natreg %d holds %d vregs, %d not exclusive\n",
1557     rr,live.nat[rr].nholds,r);
1558     abort();
1559     }
1560     return;
1561     }
1562    
1563     /* We have to split the register */
1564     oldstate=live.state[r];
1565    
1566     setlock(rr); /* Make sure this doesn't go away */
1567     /* Forget about r being in the register rr */
1568     disassociate(r);
1569     /* Get a new register, that we will clobber completely */
1570     if (oldstate.status==DIRTY) {
1571     /* If dirtysize is <4, we need a register that can handle the
1572     eventual smaller memory store! Thanks to Quake68k for exposing
1573     this detail ;-) */
1574     nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1575     }
1576     else {
1577     nr=alloc_reg_hinted(r,4,1,spec);
1578     }
1579     nind=live.state[r].realind;
1580     live.state[r]=oldstate; /* Keep all the old state info */
1581     live.state[r].realreg=nr;
1582     live.state[r].realind=nind;
1583    
1584     if (size<live.state[r].validsize) {
1585     if (live.state[r].val) {
1586     /* Might as well compensate for the offset now */
1587     raw_lea_l_brr(nr,rr,oldstate.val);
1588     live.state[r].val=0;
1589     live.state[r].dirtysize=4;
1590     set_status(r,DIRTY);
1591     }
1592     else
1593     raw_mov_l_rr(nr,rr); /* Make another copy */
1594     }
1595     unlock2(rr);
1596     }
1597    
/* Accumulate a constant offset on vreg r without emitting any code; it is
   folded into the register later by remove_offset()/tomem(). */
static __inline__ void add_offset(int r, uae_u32 off)
{
    live.state[r].val+=off;
}
1602    
1603     static __inline__ void remove_offset(int r, int spec)
1604     {
1605     reg_status oldstate;
1606     int rr;
1607    
1608     if (isconst(r))
1609     return;
1610     if (live.state[r].val==0)
1611     return;
1612     if (isinreg(r) && live.state[r].validsize<4)
1613     evict(r);
1614    
1615     if (!isinreg(r))
1616     alloc_reg_hinted(r,4,0,spec);
1617    
1618     Dif (live.state[r].validsize!=4) {
1619     write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1620     abort();
1621     }
1622     make_exclusive(r,0,-1);
1623     /* make_exclusive might have done the job already */
1624     if (live.state[r].val==0)
1625     return;
1626    
1627     rr=live.state[r].realreg;
1628    
1629     if (live.nat[rr].nholds==1) {
1630     //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1631     // live.state[r].val,r,rr,target);
1632     adjust_nreg(rr,live.state[r].val);
1633     live.state[r].dirtysize=4;
1634     live.state[r].val=0;
1635     set_status(r,DIRTY);
1636     return;
1637     }
1638     write_log("Failed in remove_offset\n");
1639     abort();
1640     }
1641    
1642     static __inline__ void remove_all_offsets(void)
1643     {
1644     int i;
1645    
1646     for (i=0;i<VREGS;i++)
1647     remove_offset(i,-1);
1648     }
1649    
/* Bring vreg r into a native register for READING and return it, locked.
 * size       : bytes the caller will read (1/2/4)
 * spec       : required native register, or -1 for "any"
 * can_offset : nonzero if the caller copes with a pending constant offset
 *              (otherwise it is folded in first) */
static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
        write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    if (!can_offset)
        remove_offset(r,spec);

    if (isinreg(r) && live.state[r].validsize>=size) {
        n=live.state[r].realreg;
        /* The cached register must support the access width, unless a
           specific target register is forced anyway. */
        switch(size) {
        case 1:
            if (live.nat[n].canbyte || spec>=0) {
                answer=n;
            }
            break;
        case 2:
            if (live.nat[n].canword || spec>=0) {
                answer=n;
            }
            break;
        case 4:
            answer=n;
            break;
        default: abort();
        }
        if (answer<0)
            evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
        answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
    }

    if (spec>=0 && spec!=answer) {
        /* Too bad */
        mov_nregs(spec,answer);
        answer=spec;
    }
    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    return answer;
}
1697    
1698    
1699    
/* Read access, any native register, offset folded in first. */
static int readreg(int r, int size)
{
    return readreg_general(r,size,-1,0);
}
1704    
/* Read access forced into the given native register. */
static int readreg_specific(int r, int size, int spec)
{
    return readreg_general(r,size,spec,0);
}
1709    
/* Read access where the caller handles a pending constant offset itself
   (pair with get_offset()). */
static int readreg_offset(int r, int size)
{
    return readreg_general(r,size,-1,1);
}
1714    
/* writereg_general(r, size, spec)
 *
 * Bring vreg r into a native register for WRITING and return it, locked
 * and marked DIRTY.  The register is made exclusive first so that
 * co-resident vregs are not corrupted.
 *
 * INPUT
 * - r : mid-layer register
 * - size : requested size (1/2/4)
 * - spec : -1 if find or make a register free, otherwise specifies
 * the physical register to use in any case
 *
 * OUTPUT
 * - hard (physical, x86 here) register allocated to virtual register r
 */
static __inline__ int writereg_general(int r, int size, int spec)
{
    int n;
    int answer=-1;

    /* A sub-long write must not collide with a pending offset. */
    if (size<4) {
        remove_offset(r,spec);
    }

    make_exclusive(r,size,spec);
    if (isinreg(r)) {
        int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
        int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
        n=live.state[r].realreg;

        Dif (live.nat[n].nholds!=1)
            abort();
        /* Keep the cached register only if it supports the access width
           (or a specific register is forced anyway). */
        switch(size) {
        case 1:
            if (live.nat[n].canbyte || spec>=0) {
                live.state[r].dirtysize=ndsize;
                live.state[r].validsize=nvsize;
                answer=n;
            }
            break;
        case 2:
            if (live.nat[n].canword || spec>=0) {
                live.state[r].dirtysize=ndsize;
                live.state[r].validsize=nvsize;
                answer=n;
            }
            break;
        case 4:
            live.state[r].dirtysize=ndsize;
            live.state[r].validsize=nvsize;
            answer=n;
            break;
        default: abort();
        }
        if (answer<0)
            evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
        answer=alloc_reg_hinted(r,size,1,spec);
    }
    if (spec>=0 && spec!=answer) {
        mov_nregs(spec,answer);
        answer=spec;
    }
    if (live.state[r].status==UNDEF)
        live.state[r].validsize=4;
    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    if (size==4) {
        live.state[r].val=0;
    }
    else {
        Dif (live.state[r].val) {
            write_log("Problem with val\n");
            abort();
        }
    }
    set_status(r,DIRTY);
    return answer;
}
1796    
/* Write access, any native register. */
static int writereg(int r, int size)
{
    return writereg_general(r,size,-1);
}
1801    
/* Write access forced into the given native register. */
static int writereg_specific(int r, int size, int spec)
{
    return writereg_general(r,size,spec);
}
1806    
/* Bring vreg r into a native register for READ-MODIFY-WRITE and return it,
 * locked and marked DIRTY.  rsize bytes must be valid on entry; wsize
 * bytes will be dirty afterwards.  Requires wsize >= rsize. */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
        write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    remove_offset(r,spec);
    make_exclusive(r,0,spec);

    Dif (wsize<rsize) {
        write_log("Cannot handle wsize<rsize in rmw_general()\n");
        abort();
    }
    if (isinreg(r) && live.state[r].validsize>=rsize) {
        n=live.state[r].realreg;
        Dif (live.nat[n].nholds!=1)
            abort();

        /* The cached register must support the read width, unless a
           specific target register is forced anyway. */
        switch(rsize) {
        case 1:
            if (live.nat[n].canbyte || spec>=0) {
                answer=n;
            }
            break;
        case 2:
            if (live.nat[n].canword || spec>=0) {
                answer=n;
            }
            break;
        case 4:
            answer=n;
            break;
        default: abort();
        }
        if (answer<0)
            evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
        answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
    }

    if (spec>=0 && spec!=answer) {
        /* Too bad */
        mov_nregs(spec,answer);
        answer=spec;
    }
    if (wsize>live.state[r].dirtysize)
        live.state[r].dirtysize=wsize;
    if (wsize>live.state[r].validsize)
        live.state[r].validsize=wsize;
    set_status(r,DIRTY);

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;

    Dif (live.state[r].val) {
        write_log("Problem with val(rmw)\n");
        abort();
    }
    return answer;
}
1872    
/* Read-modify-write access, any native register. */
static int rmw(int r, int wsize, int rsize)
{
    return rmw_general(r,wsize,rsize,-1);
}
1877    
/* Read-modify-write access forced into the given native register. */
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
    return rmw_general(r,wsize,rsize,spec);
}
1882    
1883    
1884     /* needed for restoring the carry flag on non-P6 cores */
1885     static void bt_l_ri_noclobber(R4 r, IMM i)
1886     {
1887     int size=4;
1888     if (i<16)
1889     size=2;
1890     r=readreg(r,size);
1891     raw_bt_l_ri(r,i);
1892     unlock2(r);
1893     }
1894    
1895     /********************************************************************
1896     * FPU register status handling. EMIT TIME! *
1897     ********************************************************************/
1898    
/* Store FPU vreg r back to memory if dirty, keeping the value resident
   in its FPU register (status becomes CLEAN). */
static void f_tomem(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
        raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#else
        raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#endif
        live.fate[r].status=CLEAN;
    }
}
1910    
/* Store FPU vreg r back to memory if dirty, popping it off the FPU stack
   in the process (status becomes INMEM). */
static void f_tomem_drop(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
        raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#else
        raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#endif
        live.fate[r].status=INMEM;
    }
}
1922    
1923    
/* Nonzero if FPU vreg r currently lives in an FPU register. */
static __inline__ int f_isinreg(int r)
{
    return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
}
1928    
/* Remove FPU vreg r from its FPU register: spill if dirty (dropping the
   stack slot when r is the sole holder), then unlink it (swap-with-last)
   and mark it INMEM. */
static void f_evict(int r)
{
    int rr;

    if (!f_isinreg(r))
        return;
    rr=live.fate[r].realreg;
    /* Sole holder: store-and-pop is cheaper than store-then-keep. */
    if (live.fat[rr].nholds==1)
        f_tomem_drop(r);
    else
        f_tomem(r);

    Dif (live.fat[rr].locked &&
        live.fat[rr].nholds==1) {
        write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
        abort();
    }

    live.fat[rr].nholds--;
    if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
        /* Keep the holds[] array dense by moving the top entry down. */
        int topreg=live.fat[rr].holds[live.fat[rr].nholds];
        int thisind=live.fate[r].realind;
        live.fat[rr].holds[thisind]=topreg;
        live.fate[topreg].realind=thisind;
    }
    live.fate[r].status=INMEM;
    live.fate[r].realreg=-1;
}
1957    
1958     static __inline__ void f_free_nreg(int r)
1959     {
1960     int i=live.fat[r].nholds;
1961    
1962     while (i) {
1963     int vr;
1964    
1965     --i;
1966     vr=live.fat[r].holds[i];
1967     f_evict(vr);
1968     }
1969     Dif (live.fat[r].nholds!=0) {
1970     write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
1971     abort();
1972     }
1973     }
1974    
1975    
1976     /* Use with care! */
1977     static __inline__ void f_isclean(int r)
1978     {
1979     if (!f_isinreg(r))
1980     return;
1981     live.fate[r].status=CLEAN;
1982     }
1983    
/* Detach FPU vreg r from its FPU register, discarding (not spilling) any
   dirty data: mark clean first, then evict. */
static __inline__ void f_disassociate(int r)
{
    f_isclean(r);
    f_evict(r);
}
1989    
1990    
1991    
/* Allocate an FPU register for vreg r and return its index.
 * willclobber : nonzero if the caller overwrites the value entirely,
 *               so no load from memory is needed.
 * Picks a free register when possible, otherwise the least-recently
 * touched unlocked one; may evict other FPU vregs. */
static int f_alloc_reg(int r, int willclobber)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness;
    bestreg=-1;
    when=2000000000;
    for (i=N_FREGS;i--;) {
        badness=live.fat[i].touched;
        if (live.fat[i].nholds==0)
            badness=0;

        if (!live.fat[i].locked && badness<when) {
            bestreg=i;
            when=badness;
            if (live.fat[i].nholds==0)
                break;
        }
    }
    Dif (bestreg==-1)
        abort();

    if (live.fat[bestreg].nholds>0) {
        f_free_nreg(bestreg);
    }
    if (f_isinreg(r)) {
        f_evict(r);
    }

    if (!willclobber) {
        if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
            raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
#else
            raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
#endif
        }
        live.fate[r].status=CLEAN;
    }
    else {
        live.fate[r].status=DIRTY;
    }
    /* Link r into bestreg's holder list. */
    live.fate[r].realreg=bestreg;
    live.fate[r].realind=live.fat[bestreg].nholds;
    live.fat[bestreg].touched=touchcnt++;
    live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
    live.fat[bestreg].nholds++;

    return bestreg;
}
2043    
/* Drop one lock reference from FPU register r (must be locked). */
static void f_unlock(int r)
{
    Dif (!live.fat[r].locked)
        abort();
    live.fat[r].locked--;
}
2050    
/* Add a lock reference to FPU register r, pinning it against eviction. */
static void f_setlock(int r)
{
    live.fat[r].locked++;
}
2055    
2056     static __inline__ int f_readreg(int r)
2057     {
2058     int n;
2059     int answer=-1;
2060    
2061     if (f_isinreg(r)) {
2062     n=live.fate[r].realreg;
2063     answer=n;
2064     }
2065     /* either the value was in memory to start with, or it was evicted and
2066     is in memory now */
2067     if (answer<0)
2068     answer=f_alloc_reg(r,0);
2069    
2070     live.fat[answer].locked++;
2071     live.fat[answer].touched=touchcnt++;
2072     return answer;
2073     }
2074    
/* Ensure FPU vreg r is the ONLY vreg held by its FPU register so it can
   be written safely.  clobber is nonzero when the old value need not be
   copied into the new register. */
static __inline__ void f_make_exclusive(int r, int clobber)
{
    freg_status oldstate;
    int rr=live.fate[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!f_isinreg(r))
        return;
    if (live.fat[rr].nholds==1)
        return;
    /* Count co-resident vregs that would lose data if simply dropped. */
    for (i=0;i<live.fat[rr].nholds;i++) {
        int vr=live.fat[rr].holds[i];
        if (vr!=r && live.fate[vr].status==DIRTY)
            ndirt++;
    }
    if (!ndirt && !live.fat[rr].locked) {
        /* Everything else is clean, so let's keep this register */
        for (i=0;i<live.fat[rr].nholds;i++) {
            int vr=live.fat[rr].holds[i];
            if (vr!=r) {
                f_evict(vr);
                i--; /* Try that index again! */
            }
        }
        Dif (live.fat[rr].nholds!=1) {
            write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
            for (i=0;i<live.fat[rr].nholds;i++) {
                write_log(" %d(%d,%d)",live.fat[rr].holds[i],
                          live.fate[live.fat[rr].holds[i]].realreg,
                          live.fate[live.fat[rr].holds[i]].realind);
            }
            write_log("\n");
            abort();
        }
        return;
    }

    /* We have to split the register */
    oldstate=live.fate[r];

    f_setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    f_disassociate(r);
    /* Get a new register, that we will clobber completely */
    nr=f_alloc_reg(r,1);
    nind=live.fate[r].realind;
    if (!clobber)
        raw_fmov_rr(nr,rr); /* Make another copy */
    live.fate[r]=oldstate; /* Keep all the old state info */
    live.fate[r].realreg=nr;
    live.fate[r].realind=nind;
    f_unlock(rr);
}
2131    
2132    
/* Allocate a native FP register to hold virtual FP register r for writing.
 * The register is made exclusive (no other virtual register may share it),
 * marked DIRTY so it is written back to memory on eviction, and returned
 * locked — the caller must release it with f_unlock()/unlock2 convention. */
static __inline__ int f_writereg(int r)
{
	int n;
	int answer=-1;

	f_make_exclusive(r,1);	/* clobber==1: old contents need not survive */
	if (f_isinreg(r)) {
		n=live.fate[r].realreg;
		answer=n;
	}
	if (answer<0) {
		answer=f_alloc_reg(r,1);
	}
	live.fate[r].status=DIRTY;	/* value will diverge from memory copy */
	live.fat[answer].locked++;	/* pin until caller unlocks */
	live.fat[answer].touched=touchcnt++;	/* LRU bookkeeping */
	return answer;
}
2151    
/* Allocate a native FP register for a read-modify-write access to virtual
 * FP register r.  Unlike f_writereg(), the current value is preserved
 * (clobber==0), so the register is loaded if not already resident.
 * Marked DIRTY and returned locked. */
static int f_rmw(int r)
{
	int n;

	f_make_exclusive(r,0);	/* clobber==0: keep the existing value */
	if (f_isinreg(r)) {
		n=live.fate[r].realreg;
	}
	else
		n=f_alloc_reg(r,0);
	live.fate[r].status=DIRTY;
	live.fat[n].locked++;
	live.fat[n].touched=touchcnt++;
	return n;
}
2167    
/* Transfer the FPU condition codes (held in virtual register FP_RESULT)
 * into the native integer flags.  If the raw_fflags_into_flags() sequence
 * clobbers a fixed native register (FFLAG_NREG_CLOBBER_CONDITION), the
 * scratch virtual register tmp is first pinned to that native register so
 * the clobber is visible to the register allocator. */
static void fflags_into_flags_internal(uae_u32 tmp)
{
	int r;

	clobber_flags();
	r=f_readreg(FP_RESULT);
	if (FFLAG_NREG_CLOBBER_CONDITION) {
		int tmp2=tmp;
		tmp=writereg_specific(tmp,4,FFLAG_NREG);	/* pin tmp to the clobbered native reg */
		raw_fflags_into_flags(r);
		unlock2(tmp);
		forget_about(tmp2);	/* tmp's contents are trash after the clobber */
	}
	else
		raw_fflags_into_flags(r);
	f_unlock(r);
}
2185    
2186    
2187    
2188    
2189     /********************************************************************
2190     * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2191     ********************************************************************/
2192    
2193     /*
2194     * RULES FOR HANDLING REGISTERS:
2195     *
2196     * * In the function headers, order the parameters
2197     * - 1st registers written to
2198     * - 2nd read/modify/write registers
2199     * - 3rd registers read from
2200     * * Before calling raw_*, you must call readreg, writereg or rmw for
2201     * each register
2202     * * The order for this is
2203     * - 1st call remove_offset for all registers written to with size<4
2204     * - 2nd call readreg for all registers read without offset
2205     * - 3rd call rmw for all rmw registers
2206     * - 4th call readreg_offset for all registers that can handle offsets
2207     * - 5th call get_offset for all the registers from the previous step
2208     * - 6th call writereg for all written-to registers
2209     * - 7th call raw_*
2210     * - 8th unlock2 all registers that were locked
2211     */
2212    
/* Declare that the native flags just produced are the live 68k flags:
 * the stack copy is now stale, the in-flags copy valid and significant. */
MIDFUNC(0,live_flags,(void))
{
	live.flags_on_stack=TRASH;
	live.flags_in_flags=VALID;
	live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))

/* Declare that the current flags are dead — subsequent code need not
 * preserve them. */
MIDFUNC(0,dont_care_flags,(void))
{
	live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))


/* Copy the native carry flag into the in-memory X-flag shadow (FLAGX).
 * cc=2 is the "carry set" condition passed to setcc_m — TODO confirm
 * against the raw_setcc condition-code table. */
MIDFUNC(0,duplicate_carry,(void))
{
	evict(FLAGX);	/* make sure FLAGX lives in memory, not a register */
	make_flags_live_internal();
	COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
	log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))

/* Reload the native carry flag from bit 0 of the X-flag shadow. */
MIDFUNC(0,restore_carry,(void))
{
	if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
		bt_l_ri_noclobber(FLAGX,0);
	}
	else { /* Avoid the stall the above creates.
		  This is slow on non-P6, though.
	       */
		COMPCALL(rol_b_ri(FLAGX,8));	/* rotates bit 0 into CF */
		isclean(FLAGX);
	}
}
MENDFUNC(0,restore_carry,(void))

/* Bracket a sequence whose flag results must be preserved. */
MIDFUNC(0,start_needflags,(void))
{
	needflags=1;
}
MENDFUNC(0,start_needflags,(void))

MIDFUNC(0,end_needflags,(void))
{
	needflags=0;
}
MENDFUNC(0,end_needflags,(void))

MIDFUNC(0,make_flags_live,(void))
{
	make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))

/* Move the FPU condition codes into the native flags; tmp is a scratch
 * virtual register.  NOTE: clobber_flags() is called again inside
 * fflags_into_flags_internal(), so the call here is redundant but
 * harmless. */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
	clobber_flags();
	fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
2275    
2276    
/* Bit-test family.  The immediate variants only need the low word of the
 * register when the bit index is below 16, so they lock it at size 2 to
 * reduce register pressure. */
MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
{
	int size=4;
	if (i<16)
		size=2;	/* bit lives in the low word */
	CLOBBER_BT;
	r=readreg(r,size);
	raw_bt_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */

MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
	CLOBBER_BT;
	r=readreg(r,4);
	b=readreg(b,4);
	raw_bt_l_rr(r,b);
	unlock2(r);
	unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */

/* Bit test-and-complement. */
MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
{
	int size=4;
	if (i<16)
		size=2;
	CLOBBER_BT;
	r=rmw(r,size,size);
	raw_btc_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))

MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	b=readreg(b,4);
	r=rmw(r,4,4);
	raw_btc_l_rr(r,b);
	unlock2(r);
	unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))


/* Bit test-and-reset. */
MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
{
	int size=4;
	if (i<16)
		size=2;
	CLOBBER_BT;
	r=rmw(r,size,size);
	raw_btr_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))

MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	b=readreg(b,4);
	r=rmw(r,4,4);
	raw_btr_l_rr(r,b);
	unlock2(r);
	unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))


/* Bit test-and-set. */
MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
{
	int size=4;
	if (i<16)
		size=2;
	CLOBBER_BT;
	r=rmw(r,size,size);
	raw_bts_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))

MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
	CLOBBER_BT;
	b=readreg(b,4);
	r=rmw(r,4,4);
	raw_bts_l_rr(r,b);
	unlock2(r);
	unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2370    
/* Load a long from absolute (host) address s into register d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
	CLOBBER_MOV;
	d=writereg(d,4);
	raw_mov_l_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))


/* Indirect call through register r. */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
	r=readreg(r,4);
	raw_call_r(r);
	unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */

/* Memory-destination immediate ops: d is an absolute (host) address. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
	CLOBBER_SUB;
	raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))

MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))

MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))

MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
	CLOBBER_MOV;
	raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2416    
/* Rotate-left by immediate.  A zero rotate count is a no-op unless the
 * flags are needed (the rotate still sets flags). */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROL;
	r=rmw(r,1,1);
	raw_rol_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))

MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROL;
	r=rmw(r,2,2);
	raw_rol_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))

MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROL;
	r=rmw(r,4,4);
	raw_rol_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2449    
2450     MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2451     {
2452     if (isconst(r)) {
2453     COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2454     return;
2455     }
2456     CLOBBER_ROL;
2457     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2458     d=rmw(d,4,4);
2459     Dif (r!=1) {
2460     write_log("Illegal register %d in raw_rol_b\n",r);
2461     abort();
2462     }
2463     raw_rol_l_rr(d,r) ;
2464     unlock2(r);
2465     unlock2(d);
2466     }
2467     MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2468    
2469     MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2470     { /* Can only do this with r==1, i.e. cl */
2471    
2472     if (isconst(r)) {
2473     COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2474     return;
2475     }
2476     CLOBBER_ROL;
2477     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2478     d=rmw(d,2,2);
2479     Dif (r!=1) {
2480     write_log("Illegal register %d in raw_rol_b\n",r);
2481     abort();
2482     }
2483     raw_rol_w_rr(d,r) ;
2484     unlock2(r);
2485     unlock2(d);
2486     }
2487     MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2488    
2489     MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2490     { /* Can only do this with r==1, i.e. cl */
2491    
2492     if (isconst(r)) {
2493     COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2494     return;
2495     }
2496    
2497     CLOBBER_ROL;
2498     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2499     d=rmw(d,1,1);
2500     Dif (r!=1) {
2501     write_log("Illegal register %d in raw_rol_b\n",r);
2502     abort();
2503     }
2504     raw_rol_b_rr(d,r) ;
2505     unlock2(r);
2506     unlock2(d);
2507     }
2508     MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2509    
2510    
2511     MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2512     {
2513     if (isconst(r)) {
2514     COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2515     return;
2516     }
2517     CLOBBER_SHLL;
2518     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2519     d=rmw(d,4,4);
2520     Dif (r!=1) {
2521     write_log("Illegal register %d in raw_rol_b\n",r);
2522     abort();
2523     }
2524     raw_shll_l_rr(d,r) ;
2525     unlock2(r);
2526     unlock2(d);
2527     }
2528     MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2529    
2530     MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2531     { /* Can only do this with r==1, i.e. cl */
2532    
2533     if (isconst(r)) {
2534     COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2535     return;
2536     }
2537     CLOBBER_SHLL;
2538     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2539     d=rmw(d,2,2);
2540     Dif (r!=1) {
2541     write_log("Illegal register %d in raw_shll_b\n",r);
2542     abort();
2543     }
2544     raw_shll_w_rr(d,r) ;
2545     unlock2(r);
2546     unlock2(d);
2547     }
2548     MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2549    
2550     MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2551     { /* Can only do this with r==1, i.e. cl */
2552    
2553     if (isconst(r)) {
2554     COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2555     return;
2556     }
2557    
2558     CLOBBER_SHLL;
2559     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2560     d=rmw(d,1,1);
2561     Dif (r!=1) {
2562     write_log("Illegal register %d in raw_shll_b\n",r);
2563     abort();
2564     }
2565     raw_shll_b_rr(d,r) ;
2566     unlock2(r);
2567     unlock2(d);
2568     }
2569     MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2570    
2571    
/* Rotate-right by immediate and by register.
 * NOTE(review): these parameters are declared R1/R2/R4 (read-only) but
 * are passed to rmw(), so they are actually read-modify-write —
 * presumably the declarations should be RW*; confirm against gencomp. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,1,1);
	raw_ror_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))

MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,2,2);
	raw_ror_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))

MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_ROR;
	r=rmw(r,4,4);
	raw_ror_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))

/* Register-count variants: count is pinned to CL via SHIFTCOUNT_NREG;
 * constant counts are folded into the immediate variants. */
MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,4,4);
	raw_ror_l_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))

MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
		return;
	}
	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,2,2);
	raw_ror_w_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))

MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
	if (isconst(r)) {
		COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
		return;
	}

	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,1,1);
	raw_ror_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2650    
2651     MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2652     {
2653     if (isconst(r)) {
2654     COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2655     return;
2656     }
2657     CLOBBER_SHRL;
2658     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2659     d=rmw(d,4,4);
2660     Dif (r!=1) {
2661     write_log("Illegal register %d in raw_rol_b\n",r);
2662     abort();
2663     }
2664     raw_shrl_l_rr(d,r) ;
2665     unlock2(r);
2666     unlock2(d);
2667     }
2668     MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2669    
2670     MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2671     { /* Can only do this with r==1, i.e. cl */
2672    
2673     if (isconst(r)) {
2674     COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2675     return;
2676     }
2677     CLOBBER_SHRL;
2678     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2679     d=rmw(d,2,2);
2680     Dif (r!=1) {
2681     write_log("Illegal register %d in raw_shrl_b\n",r);
2682     abort();
2683     }
2684     raw_shrl_w_rr(d,r) ;
2685     unlock2(r);
2686     unlock2(d);
2687     }
2688     MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2689    
2690     MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2691     { /* Can only do this with r==1, i.e. cl */
2692    
2693     if (isconst(r)) {
2694     COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2695     return;
2696     }
2697    
2698     CLOBBER_SHRL;
2699     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2700     d=rmw(d,1,1);
2701     Dif (r!=1) {
2702     write_log("Illegal register %d in raw_shrl_b\n",r);
2703     abort();
2704     }
2705     raw_shrl_b_rr(d,r) ;
2706     unlock2(r);
2707     unlock2(d);
2708     }
2709     MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2710    
2711    
2712    
/* Shift by immediate.  The 32-bit variants fold the shift into a known
 * constant when the flags are not needed; narrower variants always emit
 * code.  A zero count is a no-op unless flags are needed. */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(r) && !needflags) {
		live.state[r].val<<=i;	/* constant-fold; flags not required */
		return;
	}
	CLOBBER_SHLL;
	r=rmw(r,4,4);
	raw_shll_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHLL;
	r=rmw(r,2,2);
	raw_shll_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHLL;
	r=rmw(r,1,1);
	raw_shll_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))

MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(r) && !needflags) {
		live.state[r].val>>=i;	/* constant-fold; val is unsigned, so logical shift */
		return;
	}
	CLOBBER_SHRL;
	r=rmw(r,4,4);
	raw_shrl_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRL;
	r=rmw(r,2,2);
	raw_shrl_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRL;
	r=rmw(r,1,1);
	raw_shrl_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2786    
/* Arithmetic shift-right by immediate.  No constant-folding here (unlike
 * shll_l_ri/shrl_l_ri), since live.state[].val is unsigned and a plain
 * >> would not sign-extend. */
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,4,4);
	raw_shra_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,2,2);
	raw_shra_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
	if (!i && !needflags)
		return;
	CLOBBER_SHRA;
	r=rmw(r,1,1);
	raw_shra_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2819    
2820     MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2821     {
2822     if (isconst(r)) {
2823     COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2824     return;
2825     }
2826     CLOBBER_SHRA;
2827     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2828     d=rmw(d,4,4);
2829     Dif (r!=1) {
2830     write_log("Illegal register %d in raw_rol_b\n",r);
2831     abort();
2832     }
2833     raw_shra_l_rr(d,r) ;
2834     unlock2(r);
2835     unlock2(d);
2836     }
2837     MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2838    
2839     MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2840     { /* Can only do this with r==1, i.e. cl */
2841    
2842     if (isconst(r)) {
2843     COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2844     return;
2845     }
2846     CLOBBER_SHRA;
2847     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2848     d=rmw(d,2,2);
2849     Dif (r!=1) {
2850     write_log("Illegal register %d in raw_shra_b\n",r);
2851     abort();
2852     }
2853     raw_shra_w_rr(d,r) ;
2854     unlock2(r);
2855     unlock2(d);
2856     }
2857     MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2858    
2859     MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2860     { /* Can only do this with r==1, i.e. cl */
2861    
2862     if (isconst(r)) {
2863     COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2864     return;
2865     }
2866    
2867     CLOBBER_SHRA;
2868     r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2869     d=rmw(d,1,1);
2870     Dif (r!=1) {
2871     write_log("Illegal register %d in raw_shra_b\n",r);
2872     abort();
2873     }
2874     raw_shra_b_rr(d,r) ;
2875     unlock2(r);
2876     unlock2(d);
2877     }
2878     MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2879    
2880    
/* Set byte register d to 0/1 according to native condition cc. */
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
	CLOBBER_SETCC;
	d=writereg(d,1);
	raw_setcc(d,cc);
	unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))

/* Same, but the destination is an absolute (host) memory address. */
MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
	CLOBBER_SETCC;
	raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))

/* Conditional move: d=s if condition cc holds.  d==s is a no-op. */
MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
	if (d==s)
		return;
	CLOBBER_CMOV;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_cmov_l_rr(d,s,cc);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))

/* Conditional load from absolute (host) address s. */
MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
	CLOBBER_CMOV;
	d=rmw(d,4,4);
	raw_cmov_l_rm(d,s,cc);
	unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2918    
/* Set the native Z flag according to whether r is zero.  Two strategies:
 * either BSF r,r (which sets ZF from the source and leaves r unchanged
 * when nonzero — selected by setzflg_uses_bsf), or an explicit
 * flag-merging sequence using two scratch registers (S11/S12), which
 * requires the 68k flags to currently live in the native flags. */
MIDFUNC(1,setzflg_l,(RW4 r))
{
	if (setzflg_uses_bsf) {
		CLOBBER_BSF;
		r=rmw(r,4,4);
		raw_bsf_l_rr(r,r);
		unlock2(r);
	}
	else {
		Dif (live.flags_in_flags!=VALID) {
			write_log("setzflg() wanted flags in native flags, they are %d\n",
				live.flags_in_flags);
			abort();
		}
		r=readreg(r,4);
		int f=writereg(S11,4);	/* scratch: receives merged flags */
		int t=writereg(S12,4);	/* scratch: temporary */
		raw_flags_set_zero(f,r,t);
		unlock2(f);
		unlock2(r);
		unlock2(t);
	}
}
MENDFUNC(1,setzflg_l,(RW4 r))
2943 gbeauche 1.1
/* 32x32 -> 32 signed multiply, result in d. */
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
	CLOBBER_MUL;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_imul_32_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))

/* 32x32 -> 64 signed multiply.  Both operands are pinned to the fixed
 * native register pair (MUL_NREG1/MUL_NREG2, i.e. the EDX:EAX pair on
 * x86) and receive the high/low halves of the result. */
MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
	CLOBBER_MUL;
	s=rmw_specific(s,4,4,MUL_NREG2);
	d=rmw_specific(d,4,4,MUL_NREG1);
	raw_imul_64_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))

/* 32x32 -> 64 unsigned multiply; same register pinning as above. */
MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
	CLOBBER_MUL;
	s=rmw_specific(s,4,4,MUL_NREG2);
	d=rmw_specific(d,4,4,MUL_NREG1);
	raw_mul_64_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))

/* 32x32 -> 32 unsigned multiply, result in d. */
MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
	CLOBBER_MUL;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_mul_32_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2987    
/* Sign-extend the low word of s into d.  Constant sources are folded at
 * translation time.  When s==d the register must be locked only once,
 * and with the full (4-byte) size, hence the rmw() path. */
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_s32)(uae_s16)live.state[s].val);
		return;
	}

	CLOBBER_SE16;
	isrmw=(s==d);
	if (!isrmw) {
		s=readreg(s,2);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,2);
	}
	raw_sign_extend_16_rr(d,s);
	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))

/* Sign-extend the low byte of s into d; same s==d handling as above. */
MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_s32)(uae_s8)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_SE8;
	if (!isrmw) {
		s=readreg(s,1);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,1);
	}

	raw_sign_extend_8_rr(d,s);

	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3049    
3050    
/* Zero-extend the low word of s into d; mirrors sign_extend_16_rr. */
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
	int isrmw;

	if (isconst(s)) {
		set_const(d,(uae_u32)(uae_u16)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_ZE16;
	if (!isrmw) {
		s=readreg(s,2);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,2);
	}
	raw_zero_extend_16_rr(d,s);
	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))

/* Zero-extend the low byte of s into d; mirrors sign_extend_8_rr. */
MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
	int isrmw;
	if (isconst(s)) {
		set_const(d,(uae_u32)(uae_u8)live.state[s].val);
		return;
	}

	isrmw=(s==d);
	CLOBBER_ZE8;
	if (!isrmw) {
		s=readreg(s,1);
		d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble! */
		s=d=rmw(s,4,1);
	}

	raw_zero_extend_8_rr(d,s);

	if (!isrmw) {
		unlock2(d);
		unlock2(s);
	}
	else {
		unlock2(s);
	}
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3111    
/* Register-to-register byte move; d==s is a no-op and constant sources
 * are forwarded to the immediate variant. */
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=writereg(d,1);
	raw_mov_b_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))

/* Register-to-register word move; same short-circuits as mov_b_rr. */
MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
	if (d==s)
		return;
	if (isconst(s)) {
		COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=writereg(d,2);
	raw_mov_w_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3147    
3148    
/* Load d from [baser + index*factor] (long/word/byte variants). */
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_mov_l_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,2);

	raw_mov_w_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,1);

	raw_mov_b_rrm_indexed(d,baser,index,factor);

	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3191    
3192    
/* Store s to [baser + index*factor] (long/word/byte variants).  The long
 * variant asserts that the source does not alias the address registers
 * after allocation. */
MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,4);

	Dif (baser==s || index==s)
		abort();


	raw_mov_l_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,2);

	raw_mov_w_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg(baser,4);
	index=readreg(index,4);

	raw_mov_b_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3238    
3239    
/* Store s to [base + baser + index*factor].  Known constant offsets of
 * baser/index are folded into the displacement via readreg_offset()/
 * get_offset(), shrinking the emitted addressing mode. */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
	int basereg=baser;	/* remember virtual regs for get_offset() */
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,4);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);	/* fold known offsets into displacement */
	base+=factor*get_offset(indexreg);

	raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,2);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3299    
3300    
3301    
/* Read a long from base+baser+factor*index */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;	/* remember virtual regs for get_offset() */
	int indexreg=index;

	CLOBBER_MOV;
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);	/* fold known offsets into displacement */
	base+=factor*get_offset(indexreg);
	d=writereg(d,4);
	raw_mov_l_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))


/* Word variant; remove_offset(d) is needed here (and in the byte variant)
 * because d is written with size<4 — see the register-handling rules
 * above.  The long variant writes the full register and doesn't need it. */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	remove_offset(d,-1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,2);
	raw_mov_w_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))


MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	remove_offset(d,-1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,1);
	raw_mov_b_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3360    
/* Read a long from base+factor*index */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
    int indexreg=index;		/* mid-layer register number, kept for get_offset() */

    /* If the index is a known constant, the effective address is constant
       too, so reduce this to a plain memory load. */
    if (isconst(index)) {
	COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
	return;
    }

    CLOBBER_MOV;
    index=readreg_offset(index,4);
    /* Fold the index register's lazy offset into the constant base. */
    base+=get_offset(indexreg)*factor;
    d=writereg(d,4);

    raw_mov_l_rm_indexed(d,base,index,factor);
    unlock2(index);
    unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3381    
3382    
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
    /* Constant address: reduce to a plain memory load. */
    if (isconst(s)) {
	COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,4);

    raw_mov_l_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
    if (isconst(s)) {
	COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,2);

    raw_mov_w_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3416    
/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
    /* Constant address: reduce to a plain memory load. */
    if (isconst(s)) {
	COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);
    d=writereg(d,1);

    raw_mov_b_rR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3433    
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
    int sreg=s;		/* mid-layer register, kept for get_offset() */
    if (isconst(s)) {
	COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    s=readreg_offset(s,4);
    /* Fold s's lazy offset into the displacement. */
    offset+=get_offset(sreg);
    d=writereg(d,4);

    raw_mov_l_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
    int sreg=s;
    if (isconst(s)) {
	COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    /* Partial-width write: flush any pending offset on d first. */
    remove_offset(d,-1);
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,2);

    raw_mov_w_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3472    
/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
    int sreg=s;		/* mid-layer register, kept for get_offset() */
    if (isconst(s)) {
	COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
	return;
    }
    CLOBBER_MOV;
    /* Partial-width write: flush any pending offset on d first. */
    remove_offset(d,-1);
    s=readreg_offset(s,4);
    offset+=get_offset(sreg);
    d=writereg(d,1);

    raw_mov_b_brR(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3492    
/* write the immediate i (long) to the address contained in d+offset */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;		/* mid-layer register, kept for get_offset() */
    /* Constant address: reduce to an immediate store to memory. */
    if (isconst(d)) {
	COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
	return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    /* Fold d's lazy offset into the displacement. */
    offset+=get_offset(dreg);
    raw_mov_l_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))

/* write the immediate i (word) to the address contained in d+offset */
MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
	COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
	return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_w_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))

/* write the immediate i (byte) to the address contained in d+offset */
MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
    int dreg=d;
    if (isconst(d)) {
	COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
	return;
    }

    CLOBBER_MOV;
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_Ri(d,i,offset);
    unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3540    
/* Store s (long) to the address contained in d+offset.
   Warning! OFFSET is byte sized only! */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
    /* Constant destination address: plain store to memory. */
    if (isconst(d)) {
	COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
	return;
    }
    /* Constant source value: immediate store through d. */
    if (isconst(s)) {
	COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_mov_l_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))

/* Store s (word) to the address contained in d+offset. */
MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
    if (isconst(d)) {
	COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
	return;
    }
    if (isconst(s)) {
	COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg(d,4);
    raw_mov_w_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))

/* Store s (byte) to the address contained in d+offset. */
MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
    if (isconst(d)) {
	COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
	return;
    }
    if (isconst(s)) {
	COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg(d,4);
    raw_mov_b_Rr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3602    
/* d = s + offset (address computation, no memory access, no flags) */
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
    /* Constant source: the result is a compile-time constant. */
    if (isconst(s)) {
	COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
	return;
    }
#if USE_OFFSET
    /* In-place add with no flag effects: just track it as a lazy offset,
       deferring the actual addition until the register is needed. */
    if (d==s) {
	add_offset(d,offset);
	return;
    }
#endif
    CLOBBER_LEA;
    s=readreg(s,4);
    d=writereg(d,4);
    raw_lea_l_brr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))

/* d = s + factor*index + offset */
MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
    /* No displacement: use the cheaper two-operand indexed form. */
    if (!offset) {
	COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
	return;
    }
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_brr_indexed(d,s,index,factor,offset);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))

/* d = s + factor*index */
MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
    CLOBBER_LEA;
    s=readreg(s,4);
    index=readreg(index,4);
    d=writereg(d,4);

    raw_lea_l_rr_indexed(d,s,index,factor);
    unlock2(d);
    unlock2(index);
    unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3655    
/* write the long s to the address contained in d+offset */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
    int dreg=d;		/* mid-layer register, kept for get_offset() */
    /* Constant destination address: plain store to memory. */
    if (isconst(d)) {
	COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,4);
    d=readreg_offset(d,4);
    /* Fold d's lazy offset into the displacement. */
    offset+=get_offset(dreg);

    raw_mov_l_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3675    
/* write the word s to the address contained in d+offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
    int dreg=d;		/* mid-layer register, kept for get_offset() */

    /* Constant destination address: plain store to memory. */
    if (isconst(d)) {
	COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,2);
    d=readreg_offset(d,4);
    /* Fold d's lazy offset into the displacement. */
    offset+=get_offset(dreg);
    raw_mov_w_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3695    
/* write the byte s to the address contained in d+offset */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
    int dreg=d;		/* mid-layer register, kept for get_offset() */
    if (isconst(d)) {
	COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);
    d=readreg_offset(d,4);
    offset+=get_offset(dreg);
    raw_mov_b_bRr(d,s,offset);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3713    
/* Byte-swap all four bytes of r (big-endian <-> little-endian long). */
MIDFUNC(1,bswap_32,(RW4 r))
{
    int reg=r;

    /* Constant: fold the swap at compile time via reverse32(). */
    if (isconst(r)) {
	uae_u32 oldv=live.state[r].val;
	live.state[r].val=reverse32(oldv);
	return;
    }

    CLOBBER_SW32;
    r=rmw(r,4,4);
    raw_bswap_32(r);
    unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))
3730    
/* Byte-swap the low word of r; the upper 16 bits are preserved. */
MIDFUNC(1,bswap_16,(RW2 r))
{
    /* Constant: swap the two low bytes, keep the high word intact. */
    if (isconst(r)) {
	uae_u32 oldv=live.state[r].val;
	live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
	    (oldv&0xffff0000);
	return;
    }

    CLOBBER_SW16;
    r=rmw(r,2,2);

    raw_bswap_16(r);
    unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3747    
3748    
3749    
/* Register-to-register long move. Instead of emitting code, alias d onto
   the native register already holding s: both mid-layer registers then
   share one native register until one of them is written. */
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
    int olds;

    if (d==s) { /* How pointless! */
	return;
    }
    /* Constant source: d just becomes the same constant. */
    if (isconst(s)) {
	COMPCALL(mov_l_ri)(d,live.state[s].val);
	return;
    }
    olds=s;		/* mid-layer register number, before readreg_offset() */
    disassociate(d);	/* drop whatever d was previously bound to */
    s=readreg_offset(s,4);
    /* Bind d to s's native register and copy s's lazy-offset value. */
    live.state[d].realreg=s;
    live.state[d].realind=live.nat[s].nholds;
    live.state[d].val=live.state[olds].val;
    live.state[d].validsize=4;
    live.state[d].dirtysize=4;
    set_status(d,DIRTY);

    /* Record d as an additional holder of the native register. */
    live.nat[s].holds[live.nat[s].nholds]=d;
    live.nat[s].nholds++;
    log_clobberreg(d);
    /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
       d,s,live.state[d].realind,live.nat[s].nholds); */
    unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3779    
/* Store the long in s to the fixed memory address d. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
    /* Constant source: store the immediate directly. */
    if (isconst(s)) {
	COMPCALL(mov_l_mi)(d,live.state[s].val);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,4);

    raw_mov_l_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))


/* Store the word in s to the fixed memory address d. */
MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
    if (isconst(s)) {
	COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
	return;
    }
    CLOBBER_MOV;
    s=readreg(s,2);

    raw_mov_w_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))

/* Load the word at fixed memory address s into d. */
MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))

/* Store the byte in s to the fixed memory address d. */
MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
    if (isconst(s)) {
	COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_MOV;
    s=readreg(s,1);

    raw_mov_b_mr(d,s);
    unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))

/* Load the byte at fixed memory address s into d. */
MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3843    
/* Load immediate: a long immediate is not emitted at all — the register
   is simply marked as holding the constant s (lazy constant tracking). */
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
    set_const(d,s);
    return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))

/* Load word immediate into d (upper bytes of d preserved by the raw op). */
MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,2);

    raw_mov_w_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))

/* Load byte immediate into d. */
MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,1);

    raw_mov_b_ri(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3870    
3871    
/* Add the immediate s to the long/word/byte at fixed memory address d.
   No registers involved, so only the flag state needs clobbering. */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_l_mi(d,s) ;
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))

MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_w_mi(d,s) ;
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))

MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
    CLOBBER_ADD;
    raw_add_b_mi(d,s) ;
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3892    
3893    
/* Set flags from d & i without modifying d (x86 TEST semantics). */
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
    CLOBBER_TEST;
    d=readreg(d,4);

    raw_test_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3903    
3904     MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3905     {
3906     CLOBBER_TEST;
3907     d=readreg(d,4);
3908     s=readreg(s,4);
3909    
3910     raw_test_l_rr(d,s);;
3911     unlock2(d);
3912     unlock2(s);
3913     }
3914     MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3915    
/* Set flags from d & s (word), registers unmodified. */
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
    CLOBBER_TEST;
    d=readreg(d,2);
    s=readreg(s,2);

    raw_test_w_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))

/* Set flags from d & s (byte), registers unmodified. */
MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
    CLOBBER_TEST;
    d=readreg(d,1);
    s=readreg(s,1);

    raw_test_b_rr(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3939    
3940    
/* d &= i (long). If d is a tracked constant and the emulated flags are
   dead, fold the operation at compile time. */
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
	live.state[d].val &= i;
	return;
    }

    CLOBBER_AND;
    d=rmw(d,4,4);

    raw_and_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

/* d &= s (long). */
MIDFUNC(2,and_l,(RW4 d, R4 s))
{
    CLOBBER_AND;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_and_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))

/* d &= s (word). */
MIDFUNC(2,and_w,(RW2 d, R2 s))
{
    CLOBBER_AND;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_and_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))

/* d &= s (byte). */
MIDFUNC(2,and_b,(RW1 d, R1 s))
{
    CLOBBER_AND;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_and_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
3991    
// gb-- used for making an fpcr value in compemu_fpp.cpp
/* d |= *(memory at s). */
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))

/* d |= i (long); constant-folded when d is const and flags are dead. */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
    if (isconst(d) && !needflags) {
	live.state[d].val|=i;
	return;
    }
    CLOBBER_OR;
    d=rmw(d,4,4);

    raw_or_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))

/* d |= s (long); folded when both operands are tracked constants. */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
    if (isconst(d) && isconst(s) && !needflags) {
	live.state[d].val|=live.state[s].val;
	return;
    }
    CLOBBER_OR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_or_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))

/* d |= s (word). */
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
    CLOBBER_OR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_or_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))

/* d |= s (byte). */
MIDFUNC(2,or_b,(RW1 d, R1 s))
{
    CLOBBER_OR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_or_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
4056    
/* d += s + carry (long). No constant folding: the incoming carry flag
   is live, so the real instruction must always be emitted. */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
    CLOBBER_ADC;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_adc_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))

/* d += s + carry (word). */
MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
    CLOBBER_ADC;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_adc_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))

/* d += s + carry (byte). */
MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
    CLOBBER_ADC;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_adc_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
4093    
/* d += s (long); delegates to the immediate form when s is a constant. */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
	COMPCALL(add_l_ri)(d,live.state[s].val);
	return;
    }

    CLOBBER_ADD;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_add_l(d,s);

    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))

/* d += s (word). */
MIDFUNC(2,add_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
	COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
	return;
    }

    CLOBBER_ADD;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_add_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))

/* d += s (byte). */
MIDFUNC(2,add_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
	COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_ADD;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_add_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
4145    
/* d -= i (long). When the emulated flags are dead the subtraction can be
   folded (constant d) or merely recorded as a lazy offset on d. */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags)
	return;			/* subtracting 0 with dead flags: no-op */
    if (isconst(d) && !needflags) {
	live.state[d].val-=i;
	return;
    }
#if USE_OFFSET
    if (!needflags) {
	add_offset(d,-i);	/* defer the subtraction as a lazy offset */
	return;
    }
#endif

    CLOBBER_SUB;
    d=rmw(d,4,4);

    raw_sub_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))

/* d -= i (word). */
MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags)
	return;

    CLOBBER_SUB;
    d=rmw(d,2,2);

    raw_sub_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))

/* d -= i (byte). */
MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags)
	return;

    CLOBBER_SUB;
    d=rmw(d,1,1);

    raw_sub_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4195    
/* d += i (long). Mirrors sub_l_ri: fold into the constant or the lazy
   offset whenever the emulated flags are dead. */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
    if (!i && !needflags)
	return;			/* adding 0 with dead flags: no-op */
    if (isconst(d) && !needflags) {
	live.state[d].val+=i;
	return;
    }
#if USE_OFFSET
    if (!needflags) {
	add_offset(d,i);	/* defer the addition as a lazy offset */
	return;
    }
#endif
    CLOBBER_ADD;
    d=rmw(d,4,4);
    raw_add_l_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))

/* d += i (word). */
MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
    if (!i && !needflags)
	return;

    CLOBBER_ADD;
    d=rmw(d,2,2);

    raw_add_w_ri(d,i);
    unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))

/* d += i (byte). */
MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
    if (!i && !needflags)
	return;

    CLOBBER_ADD;
    d=rmw(d,1,1);

    raw_add_b_ri(d,i);

    unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4243    
/* d -= s + borrow (long). Like adc, the incoming carry/borrow is live,
   so no constant folding is possible. */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
    CLOBBER_SBB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sbb_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))

/* d -= s + borrow (word). */
MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
    CLOBBER_SBB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sbb_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))

/* d -= s + borrow (byte). */
MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
    CLOBBER_SBB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sbb_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4279    
/* d -= s (long); delegates to the immediate form when s is a constant. */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
    if (isconst(s)) {
	COMPCALL(sub_l_ri)(d,live.state[s].val);
	return;
    }

    CLOBBER_SUB;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_sub_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))

/* d -= s (word). */
MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
    if (isconst(s)) {
	COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
	return;
    }

    CLOBBER_SUB;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_sub_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))

/* d -= s (byte). */
MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
    if (isconst(s)) {
	COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
	return;
    }

    CLOBBER_SUB;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_sub_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
4330    
/* Compare d with s (long): sets flags from d - s, modifies nothing. */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
    CLOBBER_CMP;
    s=readreg(s,4);
    d=readreg(d,4);

    raw_cmp_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))

/* Compare r with immediate i (long). */
MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
    CLOBBER_CMP;
    r=readreg(r,4);

    raw_cmp_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))

/* Compare d with s (word). */
MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
    CLOBBER_CMP;
    s=readreg(s,2);
    d=readreg(d,2);

    raw_cmp_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))

/* Compare d with s (byte). */
MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
    CLOBBER_CMP;
    s=readreg(s,1);
    d=readreg(d,1);

    raw_cmp_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4376    
4377    
/* d ^= s (long). */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
    CLOBBER_XOR;
    s=readreg(s,4);
    d=rmw(d,4,4);

    raw_xor_l(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))

/* d ^= s (word). */
MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
    CLOBBER_XOR;
    s=readreg(s,2);
    d=rmw(d,2,2);

    raw_xor_w(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))

/* d ^= s (byte). */
MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
    CLOBBER_XOR;
    s=readreg(s,1);
    d=rmw(d,1,1);

    raw_xor_b(d,s);
    unlock2(d);
    unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4413    
/* Indirect call through register r with one input (in1, isize bytes) and
   one output (out1, osize bytes): out1 = (*r)(in1).
   Used for calling interpretive/helper routines from translated code. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
    clobber_flags();
    remove_all_offsets();
    if (osize==4) {
	/* Full-width result: out1's old value is dead, unless out1 aliases
	   an input, in which case its value is still needed for the call. */
	if (out1!=in1 && out1!=r) {
	    COMPCALL(forget_about)(out1);
	}
    }
    else {
	/* Partial-width result: the untouched bytes of out1 survive the
	   call, so flush its current value to memory first. */
	tomem_c(out1);
    }

    in1=readreg_specific(in1,isize,REG_PAR1);
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
			     any need for swapping nregs in prepare_for_call_2
			  */
#if USE_NORMAL_CALLING_CONVENTION
    raw_push_l_r(in1);
#endif
    unlock2(in1);
    unlock2(r);

    prepare_for_call_2();
    raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(4);	/* pop the argument pushed above */
#endif


    /* Bind out1 to the native result register and mark it dirty so the
       value is written back to the emulated register file when needed. */
    live.nat[REG_RESULT].holds[0]=out1;
    live.nat[REG_RESULT].nholds=1;
    live.nat[REG_RESULT].touched=touchcnt++;

    live.state[out1].realreg=REG_RESULT;
    live.state[out1].realind=0;
    live.state[out1].val=0;
    live.state[out1].validsize=osize;
    live.state[out1].dirtysize=osize;
    set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4458    
/* Indirect call through register r with two inputs and no result:
   (*r)(in1,in2). */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
    clobber_flags();
    remove_all_offsets();
    in1=readreg_specific(in1,isize1,REG_PAR1);
    in2=readreg_specific(in2,isize2,REG_PAR2);
    r=readreg(r,4);
    prepare_for_call_1(); /* This should ensure that there won't be
			     any need for swapping nregs in prepare_for_call_2
			  */
#if USE_NORMAL_CALLING_CONVENTION
    /* Stack-based convention: push arguments right-to-left. */
    raw_push_l_r(in2);
    raw_push_l_r(in1);
#endif
    unlock2(r);
    unlock2(in1);
    unlock2(in2);
    prepare_for_call_2();
    raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
    raw_inc_sp(8);	/* pop the two arguments */
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4483    
/* forget_about() takes a mid-layer register */
/* Declare r's current value dead: detach it from any native register and
   mark it undefined, so no write-back will ever be generated for it. */
MIDFUNC(1,forget_about,(W4 r))
{
    if (isinreg(r))
	disassociate(r);
    live.state[r].val=0;
    set_status(r,UNDEF);
}
MENDFUNC(1,forget_about,(W4 r))

/* Emit a native no-op instruction. */
MIDFUNC(0,nop,(void))
{
    raw_nop();
}
MENDFUNC(0,nop,(void))
4499    
4500    
/* FP counterpart of forget_about(): declare FP register r's value dead. */
MIDFUNC(1,f_forget_about,(FW r))
{
    if (f_isinreg(r))
	f_disassociate(r);
    live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
4508    
/* The following loaders set FP register r to a well-known constant,
   each delegating to the matching raw_* primitive. */

/* r = pi */
MIDFUNC(1,fmov_pi,(FW r))
{
    r=f_writereg(r);
    raw_fmov_pi(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))

/* r = log10(2) */
MIDFUNC(1,fmov_log10_2,(FW r))
{
    r=f_writereg(r);
    raw_fmov_log10_2(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))

/* r = log2(e) */
MIDFUNC(1,fmov_log2_e,(FW r))
{
    r=f_writereg(r);
    raw_fmov_log2_e(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))

/* r = ln(2) */
MIDFUNC(1,fmov_loge_2,(FW r))
{
    r=f_writereg(r);
    raw_fmov_loge_2(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))

/* r = 1.0 */
MIDFUNC(1,fmov_1,(FW r))
{
    r=f_writereg(r);
    raw_fmov_1(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))

/* r = 0.0 */
MIDFUNC(1,fmov_0,(FW r))
{
    r=f_writereg(r);
    raw_fmov_0(r);
    f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))
4556    
/* FP register <-> memory transfers. Naming: rm = load register from
   memory, mr = store register to memory; plain = double, 's' = single,
   'i' = integer, 'ext' = extended precision. */

/* Load double from memory m into FP register r. */
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmov_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))

/* Load integer from memory m into FP register r (with conversion). */
MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmovi_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))

/* Store FP register r to memory m as integer. */
MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmovi_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))

/* Load single-precision from memory m into FP register r. */
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmovs_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))

/* Store FP register r to memory m as single-precision. */
MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmovs_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))

/* Store FP register r to memory m in extended precision. */
MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmov_ext_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))

/* Store FP register r to memory m as double. */
MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
    r=f_readreg(r);
    raw_fmov_mr(m,r);
    f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))

/* Load extended-precision from memory m into FP register r. */
MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
    r=f_writereg(r);
    raw_fmov_ext_rm(r,m);
    f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4620    
/* FP register-to-register move. With USE_F_ALIAS, like mov_l_rr this is
   done by aliasing d onto s's native FP register instead of emitting a
   copy; otherwise an actual fmov is generated. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
    if (d==s) { /* How pointless! */
	return;
    }
#if USE_F_ALIAS
    f_disassociate(d);		/* drop d's old binding */
    s=f_readreg(s);
    /* Bind d to the native register holding s and record d as holder. */
    live.fate[d].realreg=s;
    live.fate[d].realind=live.fat[s].nholds;
    live.fate[d].status=DIRTY;
    live.fat[s].holds[live.fat[s].nholds]=d;
    live.fat[s].nholds++;
    f_unlock(s);
#else
    s=f_readreg(s);
    d=f_writereg(d);
    raw_fmov_rr(d,s);
    f_unlock(s);
    f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4644    
/* fldcw_m_indexed: load the FPU control word from base[index] (raw_fldcw_m_indexed).
   Note index is an integer vreg, hence readreg/unlock2 rather than the f_* API. */
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
	index=readreg(index,4);

	raw_fldcw_m_indexed(index,base);
	unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4653    
/* ftst_r: emit an FP test of vreg r (raw_ftst_r); r itself is unchanged. */
MIDFUNC(1,ftst_r,(FR r))
{
	r=f_readreg(r);
	raw_ftst_r(r);
	f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))
4661    
/* dont_care_fflags: declare the cached FPU result (FP_RESULT) dead so it
   need not be materialized or written back. */
MIDFUNC(0,dont_care_fflags,(void))
{
	f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4667    
/* fsqrt_rr: unary FP op, d <- raw_fsqrt_rr(s).  Source is mapped read-only,
   destination write-only; both host regs are unlocked afterwards. */
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsqrt_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4677    
/* fabs_rr: unary FP op, d <- raw_fabs_rr(s). */
MIDFUNC(2,fabs_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fabs_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))
4687    
/* fsin_rr: unary FP op, d <- raw_fsin_rr(s). */
MIDFUNC(2,fsin_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsin_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))
4697    
/* fcos_rr: unary FP op, d <- raw_fcos_rr(s). */
MIDFUNC(2,fcos_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fcos_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))
4707    
/* ftwotox_rr: unary FP op, d <- raw_ftwotox_rr(s) (2^x family). */
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_ftwotox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4717    
/* fetox_rr: unary FP op, d <- raw_fetox_rr(s) (e^x family). */
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fetox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))
4727    
/* frndint_rr: unary FP op, d <- raw_frndint_rr(s) (round to integer). */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_frndint_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))
4737    
/* flog2_rr: unary FP op, d <- raw_flog2_rr(s). */
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_flog2_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))
4747    
/* fneg_rr: unary FP op, d <- raw_fneg_rr(s). */
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fneg_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
4757    
/* fadd_rr: binary FP op; d is mapped read-modify-write (f_rmw) since it is
   both an input and the result, s read-only. */
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fadd_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))
4767    
/* fsub_rr: binary FP op, d <- raw_fsub_rr(d,s); d read-modify-write. */
MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fsub_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))
4777    
/* fcmp_rr: FP compare; both operands read-only, no vreg is modified. */
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
	d=f_readreg(d);
	s=f_readreg(s);
	raw_fcmp_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))
4787    
/* fdiv_rr: binary FP op, d <- raw_fdiv_rr(d,s); d read-modify-write. */
MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fdiv_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4797    
/* frem_rr: binary FP op, d <- raw_frem_rr(d,s); d read-modify-write. */
MIDFUNC(2,frem_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))
4807    
/* frem1_rr: binary FP op, d <- raw_frem1_rr(d,s); d read-modify-write. */
MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem1_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))
4817    
/* fmul_rr: binary FP op, d <- raw_fmul_rr(d,s); d read-modify-write. */
MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fmul_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4827    
4828     /********************************************************************
4829     * Support functions exposed to gencomp. CREATE time *
4830     ********************************************************************/
4831    
4832     int kill_rodent(int r)
4833     {
4834     return KILLTHERAT &&
4835     have_rat_stall &&
4836     (live.state[r].status==INMEM ||
4837     live.state[r].status==CLEAN ||
4838     live.state[r].status==ISCONST ||
4839     live.state[r].dirtysize==4);
4840     }
4841    
/* Return the constant value cached for vreg r.  Aborts (under Dif debug
   checking) if r is not actually in the ISCONST state. */
uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}
	return live.state[r].val;
}
4850    
/* Fold the pending compile-time PC offset into the emulated PC: emit an
   add to PC_P, advance comp_pc_p by the same amount, and reset the offset. */
void sync_m68k_pc(void)
{
	if (m68k_pc_offset) {
		add_l_ri(PC_P,m68k_pc_offset);
		comp_pc_p+=m68k_pc_offset;
		m68k_pc_offset=0;
	}
}
4859    
4860     /********************************************************************
4861     * Scratch registers management *
4862     ********************************************************************/
4863    
/* Backing store for scratch vregs: vregs beyond the 68k-mapped ones spill
   here instead of into the emulated register file (see init_comp). */
struct scratch_t {
	uae_u32 regs[VREGS];
	fpu_register fregs[VFREGS];
};

static scratch_t scratch;
4870    
4871     /********************************************************************
4872     * Support functions exposed to newcpu *
4873     ********************************************************************/
4874    
/* Render a boolean as the literal string "on" or "off" (for log output). */
static inline const char *str_on_off(bool b)
{
	if (b)
		return "on";
	return "off";
}
4879    
/* One-time JIT initialization: open /dev/zero (non-Win32), read the JIT
   preferences, probe the target CPU, pick the icache flush strategy, and
   build the compiler dispatch tables.  Safe to call more than once. */
void compiler_init(void)
{
	static bool initialized = false;	/* guards against double init */
	if (initialized)
		return;

#ifndef WIN32
	// Open /dev/zero
	zero_fd = open("/dev/zero", O_RDWR);
	if (zero_fd < 0) {
		char str[200];
		sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
		ErrorAlert(str);
		QuitEmulator();
	}
#endif

#if JIT_DEBUG
	// JIT debug mode ?
	JITDebug = PrefsFindBool("jitdebug");
#endif
	write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
	// Use JIT compiler for FPU instructions ?
	avoid_fpu = !PrefsFindBool("jitfpu");
#else
	// JIT FPU is always disabled
	avoid_fpu = true;
#endif
	write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

	// Get size of the translation cache (in KB)
	cache_size = PrefsFindInt32("jitcachesize");
	write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

	// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
	raw_init_cpu();
	setzflg_uses_bsf = target_check_bsf();
	write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
	write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
	write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);

	// Translation cache flush mechanism
	lazy_flush = PrefsFindBool("jitlazyflush");
	write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
	flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

	// Compiler features
	write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
	write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
	write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
	write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
	write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

	// Build compiler tables
	build_comp();

	initialized = true;

#if PROFILE_UNTRANSLATED_INSNS
	write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif

#if PROFILE_COMPILE_TIME
	write_log("<JIT compiler> : gather statistics on translation time\n");
	emul_start_time = clock();
#endif
}
4949    
/* JIT teardown: release the translation cache, close /dev/zero, and dump
   the optional profiling statistics collected during the run. */
void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
	emul_end_time = clock();
#endif

	// Deallocate translation cache
	if (compiled_code) {
		vm_release(compiled_code, cache_size * 1024);
		compiled_code = 0;
	}

#ifndef WIN32
	// Close /dev/zero
	// NOTE(review): "> 0" would skip a legal fd 0 — harmless in practice
	// since stdin occupies fd 0, but ">= 0" would be strictly correct.
	if (zero_fd > 0)
		close(zero_fd);
#endif

#if PROFILE_COMPILE_TIME
	write_log("### Compile Block statistics\n");
	write_log("Number of calls to compile_block : %d\n", compile_count);
	uae_u32 emul_time = emul_end_time - emul_start_time;
	write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
	write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
		100.0*double(compile_time)/double(emul_time));
	write_log("\n");
#endif

#if PROFILE_UNTRANSLATED_INSNS
	uae_u64 untranslated_count = 0;
	for (int i = 0; i < 65536; i++) {
		opcode_nums[i] = i;
		untranslated_count += raw_cputbl_count[i];
	}
	write_log("Sorting out untranslated instructions count...\n");
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
	write_log("\nRank Opc Count Name\n");
	for (int i = 0; i < untranslated_top_ten; i++) {
		uae_u32 count = raw_cputbl_count[opcode_nums[i]];
		struct instr *dp;
		struct mnemolookup *lookup;
		if (!count)
			break;
		dp = table68k + opcode_nums[i];
		for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
			;
		write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
	}
#endif
}
5000    
/* Decide whether the JIT should be used at all, based on the prefs:
   requires "jit" enabled, a cache of at least MIN_CACHE_SIZE KB, and
   (currently) 68040 emulation.  Returns true iff the JIT is usable. */
bool compiler_use_jit(void)
{
	// Check for the "jit" prefs item
	if (!PrefsFindBool("jit"))
		return false;

	// Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
	if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
		write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
		return false;
	}

	// FIXME: there are currently problems with JIT compilation and anything below a 68040
	if (CPUType < 4) {
		write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
		return false;
	}

	return true;
}
5021    
/* Reset the register-allocation state at the start of translating a block:
   every vreg is unbound, the first 16 integer vregs / 8 FP vregs are mapped
   onto the emulated 68k register file, special vregs (PC_P, FLAGX, FLAGTMP,
   NEXT_HANDLER, FP_RESULT) get their dedicated backing memory, and the
   per-native-register capability tables (can_byte/can_word/always_used)
   are decoded into live.nat[]. */
void init_comp(void)
{
	int i;
	uae_s8* cb=can_byte;
	uae_s8* cw=can_word;
	uae_s8* au=always_used;

	for (i=0;i<VREGS;i++) {
		live.state[i].realreg=-1;
		live.state[i].needflush=NF_SCRATCH;
		live.state[i].val=0;
		set_status(i,UNDEF);
	}

	for (i=0;i<VFREGS;i++) {
		live.fate[i].status=UNDEF;
		live.fate[i].realreg=-1;
		live.fate[i].needflush=NF_SCRATCH;
	}

	for (i=0;i<VREGS;i++) {
		if (i<16) { /* First 16 registers map to 68k registers */
			live.state[i].mem=((uae_u32*)&regs)+i;
			live.state[i].needflush=NF_TOMEM;
			set_status(i,INMEM);
		}
		else
			live.state[i].mem=scratch.regs+i;
	}
	live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
	live.state[PC_P].needflush=NF_TOMEM;
	set_const(PC_P,(uae_u32)comp_pc_p);

	live.state[FLAGX].mem=&(regflags.x);
	live.state[FLAGX].needflush=NF_TOMEM;
	set_status(FLAGX,INMEM);

	live.state[FLAGTMP].mem=&(regflags.cznv);
	live.state[FLAGTMP].needflush=NF_TOMEM;
	set_status(FLAGTMP,INMEM);

	live.state[NEXT_HANDLER].needflush=NF_HANDLER;
	set_status(NEXT_HANDLER,UNDEF);

	for (i=0;i<VFREGS;i++) {
		if (i<8) { /* First 8 registers map to 68k FPU registers */
			live.fate[i].mem=(uae_u32*)fpu_register_address(i);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else if (i==FP_RESULT) {
			live.fate[i].mem=(uae_u32*)(&fpu.result);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else
			live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
	}


	/* can_byte/can_word/always_used are sorted arrays of native register
	   numbers; walk them in parallel with the register index. */
	for (i=0;i<N_REGS;i++) {
		live.nat[i].touched=0;
		live.nat[i].nholds=0;
		live.nat[i].locked=0;
		if (*cb==i) {
			live.nat[i].canbyte=1; cb++;
		} else live.nat[i].canbyte=0;
		if (*cw==i) {
			live.nat[i].canword=1; cw++;
		} else live.nat[i].canword=0;
		if (*au==i) {
			live.nat[i].locked=1; au++;
		}
	}

	for (i=0;i<N_FREGS;i++) {
		live.fat[i].touched=0;
		live.fat[i].nholds=0;
		live.fat[i].locked=0;
	}

	touchcnt=1;
	m68k_pc_offset=0;
	live.flags_in_flags=TRASH;
	live.flags_on_stack=VALID;
	live.flags_are_important=1;

	raw_fp_init();
}
5111    
5112     /* Only do this if you really mean it! The next call should be to init!*/
5113     void flush(int save_regs)
5114     {
5115     int fi,i;
5116    
5117     log_flush();
5118     flush_flags(); /* low level */
5119     sync_m68k_pc(); /* mid level */
5120    
5121     if (save_regs) {
5122     for (i=0;i<VFREGS;i++) {
5123     if (live.fate[i].needflush==NF_SCRATCH ||
5124     live.fate[i].status==CLEAN) {
5125     f_disassociate(i);
5126     }
5127     }
5128     for (i=0;i<VREGS;i++) {
5129     if (live.state[i].needflush==NF_TOMEM) {
5130     switch(live.state[i].status) {
5131     case INMEM:
5132     if (live.state[i].val) {
5133     raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
5134     log_vwrite(i);
5135     live.state[i].val=0;
5136     }
5137     break;
5138     case CLEAN:
5139     case DIRTY:
5140     remove_offset(i,-1); tomem(i); break;
5141     case ISCONST:
5142     if (i!=PC_P)
5143     writeback_const(i);
5144     break;
5145     default: break;
5146     }
5147     Dif (live.state[i].val && i!=PC_P) {
5148     write_log("Register %d still has val %x\n",
5149     i,live.state[i].val);
5150     }
5151     }
5152     }
5153     for (i=0;i<VFREGS;i++) {
5154     if (live.fate[i].needflush==NF_TOMEM &&
5155     live.fate[i].status==DIRTY) {
5156     f_evict(i);
5157     }
5158     }
5159     raw_fp_cleanup_drop();
5160     }
5161     if (needflags) {
5162     write_log("Warning! flush with needflags=1!\n");
5163     }
5164     }
5165    
5166     static void flush_keepflags(void)
5167     {
5168     int fi,i;
5169    
5170     for (i=0;i<VFREGS;i++) {
5171     if (live.fate[i].needflush==NF_SCRATCH ||
5172     live.fate[i].status==CLEAN) {
5173     f_disassociate(i);
5174     }
5175     }
5176     for (i=0;i<VREGS;i++) {
5177     if (live.state[i].needflush==NF_TOMEM) {
5178     switch(live.state[i].status) {
5179     case INMEM:
5180     /* Can't adjust the offset here --- that needs "add" */
5181     break;
5182     case CLEAN:
5183     case DIRTY:
5184     remove_offset(i,-1); tomem(i); break;
5185     case ISCONST:
5186     if (i!=PC_P)
5187     writeback_const(i);
5188     break;
5189     default: break;
5190     }
5191     }
5192     }
5193     for (i=0;i<VFREGS;i++) {
5194     if (live.fate[i].needflush==NF_TOMEM &&
5195     live.fate[i].status==DIRTY) {
5196     f_evict(i);
5197     }
5198     }
5199     raw_fp_cleanup_drop();
5200     }
5201    
/* Forget all scratch vregs (integer and FP) and warn about native registers
   that are still locked. */
void freescratch(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		/* register 4 is exempt from the warning — presumably ESP on x86,
		   which stays permanently locked; TODO confirm */
		if (live.nat[i].locked && i!=4)
			write_log("Warning! %d is locked\n",i);

	for (i=0;i<VREGS;i++)
		if (live.state[i].needflush==NF_SCRATCH) {
			forget_about(i);
		}

	for (i=0;i<VFREGS;i++)
		if (live.fate[i].needflush==NF_SCRATCH) {
			f_forget_about(i);
		}
}
5219    
5220     /********************************************************************
5221     * Support functions, internal *
5222     ********************************************************************/
5223    
5224    
/* Pad the code emission pointer up to an `a`-byte boundary (a must be a
   power of two, or 0 for no alignment), using either multi-byte NOP fillers
   or plain 0x90 NOPs depending on target tuning. */
static void align_target(uae_u32 a)
{
	if (!a)
		return;

	if (tune_nop_fillers)
		raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
	else {
		/* Fill with NOPs --- makes debugging with gdb easier */
		while ((uae_u32)target&(a-1))
			*target++=0x90;
	}
}
5238    
5239     static __inline__ int isinrom(uintptr addr)
5240     {
5241     return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5242     }
5243    
/* Spill every dirty vreg held in a caller-clobbered native register, and
   evict all FP vregs, in preparation for calling out of generated code. */
static void flush_all(void)
{
	int i;

	log_flush();
	for (i=0;i<VREGS;i++)
		if (live.state[i].status==DIRTY) {
			if (!call_saved[live.state[i].realreg]) {
				tomem(i);
			}
		}
	for (i=0;i<VFREGS;i++)
		if (f_isinreg(i))
			f_evict(i);
	raw_fp_cleanup_drop();
}
5260    
/* Make sure all registers that will get clobbered by a call are
   save and sound in memory */
static void prepare_for_call_1(void)
{
	flush_all(); /* If there are registers that don't get clobbered,
				  * we should be a bit more selective here */
}
5268    
/* We will call a C routine in a moment. That will clobber all registers,
   so we need to disassociate everything */
static void prepare_for_call_2(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		if (!call_saved[i] && live.nat[i].nholds>0)
			free_nreg(i);		/* evict vregs from caller-clobbered regs */

	for (i=0;i<N_FREGS;i++)
		if (live.fat[i].nholds>0)
			f_free_nreg(i);		/* all FP regs are clobbered by a call */

	live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
								  flags at the very start of the call_r
								  functions! */
}
5286    
5287     /********************************************************************
5288     * Memory access and related functions, CREATE time *
5289     ********************************************************************/
5290    
/* Record a conditional branch for the epilogue: the fall-through target,
   the taken target, and the condition code to test. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
	next_pc_p=not_taken;
	taken_pc_p=taken;
	branch_cc=cond;
}
5297    
5298    
5299     static uae_u32 get_handler_address(uae_u32 addr)
5300     {
5301     uae_u32 cl=cacheline(addr);
5302     blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5303     return (uae_u32)&(bi->direct_handler_to_use);
5304     }
5305    
5306     static uae_u32 get_handler(uae_u32 addr)
5307     {
5308     uae_u32 cl=cacheline(addr);
5309     blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5310     return (uae_u32)bi->direct_handler_to_use;
5311     }
5312    
/* Emit a load of the direct handler for `addr` into native register `reg`,
   indirecting through the blockinfo field so later retranslations are seen. */
static void load_handler(int reg, uae_u32 addr)
{
	mov_l_rm(reg,get_handler_address(addr));
}
5317    
5318     /* This version assumes that it is writing *real* memory, and *will* fail
5319     * if that assumption is wrong! No branches, no second chances, just
5320     * straight go-for-it attitude */
5321    
/* Emit a direct (no-MMU-check) store of `size` bytes from vreg `source` to
   the 68k address in vreg `address`, byte-swapping through scratch vreg
   `tmp` (or clobbering `source` itself when `clobber` is set).
   NOTE(review): `offset` is unused in this direct-addressing build —
   presumably a leftover from the banked memory path. */
static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
{
	int f=tmp;

	if (clobber)
		f=source;	/* caller allows source to be trashed by the swap */
	switch(size) {
	case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
	case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
	case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
	}
	forget_about(tmp);
	forget_about(f);	/* f holds a byte-swapped (meaningless) value now */
}
5336    
/* Emit a byte store to 68k memory (see writemem_real). */
void writebyte(int address, int source, int tmp)
{
	writemem_real(address,source,20,1,tmp,0);
}
5341    
/* Common helper for the word-store variants; `clobber` lets writemem_real
   byte-swap in `source` directly instead of copying through `tmp`. */
static __inline__ void writeword_general(int address, int source, int tmp,
	int clobber)
{
	writemem_real(address,source,16,2,tmp,clobber);
}
5347    
/* Word store that may trash `source` (avoids the copy into tmp). */
void writeword_clobber(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,1);
}
5352    
/* Word store that preserves `source`. */
void writeword(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,0);
}
5357    
/* Common helper for the long-store variants (see writeword_general). */
static __inline__ void writelong_general(int address, int source, int tmp,
	int clobber)
{
	writemem_real(address,source,12,4,tmp,clobber);
}
5363    
/* Long store that may trash `source`. */
void writelong_clobber(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,1);
}
5368    
/* Long store that preserves `source`. */
void writelong(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,0);
}
5373    
5374    
5375    
5376     /* This version assumes that it is reading *real* memory, and *will* fail
5377     * if that assumption is wrong! No branches, no second chances, just
5378     * straight go-for-it attitude */
5379    
5380     static void readmem_real(int address, int dest, int offset, int size, int tmp)
5381     {
5382     int f=tmp;
5383    
5384     if (size==4 && address!=dest)
5385     f=dest;
5386    
5387     switch(size) {
5388     case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5389     case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5390     case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5391     }
5392     forget_about(tmp);
5393     }
5394    
/* Emit a byte load from 68k memory (see readmem_real). */
void readbyte(int address, int dest, int tmp)
{
	readmem_real(address,dest,8,1,tmp);
}
5399    
/* Emit a word load from 68k memory (see readmem_real). */
void readword(int address, int dest, int tmp)
{
	readmem_real(address,dest,4,2,tmp);
}
5404    
/* Emit a long load from 68k memory (see readmem_real). */
void readlong(int address, int dest, int tmp)
{
	readmem_real(address,dest,0,4,tmp);
}
5409    
5410     void get_n_addr(int address, int dest, int tmp)
5411     {
5412     // a is the register containing the virtual address
5413     // after the offset had been fetched
5414     int a=tmp;
5415    
5416     // f is the register that will contain the offset
5417     int f=tmp;
5418    
5419     // a == f == tmp if (address == dest)
5420     if (address!=dest) {
5421     a=address;
5422     f=dest;
5423     }
5424    
5425     #if REAL_ADDRESSING
5426     mov_l_rr(dest, address);
5427     #elif DIRECT_ADDRESSING
5428     lea_l_brr(dest,address,MEMBaseDiff);
5429     #endif
5430     forget_about(tmp);
5431     }
5432    
void get_n_addr_jmp(int address, int dest, int tmp)
{
	/* For this, we need to get the same address as the rest of UAE
	   would --- otherwise we end up translating everything twice */
	get_n_addr(address,dest,tmp);
}
5439    
5440    
5441     /* base is a register, but dp is an actual value.
5442     target is a register, as is tmp */
/* base is a register, but dp is an actual value.
   target is a register, as is tmp */
/* Compute a 68020 extension-word effective address into `target`.
   dp is the extension word: bit 8 selects the full format (with optional
   base/index suppression, base/outer displacements and memory indirection);
   otherwise the brief 68000-style d8(An,Rn.X*scale) format is used.
   Displacement words/longs are consumed from the instruction stream via
   comp_get_iword/comp_get_ilong, advancing m68k_pc_offset. */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
	int reg = (dp >> 12) & 15;		/* index register number */
	int regd_shift=(dp >> 9) & 3;		/* index scale: 1<<regd_shift */

	if (dp & 0x100) {
		int ignorebase=(dp&0x80);	/* BS: base suppress */
		int ignorereg=(dp&0x40);	/* IS: index suppress */
		int addbase=0;
		int outer=0;

		if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(target,reg);
				else
					mov_l_rr(target,reg);
				shll_l_ri(target,regd_shift);
			}
			else
				mov_l_ri(target,0);

			/* target is now regd */
			if (!ignorebase)
				add_l(target,base);
			add_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);
		} else { /* do the getlong first, then add regd */
			if (!ignorebase) {
				mov_l_rr(target,base);
				add_l_ri(target,addbase);
			}
			else
				mov_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);

			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(tmp,reg);
				else
					mov_l_rr(tmp,reg);
				shll_l_ri(tmp,regd_shift);
				/* tmp is now regd */
				add_l(target,tmp);
			}
		}
		add_l_ri(target,outer);
	}
	else { /* 68000 version */
		if ((dp & 0x800) == 0) { /* Sign extend */
			sign_extend_16_rr(target,reg);
			lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
		else {
			lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
	}
	forget_about(tmp);
}
5508    
5509    
5510    
5511    
5512    
/* Enable/disable the translation cache; toggling state forces a hard
   icache flush so stale translations cannot be executed. */
void set_cache_state(int enabled)
{
	if (enabled!=letit)
		flush_icache_hard(77);
	letit=enabled;
}
5519    
/* Return whether the translation cache is currently enabled. */
int get_cache_state(void)
{
	return letit;
}
5524    
5525     uae_u32 get_jitted_size(void)
5526     {
5527     if (compiled_code)
5528     return current_compile_p-compiled_code;
5529     return 0;
5530     }
5531    
5532     void alloc_cache(void)
5533     {
5534     if (compiled_code) {
5535     flush_icache_hard(6);
5536 gbeauche 1.3 vm_release(compiled_code, cache_size * 1024);
5537 gbeauche 1.1 compiled_code = 0;
5538     }
5539    
5540     if (cache_size == 0)
5541     return;
5542    
5543     while (!compiled_code && cache_size) {
5544 gbeauche 1.2 if ((compiled_code = (uae_u8 *)vm_acquire(cache_size * 1024)) == VM_MAP_FAILED) {
5545 gbeauche 1.1 compiled_code = 0;
5546     cache_size /= 2;
5547     }
5548     }
5549 gbeauche 1.2 vm_protect(compiled_code, cache_size, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5550 gbeauche 1.1
5551     if (compiled_code) {
5552     write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5553     max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5554     current_compile_p = compiled_code;
5555     current_cache_size = 0;
5556     }
5557     }
5558    
5559    
5560    
5561 gbeauche 1.13 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5562 gbeauche 1.1
5563 gbeauche 1.8 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5564 gbeauche 1.1 {
5565 gbeauche 1.8 uae_u32 k1 = 0;
5566     uae_u32 k2 = 0;
5567    
5568     #if USE_CHECKSUM_INFO
5569     checksum_info *csi = bi->csi;
5570     Dif(!csi) abort();
5571     while (csi) {
5572     uae_s32 len = csi->length;
5573     uae_u32 tmp = (uae_u32)csi->start_p;
5574     #else
5575     uae_s32 len = bi->len;
5576     uae_u32 tmp = (uae_u32)bi->min_pcp;
5577     #endif
5578     uae_u32*pos;
5579 gbeauche 1.1
5580 gbeauche 1.8 len += (tmp & 3);
5581     tmp &= ~3;
5582     pos = (uae_u32 *)tmp;
5583    
5584     if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5585     while (len > 0) {
5586     k1 += *pos;
5587     k2 ^= *pos;
5588     pos++;
5589     len -= 4;
5590     }
5591     }
5592 gbeauche 1.1
5593 gbeauche 1.8 #if USE_CHECKSUM_INFO
5594     csi = csi->next;
5595 gbeauche 1.1 }
5596 gbeauche 1.8 #endif
5597    
5598     *c1 = k1;
5599     *c2 = k2;
5600 gbeauche 1.1 }
5601    
5602 gbeauche 1.8 #if 0
5603 gbeauche 1.7 static void show_checksum(CSI_TYPE* csi)
5604 gbeauche 1.1 {
5605     uae_u32 k1=0;
5606     uae_u32 k2=0;
5607 gbeauche 1.7 uae_s32 len=CSI_LENGTH(csi);
5608     uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5609 gbeauche 1.1 uae_u32* pos;
5610    
5611     len+=(tmp&3);
5612     tmp&=(~3);
5613     pos=(uae_u32*)tmp;
5614    
5615     if (len<0 || len>MAX_CHECKSUM_LEN) {
5616     return;
5617     }
5618     else {
5619     while (len>0) {
5620     write_log("%08x ",*pos);
5621     pos++;
5622     len-=4;
5623     }
5624     write_log(" bla\n");
5625     }
5626     }
5627 gbeauche 1.8 #endif
5628 gbeauche 1.1
5629    
5630     int check_for_cache_miss(void)
5631     {
5632     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5633    
5634     if (bi) {
5635     int cl=cacheline(regs.pc_p);
5636     if (bi!=cache_tags[cl+1].bi) {
5637     raise_in_cl_list(bi);
5638     return 1;
5639     }
5640     }
5641     return 0;
5642     }
5643    
5644    
static void recompile_block(void)
{
	/* An existing block's countdown code has expired. We need to make
	   sure that execute_normal doesn't refuse to recompile due to a
	   perceived cache miss... */
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);

	Dif (!bi)
		abort();
	raise_in_cl_list(bi);	/* promote so execute_normal sees a cache hit */
	execute_normal();
	return;
}
/* Handle a front-slot cache miss for the current PC: compile the block if
   it doesn't exist yet, otherwise promote it in its cacheline list. */
static void cache_miss(void)
{
	blockinfo* bi=get_blockinfo_addr(regs.pc_p);
	uae_u32 cl=cacheline(regs.pc_p);
	blockinfo* bi2=get_blockinfo(cl);

	if (!bi) {
		execute_normal(); /* Compile this block now */
		return;
	}
	Dif (!bi2 || bi==bi2) {
		/* a miss with no (or the same) block in front is inconsistent */
		write_log("Unexplained cache miss %p %p\n",bi,bi2);
		abort();
	}
	raise_in_cl_list(bi);
	return;
}
5675    
5676     static int called_check_checksum(blockinfo* bi);
5677    
/* Verify a block whose source may have changed (BI_NEED_CHECK): recompute
   its checksums and either reactivate it (also re-checking the blocks it
   jumps to) or invalidate it for recompilation.  Returns nonzero if the
   block is still valid. */
static inline int block_check_checksum(blockinfo* bi)
{
	uae_u32 c1,c2;
	bool isgood;

	if (bi->status!=BI_NEED_CHECK)
		return 1; /* This block is in a checked state */

	checksum_count++;

	if (bi->c1 || bi->c2)
		calc_checksum(bi,&c1,&c2);
	else {
		c1=c2=1; /* Make sure it doesn't match */
	}

	isgood=(c1==bi->c1 && c2==bi->c2);

	if (isgood) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use=bi->handler;
		set_dhtu(bi,bi->direct_handler);
		bi->status=BI_CHECKING;		/* guards against check cycles */
		isgood=called_check_checksum(bi);
	}
	if (isgood) {
		/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2);*/
		remove_from_list(bi);
		add_to_active(bi);
		raise_in_cl_list(bi);
		bi->status=BI_ACTIVE;
	}
	else {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2); */
		invalidate_block(bi);
		raise_in_cl_list(bi);
	}
	return isgood;
}
5722    
5723     static int called_check_checksum(blockinfo* bi)
5724     {
5725     dependency* x=bi->deplist;
5726     int isgood=1;
5727     int i;
5728    
5729     for (i=0;i<2 && isgood;i++) {
5730     if (bi->dep[i].jmp_off) {
5731     isgood=block_check_checksum(bi->dep[i].target);
5732     }
5733     }
5734     return isgood;
5735     }
5736    
5737     static void check_checksum(void)
5738     {
5739     blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5740     uae_u32 cl=cacheline(regs.pc_p);
5741     blockinfo* bi2=get_blockinfo(cl);
5742    
5743     /* These are not the droids you are looking for... */
5744     if (!bi) {
5745     /* Whoever is the primary target is in a dormant state, but
5746     calling it was accidental, and we should just compile this
5747     new block */
5748     execute_normal();
5749     return;
5750     }
5751     if (bi!=bi2) {
5752     /* The block was hit accidentally, but it does exist. Cache miss */
5753     cache_miss();
5754     return;
5755     }
5756    
5757     if (!block_check_checksum(bi))
5758     execute_normal();
5759     }
5760    
/* Bring the register allocator's live state in line with the small state
   (bi->env) recorded for block bi, so generated code can jump straight
   into it. */
static __inline__ void match_states(blockinfo* bi)
{
	int i;
	smallstate* s=&(bi->env);

	if (bi->status==BI_NEED_CHECK) {
		block_check_checksum(bi);
	}
	if (bi->status==BI_ACTIVE ||
	    bi->status==BI_FINALIZING) { /* Deal with the *promises* the
					    block makes (about not using
					    certain vregs) */
		for (i=0;i<16;i++) {
			if (s->virt[i]==L_UNNEEDED) {
				// write_log("unneeded reg %d at %p\n",i,target);
				COMPCALL(forget_about)(i); // FIXME
			}
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	/* First pass: load every demanded v-reg into its required native
	   register (readreg_specific keeps it locked there)... */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			// printf("Loading reg %d into %d at %p\n",v,i,target);
			readreg_specific(v,4,i);
			// do_load_reg(i,v);
			// setlock(i);
		}
	}
	/* ...second pass: unlock only after *all* loads are done, so the
	   allocator cannot reuse a register still needed by a later load. */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			unlock2(i);
		}
	}
}
5799    
5800     static uae_u8 popallspace[1024]; /* That should be enough space */
5801    
/* Generate, into popallspace, the small "popall" stubs that restore the
   callee-saved registers pushed by pushall_call_handler and then jump to
   one of the C-level service routines (do_nothing, execute_normal, ...).
   Also generates pushall_call_handler itself and, on x86, the top-level
   dispatcher loop m68k_compile_execute. */
static __inline__ void create_popalls(void)
{
  int i,r;

  current_compile_p=popallspace;
  set_target(current_compile_p);
#if USE_PUSH_POP
  /* If we can't use gcc inline assembly, we need to pop some
     registers before jumping back to the various get-out routines.
     This generates the code for it.
  */
  align_target(align_jumps);
  popall_do_nothing=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)do_nothing);

  align_target(align_jumps);
  popall_execute_normal=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)execute_normal);

  align_target(align_jumps);
  popall_cache_miss=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)cache_miss);

  align_target(align_jumps);
  popall_recompile_block=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)recompile_block);

  align_target(align_jumps);
  popall_exec_nostats=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)exec_nostats);

  align_target(align_jumps);
  popall_check_checksum=get_target();
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_jmp((uae_u32)check_checksum);

  align_target(align_jumps);
  current_compile_p=get_target();
#else
  /* Without push/pop the stubs degenerate to the plain C routines. */
  popall_exec_nostats=(void *)exec_nostats;
  popall_execute_normal=(void *)execute_normal;
  popall_cache_miss=(void *)cache_miss;
  popall_recompile_block=(void *)recompile_block;
  popall_do_nothing=(void *)do_nothing;
  popall_check_checksum=(void *)check_checksum;
#endif

  /* And now, the code to do the matching pushes and then jump
     into a handler routine */
  pushall_call_handler=get_target();
#if USE_PUSH_POP
  for (i=N_REGS;i--;) {
      if (need_to_preserve[i])
	  raw_push_l_r(i);
  }
#endif
  /* Dispatch: hash regs.pc_p into the cache_tags table and jump to the
     handler stored there. */
  r=REG_PC_TMP;
  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
  raw_and_l_ri(r,TAGMASK);
  raw_jmp_m_indexed((uae_u32)cache_tags,r,4);

#ifdef X86_ASSEMBLY
  /* Top-level loop: call the handler for the current PC, then service
     spcflags / quit_program between blocks; return when m68k_do_specialties
     reports a nonzero result or quit_program is set. */
  align_target(align_jumps);
  m68k_compile_execute = (void (*)(void))get_target();
  for (i=N_REGS;i--;) {
      if (need_to_preserve[i])
	  raw_push_l_r(i);
  }
  align_target(align_loops);
  uae_u32 dispatch_loop = (uae_u32)get_target();
  r=REG_PC_TMP;
  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
  raw_and_l_ri(r,TAGMASK);
  raw_call_m_indexed((uae_u32)cache_tags,r,4);
  raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
  raw_call((uae_u32)m68k_do_specialties);
  raw_test_l_rr(REG_RESULT,REG_RESULT);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
  raw_cmp_b_mi((uae_u32)&quit_program,0);
  raw_jcc_b_oponly(NATIVE_CC_EQ);
  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
  for (i=0;i<N_REGS;i++) {
      if (need_to_preserve[i])
	  raw_pop_l_r(i);
  }
  raw_ret();
#endif
}
5916    
5917     static __inline__ void reset_lists(void)
5918     {
5919     int i;
5920    
5921     for (i=0;i<MAX_HOLD_BI;i++)
5922     hold_bi[i]=NULL;
5923     active=NULL;
5924     dormant=NULL;
5925     }
5926    
/* Emit the two per-block exit stubs and reset bi's bookkeeping.  Both
   stubs store the block's 68k PC into regs.pc_p and then jump out:
   direct_pen routes through popall_execute_normal (recompile), while
   direct_pcc routes through popall_check_checksum (revalidate). */
static void prepare_block(blockinfo* bi)
{
    int i;

    set_target(current_compile_p);
    align_target(align_jumps);
    bi->direct_pen=(cpuop_func *)get_target();
    raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
    raw_mov_l_mr((uae_u32)&regs.pc_p,0);
    raw_jmp((uae_u32)popall_execute_normal);

    align_target(align_jumps);
    bi->direct_pcc=(cpuop_func *)get_target();
    raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
    raw_mov_l_mr((uae_u32)&regs.pc_p,0);
    raw_jmp((uae_u32)popall_check_checksum);
    current_compile_p=get_target();

    /* Fresh block: no dependencies, default environment, not yet valid. */
    bi->deplist=NULL;
    for (i=0;i<2;i++) {
	bi->dep[i].prev_p=NULL;
	bi->dep[i].next=NULL;
    }
    bi->env=default_ss;
    bi->status=BI_INVALID;
    bi->havestate=0;
    //bi->env=empty_ss;
}
5955    
5956 gbeauche 1.17 static bool avoid_opcode(uae_u32 opcode)
5957     {
5958     #if JIT_DEBUG
5959     struct instr *dp = &table68k[opcode];
5960     // filter opcodes per type, integral value, or whatever
5961     #endif
5962     return false;
5963     }
5964    
5965 gbeauche 1.1 void build_comp(void)
5966     {
5967     int i;
5968     int jumpcount=0;
5969     unsigned long opcode;
5970     struct comptbl* tbl=op_smalltbl_0_comp_ff;
5971     struct comptbl* nftbl=op_smalltbl_0_comp_nf;
5972     int count;
5973     int cpu_level = 0; // 68000 (default)
5974     if (CPUType == 4)
5975     cpu_level = 4; // 68040 with FPU
5976     else {
5977     if (FPUType)
5978     cpu_level = 3; // 68020 with FPU
5979     else if (CPUType >= 2)
5980     cpu_level = 2; // 68020
5981     else if (CPUType == 1)
5982     cpu_level = 1;
5983     }
5984     struct cputbl *nfctbl = (
5985     cpu_level == 4 ? op_smalltbl_0_nf
5986     : cpu_level == 3 ? op_smalltbl_1_nf
5987     : cpu_level == 2 ? op_smalltbl_2_nf
5988     : cpu_level == 1 ? op_smalltbl_3_nf
5989     : op_smalltbl_4_nf);
5990    
5991     write_log ("<JIT compiler> : building compiler function tables\n");
5992    
5993     for (opcode = 0; opcode < 65536; opcode++) {
5994     nfcpufunctbl[opcode] = op_illg_1;
5995     compfunctbl[opcode] = NULL;
5996     nfcompfunctbl[opcode] = NULL;
5997     prop[opcode].use_flags = 0x1f;
5998     prop[opcode].set_flags = 0x1f;
5999     prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6000     }
6001    
6002     for (i = 0; tbl[i].opcode < 65536; i++) {
6003     int cflow = table68k[tbl[i].opcode].cflow;
6004 gbeauche 1.10 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6005     cflow = fl_const_jump;
6006 gbeauche 1.8 else
6007 gbeauche 1.10 cflow &= ~fl_const_jump;
6008     prop[cft_map(tbl[i].opcode)].cflow = cflow;
6009 gbeauche 1.1
6010     int uses_fpu = tbl[i].specific & 32;
6011 gbeauche 1.17 if ((uses_fpu && avoid_fpu) || avoid_opcode(tbl[i].opcode))
6012 gbeauche 1.1 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6013     else
6014     compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6015     }
6016 gbeauche 1.8
6017 gbeauche 1.1 for (i = 0; nftbl[i].opcode < 65536; i++) {
6018     int uses_fpu = tbl[i].specific & 32;
6019 gbeauche 1.17 if ((uses_fpu && avoid_fpu) || avoid_opcode(nftbl[i].opcode))
6020 gbeauche 1.1 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6021     else
6022     nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6023    
6024     nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6025     }
6026    
6027     for (i = 0; nfctbl[i].handler; i++) {
6028     nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6029     }
6030    
6031     for (opcode = 0; opcode < 65536; opcode++) {
6032     compop_func *f;
6033     compop_func *nff;
6034     cpuop_func *nfcf;
6035     int isaddx,cflow;
6036    
6037     if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6038     continue;
6039    
6040     if (table68k[opcode].handler != -1) {
6041     f = compfunctbl[cft_map(table68k[opcode].handler)];
6042     nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6043     nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6044     cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6045     isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6046     prop[cft_map(opcode)].cflow = cflow;
6047     prop[cft_map(opcode)].is_addx = isaddx;
6048     compfunctbl[cft_map(opcode)] = f;
6049     nfcompfunctbl[cft_map(opcode)] = nff;
6050     Dif (nfcf == op_illg_1)
6051     abort();
6052     nfcpufunctbl[cft_map(opcode)] = nfcf;
6053     }
6054     prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6055     prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6056     }
6057     for (i = 0; nfctbl[i].handler != NULL; i++) {
6058     if (nfctbl[i].specific)
6059     nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
6060     }
6061    
6062     count=0;
6063     for (opcode = 0; opcode < 65536; opcode++) {
6064     if (compfunctbl[cft_map(opcode)])
6065     count++;
6066     }
6067     write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6068    
6069     /* Initialise state */
6070     create_popalls();
6071     alloc_cache();
6072     reset_lists();
6073    
6074     for (i=0;i<TAGSIZE;i+=2) {
6075     cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6076     cache_tags[i+1].bi=NULL;
6077     }
6078    
6079     #if 0
6080     for (i=0;i<N_REGS;i++) {
6081     empty_ss.nat[i].holds=-1;
6082     empty_ss.nat[i].validsize=0;
6083     empty_ss.nat[i].dirtysize=0;
6084     }
6085     #endif
6086     for (i=0;i<VREGS;i++) {
6087     empty_ss.virt[i]=L_NEEDED;
6088     }
6089     for (i=0;i<N_REGS;i++) {
6090     empty_ss.nat[i]=L_UNKNOWN;
6091     }
6092     default_ss=empty_ss;
6093     }
6094    
6095    
static void flush_icache_none(int n)
{
    /* Cache-flush variant for configurations that never need to
       invalidate translations: deliberately does nothing. */
}
6100    
6101     static void flush_icache_hard(int n)
6102     {
6103     uae_u32 i;
6104     blockinfo* bi, *dbi;
6105    
6106     hard_flush_count++;
6107     #if 0
6108     write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6109     n,regs.pc,regs.pc_p,current_cache_size/1024);
6110     current_cache_size = 0;
6111     #endif
6112     bi=active;
6113     while(bi) {
6114     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6115     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6116     dbi=bi; bi=bi->next;
6117     free_blockinfo(dbi);
6118     }
6119     bi=dormant;
6120     while(bi) {
6121     cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6122     cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6123     dbi=bi; bi=bi->next;
6124     free_blockinfo(dbi);
6125     }
6126    
6127     reset_lists();
6128     if (!compiled_code)
6129     return;
6130     current_compile_p=compiled_code;
6131     SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6132     }
6133    
6134    
6135     /* "Soft flushing" --- instead of actually throwing everything away,
6136     we simply mark everything as "needs to be checked".
6137     */
6138    
6139     static inline void flush_icache_lazy(int n)
6140     {
6141     uae_u32 i;
6142     blockinfo* bi;
6143     blockinfo* bi2;
6144    
6145     soft_flush_count++;
6146     if (!active)
6147     return;
6148    
6149     bi=active;
6150     while (bi) {
6151     uae_u32 cl=cacheline(bi->pc_p);
6152     if (bi->status==BI_INVALID ||
6153     bi->status==BI_NEED_RECOMP) {
6154     if (bi==cache_tags[cl+1].bi)
6155     cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6156     bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6157     set_dhtu(bi,bi->direct_pen);
6158     bi->status=BI_INVALID;
6159     }
6160     else {
6161     if (bi==cache_tags[cl+1].bi)
6162     cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6163     bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6164     set_dhtu(bi,bi->direct_pcc);
6165     bi->status=BI_NEED_CHECK;
6166     }
6167     bi2=bi;
6168     bi=bi->next;
6169     }
6170     /* bi2 is now the last entry in the active list */
6171     bi2->next=dormant;
6172     if (dormant)
6173     dormant->prev_p=&(bi2->next);
6174    
6175     dormant=active;
6176     active->prev_p=&dormant;
6177     active=NULL;
6178     }
6179    
static void catastrophe(void)
{
    /* Unrecoverable internal JIT error: bail out immediately. */
    abort();
}
6184    
6185     int failure;
6186    
6187     #define TARGET_M68K 0
6188     #define TARGET_POWERPC 1
6189     #define TARGET_X86 2
6190     #if defined(i386) || defined(__i386__)
6191     #define TARGET_NATIVE TARGET_X86
6192     #endif
6193     #if defined(powerpc) || defined(__powerpc__)
6194     #define TARGET_NATIVE TARGET_POWERPC
6195     #endif
6196    
6197     #ifdef ENABLE_MON
6198     static uae_u32 mon_read_byte_jit(uae_u32 addr)
6199     {
6200     uae_u8 *m = (uae_u8 *)addr;
6201     return (uae_u32)(*m);
6202     }
6203    
6204     static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
6205     {
6206     uae_u8 *m = (uae_u8 *)addr;
6207     *m = b;
6208     }
6209     #endif
6210    
6211     void disasm_block(int target, uint8 * start, size_t length)
6212     {
6213     if (!JITDebug)
6214     return;
6215    
6216     #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6217     char disasm_str[200];
6218     sprintf(disasm_str, "%s $%x $%x",
6219     target == TARGET_M68K ? "d68" :
6220     target == TARGET_X86 ? "d86" :
6221     target == TARGET_POWERPC ? "d" : "x",
6222     start, start + length - 1);
6223    
6224     uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
6225     void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;
6226    
6227     mon_read_byte = mon_read_byte_jit;
6228     mon_write_byte = mon_write_byte_jit;
6229    
6230     char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6231     mon(4, arg);
6232    
6233     mon_read_byte = old_mon_read_byte;
6234     mon_write_byte = old_mon_write_byte;
6235     #endif
6236     }
6237    
6238     static inline void disasm_native_block(uint8 *start, size_t length)
6239     {
6240     disasm_block(TARGET_NATIVE, start, length);
6241     }
6242    
6243     static inline void disasm_m68k_block(uint8 *start, size_t length)
6244     {
6245     disasm_block(TARGET_M68K, start, length);
6246     }
6247    
6248     #ifdef HAVE_GET_WORD_UNSWAPPED
6249     # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6250     #else
6251     # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6252     #endif
6253    
6254     #if JIT_DEBUG
6255     static uae_u8 *last_regs_pc_p = 0;
6256     static uae_u8 *last_compiled_block_addr = 0;
6257    
/* Dump a debugging snapshot of the JIT: key host addresses, the full 68k
   register state, and the location/size of the most recently executed
   M68k trace and its native translation. */
void compiler_dumpstate(void)
{
	if (!JITDebug)
		return;

	write_log("### Host addresses\n");
	write_log("MEM_BASE    : %x\n", MEMBaseDiff);
	write_log("PC_P        : %p\n", &regs.pc_p);
	write_log("SPCFLAGS    : %p\n", &regs.spcflags);
	write_log("D0-D7       : %p-%p\n", &regs.regs[0], &regs.regs[7]);
	write_log("A0-A7       : %p-%p\n", &regs.regs[8], &regs.regs[15]);
	write_log("\n");

	write_log("### M68k processor state\n");
	m68k_dumpstate(0);
	write_log("\n");

	write_log("### Block in Mac address space\n");
	write_log("M68K block   : %p\n",
		  (void *)get_virtual_address(last_regs_pc_p));
	write_log("Native block : %p (%d bytes)\n",
		  (void *)get_virtual_address(last_compiled_block_addr),
		  get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
	write_log("\n");
}
6283     #endif
6284    
/* Core of the JIT: translate the recorded instruction trace
   (pc_hist[0..blocklen-1]) into native code and install the resulting
   block in the translation cache.  Instructions without a compiled
   handler are emitted as calls to the interpretive fallback. */
static void compile_block(cpu_history* pc_hist, int blocklen)
{
    if (letit && compiled_code) {
#if PROFILE_COMPILE_TIME
	compile_count++;
	clock_t start_time = clock();
#endif
#if JIT_DEBUG
	bool disasm_block = false;
#endif

	/* OK, here we need to 'compile' a block */
	int i;
	int r;
	int was_comp=0;	/* nonzero while the register allocator is live */
	uae_u8 liveflags[MAXRUN+1];
#if USE_CHECKSUM_INFO
	bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
	uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
	uae_u32 min_pcp=max_pcp;
#else
	uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
	uae_u32 min_pcp=max_pcp;
#endif
	uae_u32 cl=cacheline(pc_hist[0].location);
	void* specflags=(void*)&regs.spcflags;
	blockinfo* bi=NULL;
	blockinfo* bi2;
	int extra_len=0;

	redo_current_block=0;
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);

	alloc_blockinfos();

	bi=get_blockinfo_addr_new(pc_hist[0].location,0);
	bi2=get_blockinfo(cl);

	optlev=bi->optlevel;
	if (bi->status!=BI_INVALID) {
	    Dif (bi!=bi2) {
		/* I don't think it can happen anymore. Shouldn't, in
		   any case. So let's make sure... */
		write_log("WOOOWOO count=%d, ol=%d %p %p\n",
			  bi->count,bi->optlevel,bi->handler_to_use,
			  cache_tags[cl].handler);
		abort();
	    }

	    Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
		write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
		/* What the heck? We are not supposed to be here! */
		abort();
	    }
	}
	if (bi->count==-1) {
	    /* Countdown expired: move up to the next optimization level
	       that has a nonzero execution budget. */
	    optlev++;
	    while (!optcount[optlev])
		optlev++;
	    bi->count=optcount[optlev]-1;
	}
	current_block_pc_p=(uae_u32)pc_hist[0].location;

	remove_deps(bi); /* We are about to create new code */
	bi->optlevel=optlev;
	bi->pc_p=(uae_u8*)pc_hist[0].location;
#if USE_CHECKSUM_INFO
	free_checksum_info_chain(bi->csi);
	bi->csi = NULL;
#endif

	/* Backward liveness scan: compute, per instruction, which CCR
	   flags are actually needed afterwards, so flag computation can
	   be skipped where dead. */
	liveflags[blocklen]=0x1f; /* All flags needed afterwards */
	i=blocklen;
	while (i--) {
	    uae_u16* currpcp=pc_hist[i].location;
	    uae_u32 op=DO_GET_OPCODE(currpcp);

#if USE_CHECKSUM_INFO
	    trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
#if USE_INLINING
	    if (is_const_jump(op)) {
		/* A constant jump splits the trace into separately
		   checksummed ranges. */
		checksum_info *csi = alloc_checksum_info();
		csi->start_p = (uae_u8 *)min_pcp;
		csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
		csi->next = bi->csi;
		bi->csi = csi;
		max_pcp = (uae_u32)currpcp;
	    }
#endif
	    min_pcp = (uae_u32)currpcp;
#else
	    if ((uae_u32)currpcp<min_pcp)
		min_pcp=(uae_u32)currpcp;
	    if ((uae_u32)currpcp>max_pcp)
		max_pcp=(uae_u32)currpcp;
#endif

	    liveflags[i]=((liveflags[i+1]&
			   (~prop[op].set_flags))|
			  prop[op].use_flags);
	    if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
		liveflags[i]&= ~FLAG_Z;
	}

#if USE_CHECKSUM_INFO
	checksum_info *csi = alloc_checksum_info();
	csi->start_p = (uae_u8 *)min_pcp;
	csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
	csi->next = bi->csi;
	bi->csi = csi;
#endif

	bi->needed_flags=liveflags[0];

	align_target(align_loops);
	was_comp=0;

	bi->direct_handler=(cpuop_func *)get_target();
	set_dhtu(bi,bi->direct_handler);
	bi->status=BI_COMPILING;
	current_block_start_target=(uae_u32)get_target();

	log_startblock();

	if (bi->count>=0) { /* Need to generate countdown code */
	    raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	    raw_sub_l_mi((uae_u32)&(bi->count),1);
	    raw_jl((uae_u32)popall_recompile_block);
	}
	if (optlev==0) { /* No need to actually translate */
	    /* Execute normally without keeping stats */
	    raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	    raw_jmp((uae_u32)popall_exec_nostats);
	}
	else {
	    reg_alloc_run=0;
	    next_pc_p=0;
	    taken_pc_p=0;
	    branch_cc=0;

	    comp_pc_p=(uae_u8*)pc_hist[0].location;
	    init_comp();
	    was_comp=1;

#if JIT_DEBUG
	    if (JITDebug) {
		raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
		raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
	    }
#endif

	    /* Translate instruction by instruction, stopping early if we
	       run out of code-buffer space. */
	    for (i=0;i<blocklen &&
		     get_target_noopt()<max_compile_start;i++) {
		cpuop_func **cputbl;
		compop_func **comptbl;
		uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
		needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
		if (!needed_flags) {
		    /* Flags are dead afterwards: use the no-flags tables. */
		    cputbl=nfcpufunctbl;
		    comptbl=nfcompfunctbl;
		}
		else {
		    cputbl=cpufunctbl;
		    comptbl=compfunctbl;
		}

		failure = 1; // gb-- defaults to failure state
		if (comptbl[opcode] && optlev>1) {
		    failure=0;
		    if (!was_comp) {
			comp_pc_p=(uae_u8*)pc_hist[i].location;
			init_comp();
		    }
		    was_comp=1;

		    comptbl[opcode](opcode);
		    freescratch();
		    if (!(liveflags[i+1] & FLAG_CZNV)) {
			/* We can forget about flags */
			dont_care_flags();
		    }
#if INDIVIDUAL_INST
		    flush(1);
		    nop();
		    flush(1);
		    was_comp=0;
#endif
		}

		if (failure) {
		    /* No compiled handler (or it refused): flush the
		       allocator state and call the interpretive handler. */
		    if (was_comp) {
			flush(1);
			was_comp=0;
		    }
		    raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
#if USE_NORMAL_CALLING_CONVENTION
		    raw_push_l_r(REG_PAR1);
#endif
		    raw_mov_l_mi((uae_u32)&regs.pc_p,
				 (uae_u32)pc_hist[i].location);
		    raw_call((uae_u32)cputbl[opcode]);
#if PROFILE_UNTRANSLATED_INSNS
		    // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
		    raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
#endif
#if USE_NORMAL_CALLING_CONVENTION
		    raw_inc_sp(4);
#endif

		    if (i < blocklen - 1) {
			/* Mid-block interpretive call: bail out of the
			   block if the handler raised any spcflags. */
			uae_s8* branchadd;

			raw_mov_l_rm(0,(uae_u32)specflags);
			raw_test_l_rr(0,0);
			raw_jz_b_oponly();
			branchadd=(uae_s8 *)get_target();
			emit_byte(0);
			raw_jmp((uae_u32)popall_do_nothing);
			*branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
		    }
		}
	    }
#if 1 /* This isn't completely kosher yet; It really needs to be
	 integrated into a general inter-block-dependency scheme */
	    if (next_pc_p && taken_pc_p &&
		was_comp && taken_pc_p==current_block_pc_p) {
		blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
		blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
		uae_u8 x=bi1->needed_flags;

		if (x==0xff || 1) { /* To be on the safe side */
		    uae_u16* next=(uae_u16*)next_pc_p;
		    uae_u32 op=DO_GET_OPCODE(next);

		    x=0x1f;
		    x&=(~prop[op].set_flags);
		    x|=prop[op].use_flags;
		}

		x|=bi2->needed_flags;
		if (!(x & FLAG_CZNV)) {
		    /* We can forget about flags */
		    dont_care_flags();
		    extra_len+=2; /* The next instruction now is part of this
				     block */
		}

	    }
#endif
	    log_flush();

	    if (next_pc_p) { /* A branch was registered */
		uae_u32 t1=next_pc_p;
		uae_u32 t2=taken_pc_p;
		int cc=branch_cc;

		uae_u32* branchadd;
		uae_u32* tba;
		bigstate tmp;
		blockinfo* tbi;

		if (taken_pc_p<next_pc_p) {
		    /* backward branch. Optimize for the "taken" case ---
		       which means the raw_jcc should fall through when
		       the 68k branch is taken. */
		    t1=taken_pc_p;
		    t2=next_pc_p;
		    cc=branch_cc^1;
		}

		tmp=live; /* ouch! This is big... */
		raw_jcc_l_oponly(cc);
		branchadd=(uae_u32*)get_target();
		emit_long(0);

		/* predicted outcome */
		tbi=get_blockinfo_addr_new((void*)t1,1);
		match_states(tbi);
		raw_cmp_l_mi((uae_u32)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		emit_long(get_handler(t1)-((uae_u32)tba+4));
		raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
		raw_jmp((uae_u32)popall_do_nothing);
		create_jmpdep(bi,0,tba,t1);

		align_target(align_jumps);
		/* not-predicted outcome */
		*branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
		live=tmp; /* Ouch again */
		tbi=get_blockinfo_addr_new((void*)t2,1);
		match_states(tbi);

		//flush(1); /* Can only get here if was_comp==1 */
		raw_cmp_l_mi((uae_u32)specflags,0);
		raw_jcc_l_oponly(4);
		tba=(uae_u32*)get_target();
		emit_long(get_handler(t2)-((uae_u32)tba+4));
		raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
		raw_jmp((uae_u32)popall_do_nothing);
		create_jmpdep(bi,1,tba,t2);
	    }
	    else
	    {
		if (was_comp) {
		    flush(1);
		}

		/* Let's find out where next_handler is... */
		if (was_comp && isinreg(PC_P)) {
		    /* PC is live in a register: hash it and jump through
		       cache_tags, falling back to do_nothing on spcflags. */
		    r=live.state[PC_P].realreg;
		    raw_and_l_ri(r,TAGMASK);
		    int r2 = (r==0) ? 1 : 0;
		    raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
		    raw_jmp_r(r2);
		}
		else if (was_comp && isconst(PC_P)) {
		    /* PC is a known constant: link directly to the target
		       block's handler and record the dependency. */
		    uae_u32 v=live.state[PC_P].val;
		    uae_u32* tba;
		    blockinfo* tbi;

		    tbi=get_blockinfo_addr_new((void*)v,1);
		    match_states(tbi);

		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_jcc_l_oponly(4);
		    tba=(uae_u32*)get_target();
		    emit_long(get_handler(v)-((uae_u32)tba+4));
		    raw_mov_l_mi((uae_u32)&regs.pc_p,v);
		    raw_jmp((uae_u32)popall_do_nothing);
		    create_jmpdep(bi,0,tba,v);
		}
		else {
		    /* Generic exit: reload PC from memory and dispatch. */
		    r=REG_PC_TMP;
		    raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
		    raw_and_l_ri(r,TAGMASK);
		    int r2 = (r==0) ? 1 : 0;
		    raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
		    raw_cmp_l_mi((uae_u32)specflags,0);
		    raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
		    raw_jmp_r(r2);
		}
	    }
	}

#if USE_MATCH
	if (callers_need_recompile(&live,&(bi->env))) {
	    mark_callers_recompile(bi);
	}

	big_to_small_state(&live,&(bi->env));
#endif

#if USE_CHECKSUM_INFO
	remove_from_list(bi);
	if (trace_in_rom) {
	    // No need to checksum that block trace on cache invalidation
	    free_checksum_info_chain(bi->csi);
	    bi->csi = NULL;
	    add_to_dormant(bi);
	}
	else {
	    calc_checksum(bi,&(bi->c1),&(bi->c2));
	    add_to_active(bi);
	}
#else
	if (next_pc_p+extra_len>=max_pcp &&
	    next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
	    max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
	else
	    max_pcp+=LONGEST_68K_INST;

	bi->len=max_pcp-min_pcp;
	bi->min_pcp=min_pcp;

	remove_from_list(bi);
	if (isinrom(min_pcp) && isinrom(max_pcp)) {
	    add_to_dormant(bi); /* No need to checksum it on cache flush.
				   Please don't start changing ROMs in
				   flight! */
	}
	else {
	    calc_checksum(bi,&(bi->c1),&(bi->c2));
	    add_to_active(bi);
	}
#endif

	current_cache_size += get_target() - (uae_u8 *)current_compile_p;

#if JIT_DEBUG
	if (JITDebug)
	    bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;

	if (JITDebug && disasm_block) {
	    uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
	    D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
	    uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
	    disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
	    D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
	    disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
	    getchar();
	}
#endif

	log_dump();
	align_target(align_jumps);

	/* This is the non-direct handler */
	bi->handler=
	    bi->handler_to_use=(cpuop_func *)get_target();
	raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
	raw_jnz((uae_u32)popall_cache_miss);
	comp_pc_p=(uae_u8*)pc_hist[0].location;

	bi->status=BI_FINALIZING;
	init_comp();
	match_states(bi);
	flush(1);

	raw_jmp((uae_u32)bi->direct_handler);

	current_compile_p=get_target();
	raise_in_cl_list(bi);

	/* We will flush soon, anyway, so let's do it now */
	if (current_compile_p>=max_compile_start)
	    flush_icache_hard(7);

	bi->status=BI_ACTIVE;
	if (redo_current_block)
	    block_need_recompile(bi);

#if PROFILE_COMPILE_TIME
	compile_time += (clock() - start_time);
#endif
    }
}
6725    
/*
 *  do_nothing() - deliberately empty handler.
 *  Its address serves as a harmless jump target for the translation
 *  cache (presumably via popall_do_nothing — confirm against callers).
 */
void do_nothing(void)
{
	/* Intentionally empty. */
}
6730    
6731     void exec_nostats(void)
6732     {
6733     for (;;) {
6734     uae_u32 opcode = GET_OPCODE;
6735     (*cpufunctbl[opcode])(opcode);
6736     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6737     return; /* We will deal with the spcflags in the caller */
6738     }
6739     }
6740     }
6741    
6742     void execute_normal(void)
6743     {
6744     if (!check_for_cache_miss()) {
6745     cpu_history pc_hist[MAXRUN];
6746     int blocklen = 0;
6747     #if REAL_ADDRESSING || DIRECT_ADDRESSING
6748     start_pc_p = regs.pc_p;
6749     start_pc = get_virtual_address(regs.pc_p);
6750     #else
6751     start_pc_p = regs.pc_oldp;
6752     start_pc = regs.pc;
6753     #endif
6754     for (;;) { /* Take note: This is the do-it-normal loop */
6755     pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6756     uae_u32 opcode = GET_OPCODE;
6757     #if FLIGHT_RECORDER
6758     m68k_record_step(m68k_getpc());
6759     #endif
6760     (*cpufunctbl[opcode])(opcode);
6761     if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6762     compile_block(pc_hist, blocklen);
6763     return; /* We will deal with the spcflags in the caller */
6764     }
6765     /* No need to check regs.spcflags, because if they were set,
6766     we'd have ended up inside that "if" */
6767     }
6768     }
6769     }
6770    
6771     typedef void (*compiled_handler)(void);
6772    
6773 gbeauche 1.6 #ifdef X86_ASSEMBLY
6774     void (*m68k_compile_execute)(void) = NULL;
6775     #else
6776 gbeauche 1.1 void m68k_do_compile_execute(void)
6777     {
6778     for (;;) {
6779     ((compiled_handler)(pushall_call_handler))();
6780     /* Whenever we return from that, we should check spcflags */
6781     if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6782     if (m68k_do_specialties ())
6783     return;
6784     }
6785     }
6786     }
6787 gbeauche 1.6 #endif