root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.19
Committed: 2003-10-02T09:51:14Z by gbeauche
Branch: MAIN
Changes since 1.18: +1 -0 lines
Log Message:
flags are live after a call to fflags_into_flags_internal()

File Contents

1 /*
2 * compiler/compemu_support.cpp - Core dynamic translation engine
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2002
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2002 Christian Bauer
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28 #endif
29
30 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 #endif
33
34 #define USE_MATCH 0
35
36 /* kludge for Brian, so he can compile under MSVC++ */
37 #define USE_NORMAL_CALLING_CONVENTION 0
38
39 #ifndef WIN32
40 #include <sys/types.h>
41 #include <sys/mman.h>
42 #endif
43
44 #include <stdlib.h>
45 #include <fcntl.h>
46 #include <errno.h>
47
48 #include "sysdeps.h"
49 #include "cpu_emulation.h"
50 #include "main.h"
51 #include "prefs.h"
52 #include "user_strings.h"
53 #include "vm_alloc.h"
54
55 #include "m68k.h"
56 #include "memory.h"
57 #include "readcpu.h"
58 #include "newcpu.h"
59 #include "comptbl.h"
60 #include "compiler/compemu.h"
61 #include "fpu/fpu.h"
62 #include "fpu/flags.h"
63
64 #define DEBUG 1
65 #include "debug.h"
66
67 #ifdef ENABLE_MON
68 #include "mon.h"
69 #endif
70
71 #ifndef WIN32
72 #define PROFILE_COMPILE_TIME 1
73 #define PROFILE_UNTRANSLATED_INSNS 1
74 #endif
75
76 #ifdef WIN32
77 #undef write_log
78 #define write_log dummy_write_log
79 static void dummy_write_log(const char *, ...) { }
80 #endif
81
82 #if JIT_DEBUG
83 #undef abort
84 #define abort() do { \
85 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
86 exit(EXIT_FAILURE); \
87 } while (0)
88 #endif
89
90 #if PROFILE_COMPILE_TIME
91 #include <time.h>
92 static uae_u32 compile_count = 0;
93 static clock_t compile_time = 0;
94 static clock_t emul_start_time = 0;
95 static clock_t emul_end_time = 0;
96 #endif
97
98 #if PROFILE_UNTRANSLATED_INSNS
99 const int untranslated_top_ten = 20;
100 static uae_u32 raw_cputbl_count[65536] = { 0, };
101 static uae_u16 opcode_nums[65536];
102
103 static int untranslated_compfn(const void *e1, const void *e2)
104 {
105 return (raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]) - (raw_cputbl_count[*(const uae_u16 *)e1] > raw_cputbl_count[*(const uae_u16 *)e2]); /* sort descending; a bare '<' is not a valid qsort comparator */
106 }
107 #endif
108
109 compop_func *compfunctbl[65536];
110 compop_func *nfcompfunctbl[65536];
111 cpuop_func *nfcpufunctbl[65536];
112 uae_u8* comp_pc_p;
113
114 // From newcpu.cpp
115 extern bool quit_program;
116
117 // gb-- Extra data for Basilisk II/JIT
118 #if JIT_DEBUG
119 static bool JITDebug = false; // Enable runtime disassemblers through mon?
120 #else
121 const bool JITDebug = false; // Don't use JIT debug mode at all
122 #endif
123
124 const uae_u32 MIN_CACHE_SIZE = 2048; // Minimal translation cache size (in KB)
125 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
126 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
127 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
128 static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions ?
129 static bool have_cmov = false; // target has CMOV instructions ?
130 static bool have_rat_stall = true; // target has partial register stalls ?
131 const bool tune_alignment = true; // Tune code alignments for running CPU ?
132 const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
133 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
134 static int align_loops = 32; // Align the start of loops
135 static int align_jumps = 32; // Align the start of jumps
136 static int zero_fd = -1;
137 static int optcount[10] = {
138 10, // How often a block has to be executed before it is translated
139 0, // How often to use naive translation
140 0, 0, 0, 0,
141 -1, -1, -1, -1
142 };
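/* Example (illustrative, derived from the code below): with the defaults
 * above, a fresh block starts with its countdown set to optcount[0]-1
 * (see invalidate_block()), is interpreted until that countdown expires,
 * and only then gets translated. */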
143
144 struct op_properties {
145 uae_u8 use_flags;
146 uae_u8 set_flags;
147 uae_u8 is_addx;
148 uae_u8 cflow;
149 };
150 static op_properties prop[65536];
151
152 static inline int end_block(uae_u32 opcode)
153 {
154 return (prop[opcode].cflow & fl_end_block);
155 }
156
157 static inline bool is_const_jump(uae_u32 opcode)
158 {
159 return (prop[opcode].cflow == fl_const_jump);
160 }
161
162 static inline bool may_trap(uae_u32 opcode)
163 {
164 return (prop[opcode].cflow & fl_trap);
165 }
166
167 static inline unsigned int cft_map (unsigned int f)
168 {
169 #ifndef HAVE_GET_WORD_UNSWAPPED
170 return f;
171 #else
172 return ((f >> 8) & 255) | ((f & 255) << 8);
173 #endif
174 }
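/* Illustrative example: on builds with HAVE_GET_WORD_UNSWAPPED, opcode
 * words sit byte-swapped in memory, so cft_map() restores the canonical
 * order before a table lookup, e.g. cft_map(0x754e) == 0x4e75 (RTS).
 * Without that define, it is the identity. */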
175
176 uae_u8* start_pc_p;
177 uae_u32 start_pc;
178 uae_u32 current_block_pc_p;
179 uae_u32 current_block_start_target;
180 uae_u32 needed_flags;
181 static uae_u32 next_pc_p;
182 static uae_u32 taken_pc_p;
183 static int branch_cc;
184 static int redo_current_block;
185
186 int segvcount=0;
187 int soft_flush_count=0;
188 int hard_flush_count=0;
189 int checksum_count=0;
190 static uae_u8* current_compile_p=NULL;
191 static uae_u8* max_compile_start;
192 static uae_u8* compiled_code=NULL;
193 static uae_s32 reg_alloc_run;
194
195 void* pushall_call_handler=NULL;
196 static void* popall_do_nothing=NULL;
197 static void* popall_exec_nostats=NULL;
198 static void* popall_execute_normal=NULL;
199 static void* popall_cache_miss=NULL;
200 static void* popall_recompile_block=NULL;
201 static void* popall_check_checksum=NULL;
202
203 /* The 68k only ever executes from even addresses. So right now, we
204 * waste half the entries in this array.
205 * UPDATE: We now use those entries to store the start of the linked
206 * lists that we maintain for each hash result.
207 */
208 cacheline cache_tags[TAGSIZE];
209 int letit=0;
210 blockinfo* hold_bi[MAX_HOLD_BI];
211 blockinfo* active;
212 blockinfo* dormant;
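/* Lookup sketch (illustrative, not part of the original source): a 68k
 * address hashes to a pair of adjacent tag entries; the even one holds
 * the handler that is jumped to directly, the odd one the head of the
 * chain of blockinfos sharing that hash:
 *
 *   uae_u32 cl = cacheline(addr);
 *   cpuop_func *direct = cache_tags[cl].handler;   // executed on cache hit
 *   blockinfo  *chain  = cache_tags[cl+1].bi;      // searched on collision
 */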
213
214 /* 68040 */
215 extern struct cputbl op_smalltbl_0_nf[];
216 extern struct comptbl op_smalltbl_0_comp_nf[];
217 extern struct comptbl op_smalltbl_0_comp_ff[];
218
219 /* 68020 + 68881 */
220 extern struct cputbl op_smalltbl_1_nf[];
221
222 /* 68020 */
223 extern struct cputbl op_smalltbl_2_nf[];
224
225 /* 68010 */
226 extern struct cputbl op_smalltbl_3_nf[];
227
228 /* 68000 */
229 extern struct cputbl op_smalltbl_4_nf[];
230
231 /* 68000 slow but compatible. */
232 extern struct cputbl op_smalltbl_5_nf[];
233
234 static void flush_icache_hard(int n);
235 static void flush_icache_lazy(int n);
236 static void flush_icache_none(int n);
237 void (*flush_icache)(int n) = flush_icache_none;
238
239
240
241 bigstate live;
242 smallstate empty_ss;
243 smallstate default_ss;
244 static int optlev;
245
246 static int writereg(int r, int size);
247 static void unlock2(int r);
248 static void setlock(int r);
249 static int readreg_specific(int r, int size, int spec);
250 static int writereg_specific(int r, int size, int spec);
251 static void prepare_for_call_1(void);
252 static void prepare_for_call_2(void);
253 static void align_target(uae_u32 a);
254
255 static uae_s32 nextused[VREGS];
256
257 uae_u32 m68k_pc_offset;
258
259 /* Some arithmetic operations can be optimized away if the operands
260 * are known to be constant. But that's only a good idea when the
261 * side effects they would have on the flags are not important. This
262 * variable indicates whether we need the side effects or not.
263 */
264 uae_u32 needflags=0;
265
266 /* Flag handling is complicated.
267 *
268 * x86 instructions create flags, which quite often are exactly what we
269 * want. So at times, the "68k" flags are actually in the x86 flags.
270 *
271 * Then again, sometimes we do x86 instructions that clobber the x86
272 * flags, but don't represent a corresponding m68k instruction. In that
273 * case, we have to save them.
274 *
275 * We used to save them to the stack, but now store them back directly
276 * into the regflags.cznv of the traditional emulation. Thus some odd
277 * names.
278 *
279 * So flags can be in either of two places (used to be three; boy were
280 * things complicated back then!); and either place can contain either
281 * valid flags or invalid trash (and on the stack, there was also the
282 * option of "nothing at all", now gone). A couple of variables keep
283 * track of the respective states.
284 *
285 * To make things worse, we might or might not be interested in the flags.
286 * By default, we are, but a call to dont_care_flags can change that
287 * until the next call to live_flags. If we are not, pretty much whatever
288 * is in the register and/or the native flags is seen as valid.
289 */
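/* State sketch (illustrative): the two places and their validity are
 * tracked in the 'live' bigstate, roughly:
 *
 *   live.flags_in_flags  // VALID: the x86 EFLAGS hold the 68k flags
 *   live.flags_on_stack  // VALID: regflags.cznv holds them; TRASH: junk
 *
 * make_flags_live_internal() and flags_to_stack() below move the flags
 * between the two places, and clobber_flags() must be called before
 * emitting any x86 instruction that trashes EFLAGS. */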
290
291 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
292 {
293 return cache_tags[cl+1].bi;
294 }
295
296 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
297 {
298 blockinfo* bi=get_blockinfo(cacheline(addr));
299
300 while (bi) {
301 if (bi->pc_p==addr)
302 return bi;
303 bi=bi->next_same_cl;
304 }
305 return NULL;
306 }
307
308
309 /*******************************************************************
310 * All sorts of list related functions for all of the lists *
311 *******************************************************************/
312
313 static __inline__ void remove_from_cl_list(blockinfo* bi)
314 {
315 uae_u32 cl=cacheline(bi->pc_p);
316
317 if (bi->prev_same_cl_p)
318 *(bi->prev_same_cl_p)=bi->next_same_cl;
319 if (bi->next_same_cl)
320 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
321 if (cache_tags[cl+1].bi)
322 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
323 else
324 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
325 }
326
327 static __inline__ void remove_from_list(blockinfo* bi)
328 {
329 if (bi->prev_p)
330 *(bi->prev_p)=bi->next;
331 if (bi->next)
332 bi->next->prev_p=bi->prev_p;
333 }
334
335 static __inline__ void remove_from_lists(blockinfo* bi)
336 {
337 remove_from_list(bi);
338 remove_from_cl_list(bi);
339 }
340
341 static __inline__ void add_to_cl_list(blockinfo* bi)
342 {
343 uae_u32 cl=cacheline(bi->pc_p);
344
345 if (cache_tags[cl+1].bi)
346 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
347 bi->next_same_cl=cache_tags[cl+1].bi;
348
349 cache_tags[cl+1].bi=bi;
350 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
351
352 cache_tags[cl].handler=bi->handler_to_use;
353 }
354
355 static __inline__ void raise_in_cl_list(blockinfo* bi)
356 {
357 remove_from_cl_list(bi);
358 add_to_cl_list(bi);
359 }
360
361 static __inline__ void add_to_active(blockinfo* bi)
362 {
363 if (active)
364 active->prev_p=&(bi->next);
365 bi->next=active;
366
367 active=bi;
368 bi->prev_p=&active;
369 }
370
371 static __inline__ void add_to_dormant(blockinfo* bi)
372 {
373 if (dormant)
374 dormant->prev_p=&(bi->next);
375 bi->next=dormant;
376
377 dormant=bi;
378 bi->prev_p=&dormant;
379 }
380
381 static __inline__ void remove_dep(dependency* d)
382 {
383 if (d->prev_p)
384 *(d->prev_p)=d->next;
385 if (d->next)
386 d->next->prev_p=d->prev_p;
387 d->prev_p=NULL;
388 d->next=NULL;
389 }
390
391 /* This block's code is about to be thrown away, so it no longer
392 depends on anything else */
393 static __inline__ void remove_deps(blockinfo* bi)
394 {
395 remove_dep(&(bi->dep[0]));
396 remove_dep(&(bi->dep[1]));
397 }
398
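/* The dependency's jmp_off points at the 32-bit displacement field of a
   direct jump; x86 rel32 operands are relative to the end of that 4-byte
   field, hence the +4 below. */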
399 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
400 {
401 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
402 }
403
404 /********************************************************************
405 * Soft flush handling support functions *
406 ********************************************************************/
407
408 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
409 {
410 //write_log("bi is %p\n",bi);
411 if (dh!=bi->direct_handler_to_use) {
412 dependency* x=bi->deplist;
413 //write_log("bi->deplist=%p\n",bi->deplist);
414 while (x) {
415 //write_log("x is %p\n",x);
416 //write_log("x->next is %p\n",x->next);
417 //write_log("x->prev_p is %p\n",x->prev_p);
418
419 if (x->jmp_off) {
420 adjust_jmpdep(x,dh);
421 }
422 x=x->next;
423 }
424 bi->direct_handler_to_use=dh;
425 }
426 }
427
428 static __inline__ void invalidate_block(blockinfo* bi)
429 {
430 int i;
431
432 bi->optlevel=0;
433 bi->count=optcount[0]-1;
434 bi->handler=NULL;
435 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
436 bi->direct_handler=NULL;
437 set_dhtu(bi,bi->direct_pen);
438 bi->needed_flags=0xff;
439 bi->status=BI_INVALID;
440 for (i=0;i<2;i++) {
441 bi->dep[i].jmp_off=NULL;
442 bi->dep[i].target=NULL;
443 }
444 remove_deps(bi);
445 }
446
447 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
448 {
449 blockinfo* tbi=get_blockinfo_addr((void*)target);
450
451 Dif(!tbi) {
452 write_log("Could not create jmpdep!\n");
453 abort();
454 }
455 bi->dep[i].jmp_off=jmpaddr;
456 bi->dep[i].source=bi;
457 bi->dep[i].target=tbi;
458 bi->dep[i].next=tbi->deplist;
459 if (bi->dep[i].next)
460 bi->dep[i].next->prev_p=&(bi->dep[i].next);
461 bi->dep[i].prev_p=&(tbi->deplist);
462 tbi->deplist=&(bi->dep[i]);
463 }
464
465 static __inline__ void block_need_recompile(blockinfo * bi)
466 {
467 uae_u32 cl = cacheline(bi->pc_p);
468
469 set_dhtu(bi, bi->direct_pen);
470 bi->direct_handler = bi->direct_pen;
471
472 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
473 bi->handler = (cpuop_func *)popall_execute_normal;
474 if (bi == cache_tags[cl + 1].bi)
475 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
476 bi->status = BI_NEED_RECOMP;
477 }
478
479 static __inline__ void mark_callers_recompile(blockinfo * bi)
480 {
481 dependency *x = bi->deplist;
482
483 while (x) {
484 dependency *next = x->next; /* This disappears when we mark for
485 * recompilation and thus remove the
486 * blocks from the lists */
487 if (x->jmp_off) {
488 blockinfo *cbi = x->source;
489
490 Dif(cbi->status == BI_INVALID) {
491 // write_log("invalid block in dependency list\n"); // FIXME?
492 // abort();
493 }
494 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
495 block_need_recompile(cbi);
496 mark_callers_recompile(cbi);
497 }
498 else if (cbi->status == BI_COMPILING) {
499 redo_current_block = 1;
500 }
501 else if (cbi->status == BI_NEED_RECOMP) {
502 /* nothing */
503 }
504 else {
505 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
506 }
507 }
508 x = next;
509 }
510 }
511
512 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
513 {
514 blockinfo* bi=get_blockinfo_addr(addr);
515 int i;
516
517 if (!bi) {
518 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
519 if (hold_bi[i]) {
520 uae_u32 cl=cacheline(addr);
521
522 bi=hold_bi[i];
523 hold_bi[i]=NULL;
524 bi->pc_p=(uae_u8 *)addr;
525 invalidate_block(bi);
526 add_to_active(bi);
527 add_to_cl_list(bi);
528
529 }
530 }
531 }
532 if (!bi) {
533 write_log("Looking for blockinfo, can't find free one\n");
534 abort();
535 }
536 return bi;
537 }
538
539 static void prepare_block(blockinfo* bi);
540
541 /* Management of blockinfos.
542
543 A blockinfo struct is allocated whenever a new block has to be
544 compiled. If the list of free blockinfos is empty, we allocate a new
545 pool of blockinfos and link the newly created blockinfos together
546 into the list of free blockinfos. Otherwise, we simply pop a structure
547 off the free list.
548
549 Blockinfos are lazily deallocated, i.e. chained together in the
550 list of free blockinfos whenever a translation cache flush (hard or
551 soft) request occurs.
552 */
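/* Usage sketch (illustrative): both allocator flavours below share the
 * same two-call interface, so the alloc_/free_ wrappers further down
 * need not care which one is compiled in:
 *
 *   blockinfo *bi = BlockInfoAllocator.acquire();  // pop the free list,
 *                                                  // grow a pool if empty
 *   ...
 *   BlockInfoAllocator.release(bi);                // O(1) push back
 */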
553
554 template< class T >
555 class LazyBlockAllocator
556 {
557 enum {
558 kPoolSize = 1 + 4096 / sizeof(T)
559 };
560 struct Pool {
561 T chunk[kPoolSize];
562 Pool * next;
563 };
564 Pool * mPools;
565 T * mChunks;
566 public:
567 LazyBlockAllocator() : mPools(0), mChunks(0) { }
568 ~LazyBlockAllocator();
569 T * acquire();
570 void release(T * const);
571 };
572
573 template< class T >
574 LazyBlockAllocator<T>::~LazyBlockAllocator()
575 {
576 Pool * currentPool = mPools;
577 while (currentPool) {
578 Pool * deadPool = currentPool;
579 currentPool = currentPool->next;
580 free(deadPool);
581 }
582 }
583
584 template< class T >
585 T * LazyBlockAllocator<T>::acquire()
586 {
587 if (!mChunks) {
588 // There is no chunk left, allocate a new pool and link the
589 // chunks into the free list
590 Pool * newPool = (Pool *)malloc(sizeof(Pool));
591 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
592 chunk->next = mChunks;
593 mChunks = chunk;
594 }
595 newPool->next = mPools;
596 mPools = newPool;
597 }
598 T * chunk = mChunks;
599 mChunks = chunk->next;
600 return chunk;
601 }
602
603 template< class T >
604 void LazyBlockAllocator<T>::release(T * const chunk)
605 {
606 chunk->next = mChunks;
607 mChunks = chunk;
608 }
609
610 template< class T >
611 class HardBlockAllocator
612 {
613 public:
614 T * acquire() {
615 T * data = (T *)current_compile_p;
616 current_compile_p += sizeof(T);
617 return data;
618 }
619
620 void release(T * const chunk) {
621 // Deallocated on invalidation
622 }
623 };
624
625 #if USE_SEPARATE_BIA
626 static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
627 static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
628 #else
629 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
630 static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
631 #endif
632
633 static __inline__ checksum_info *alloc_checksum_info(void)
634 {
635 checksum_info *csi = ChecksumInfoAllocator.acquire();
636 csi->next = NULL;
637 return csi;
638 }
639
640 static __inline__ void free_checksum_info(checksum_info *csi)
641 {
642 csi->next = NULL;
643 ChecksumInfoAllocator.release(csi);
644 }
645
646 static __inline__ void free_checksum_info_chain(checksum_info *csi)
647 {
648 while (csi != NULL) {
649 checksum_info *csi2 = csi->next;
650 free_checksum_info(csi);
651 csi = csi2;
652 }
653 }
654
655 static __inline__ blockinfo *alloc_blockinfo(void)
656 {
657 blockinfo *bi = BlockInfoAllocator.acquire();
658 #if USE_CHECKSUM_INFO
659 bi->csi = NULL;
660 #endif
661 return bi;
662 }
663
664 static __inline__ void free_blockinfo(blockinfo *bi)
665 {
666 #if USE_CHECKSUM_INFO
667 free_checksum_info_chain(bi->csi);
668 bi->csi = NULL;
669 #endif
670 BlockInfoAllocator.release(bi);
671 }
672
673 static __inline__ void alloc_blockinfos(void)
674 {
675 int i;
676 blockinfo* bi;
677
678 for (i=0;i<MAX_HOLD_BI;i++) {
679 if (hold_bi[i])
680 return;
681 bi=hold_bi[i]=alloc_blockinfo();
682 prepare_block(bi);
683 }
684 }
685
686 /********************************************************************
687 * Functions to emit data into memory, and other general support *
688 ********************************************************************/
689
690 static uae_u8* target;
691
692 static void emit_init(void)
693 {
694 }
695
696 static __inline__ void emit_byte(uae_u8 x)
697 {
698 *target++=x;
699 }
700
701 static __inline__ void emit_word(uae_u16 x)
702 {
703 *((uae_u16*)target)=x;
704 target+=2;
705 }
706
707 static __inline__ void emit_long(uae_u32 x)
708 {
709 *((uae_u32*)target)=x;
710 target+=4;
711 }
712
713 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
714 {
715 memcpy((uae_u8 *)target,block,blocklen);
716 target+=blocklen;
717 }
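/* Usage sketch (illustrative): code generation appends raw bytes at
 * 'target', e.g. emitting a single x86 NOP followed by a 32-bit literal:
 *
 *   set_target(current_compile_p);
 *   emit_byte(0x90);        // NOP
 *   emit_long(0x12345678);  // stored in host (little-endian) order on x86
 *   current_compile_p = get_target();
 */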
718
719 static __inline__ uae_u32 reverse32(uae_u32 v)
720 {
721 #if 1
722 // gb-- We have specialized byteswapping functions, just use them
723 return do_byteswap_32(v);
724 #else
725 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
726 #endif
727 }
728
729 /********************************************************************
730 * Getting the information about the target CPU *
731 ********************************************************************/
732
733 #include "codegen_x86.cpp"
734
735 void set_target(uae_u8* t)
736 {
737 target=t;
738 }
739
740 static __inline__ uae_u8* get_target_noopt(void)
741 {
742 return target;
743 }
744
745 __inline__ uae_u8* get_target(void)
746 {
747 return get_target_noopt();
748 }
749
750
751 /********************************************************************
752 * Flags status handling. EMIT TIME! *
753 ********************************************************************/
754
755 static void bt_l_ri_noclobber(R4 r, IMM i);
756
757 static void make_flags_live_internal(void)
758 {
759 if (live.flags_in_flags==VALID)
760 return;
761 Dif (live.flags_on_stack==TRASH) {
762 write_log("Want flags, got something on stack, but it is TRASH\n");
763 abort();
764 }
765 if (live.flags_on_stack==VALID) {
766 int tmp;
767 tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
768 raw_reg_to_flags(tmp);
769 unlock2(tmp);
770
771 live.flags_in_flags=VALID;
772 return;
773 }
774 write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
775 live.flags_in_flags,live.flags_on_stack);
776 abort();
777 }
778
779 static void flags_to_stack(void)
780 {
781 if (live.flags_on_stack==VALID)
782 return;
783 if (!live.flags_are_important) {
784 live.flags_on_stack=VALID;
785 return;
786 }
787 Dif (live.flags_in_flags!=VALID)
788 abort();
789 else {
790 int tmp;
791 tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
792 raw_flags_to_reg(tmp);
793 unlock2(tmp);
794 }
795 live.flags_on_stack=VALID;
796 }
797
798 static __inline__ void clobber_flags(void)
799 {
800 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
801 flags_to_stack();
802 live.flags_in_flags=TRASH;
803 }
804
805 /* Prepare for leaving the compiled stuff */
806 static __inline__ void flush_flags(void)
807 {
808 flags_to_stack();
809 return;
810 }
811
812 int touchcnt;
813
814 /********************************************************************
815 * Partial register flushing for optimized calls *
816 ********************************************************************/
817
818 struct regusage {
819 uae_u16 rmask;
820 uae_u16 wmask;
821 };
822
823 static inline void ru_set(uae_u16 *mask, int reg)
824 {
825 #if USE_OPTIMIZED_CALLS
826 *mask |= 1 << reg;
827 #endif
828 }
829
830 static inline bool ru_get(const uae_u16 *mask, int reg)
831 {
832 #if USE_OPTIMIZED_CALLS
833 return (*mask & (1 << reg));
834 #else
835 /* Default: instruction reads & writes the register */
836 return true;
837 #endif
838 }
839
840 static inline void ru_set_read(regusage *ru, int reg)
841 {
842 ru_set(&ru->rmask, reg);
843 }
844
845 static inline void ru_set_write(regusage *ru, int reg)
846 {
847 ru_set(&ru->wmask, reg);
848 }
849
850 static inline bool ru_read_p(const regusage *ru, int reg)
851 {
852 return ru_get(&ru->rmask, reg);
853 }
854
855 static inline bool ru_write_p(const regusage *ru, int reg)
856 {
857 return ru_get(&ru->wmask, reg);
858 }
859
860 static void ru_fill_ea(regusage *ru, int reg, amodes mode,
861 wordsizes size, int write_mode)
862 {
863 switch (mode) {
864 case Areg:
865 reg += 8;
866 /* fall through */
867 case Dreg:
868 ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
869 break;
870 case Ad16:
871 /* skip displacement */
872 m68k_pc_offset += 2;
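/* fall through */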
873 case Aind:
874 case Aipi:
875 case Apdi:
876 ru_set_read(ru, reg+8);
877 break;
878 case Ad8r:
879 ru_set_read(ru, reg+8);
880 /* fall through */
881 case PC8r: {
882 uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
883 reg = (dp >> 12) & 15;
884 ru_set_read(ru, reg);
885 if (dp & 0x100)
886 m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
887 break;
888 }
889 case PC16:
890 case absw:
891 case imm0:
892 case imm1:
893 m68k_pc_offset += 2;
894 break;
895 case absl:
896 case imm2:
897 m68k_pc_offset += 4;
898 break;
899 case immi:
900 m68k_pc_offset += (size == sz_long) ? 4 : 2;
901 break;
902 }
903 }
904
905 /* TODO: split into a static initialization part and a dynamic one
906 (instructions depending on extension words) */
907 static void ru_fill(regusage *ru, uae_u32 opcode)
908 {
909 m68k_pc_offset += 2;
910
911 /* Default: no register is used or written to */
912 ru->rmask = 0;
913 ru->wmask = 0;
914
915 uae_u32 real_opcode = cft_map(opcode);
916 struct instr *dp = &table68k[real_opcode];
917
918 bool rw_dest = true;
919 bool handled = false;
920
921 /* Handle some instructions specifically */
922 uae_u16 reg, ext;
923 switch (dp->mnemo) {
924 case i_BFCHG:
925 case i_BFCLR:
926 case i_BFEXTS:
927 case i_BFEXTU:
928 case i_BFFFO:
929 case i_BFINS:
930 case i_BFSET:
931 case i_BFTST:
932 ext = comp_get_iword((m68k_pc_offset+=2)-2);
933 if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
934 if (ext & 0x020) ru_set_read(ru, ext & 7);
935 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
936 if (dp->dmode == Dreg)
937 ru_set_read(ru, dp->dreg);
938 switch (dp->mnemo) {
939 case i_BFEXTS:
940 case i_BFEXTU:
941 case i_BFFFO:
942 ru_set_write(ru, (ext >> 12) & 7);
943 break;
944 case i_BFINS:
945 ru_set_read(ru, (ext >> 12) & 7);
946 /* fall through */
947 case i_BFCHG:
948 case i_BFCLR:
949 case i_BFSET:
950 if (dp->dmode == Dreg)
951 ru_set_write(ru, dp->dreg);
952 break;
953 }
954 handled = true;
955 rw_dest = false;
956 break;
957
958 case i_BTST:
959 rw_dest = false;
960 break;
961
962 case i_CAS:
963 {
964 ext = comp_get_iword((m68k_pc_offset+=2)-2);
965 int Du = ext & 7;
966 ru_set_read(ru, Du);
967 int Dc = (ext >> 6) & 7;
968 ru_set_read(ru, Dc);
969 ru_set_write(ru, Dc);
970 break;
971 }
972 case i_CAS2:
973 {
974 int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
975 ext = comp_get_iword((m68k_pc_offset+=2)-2);
976 Rn1 = (ext >> 12) & 15;
977 Du1 = (ext >> 6) & 7;
978 Dc1 = ext & 7;
979 ru_set_read(ru, Rn1);
980 ru_set_read(ru, Du1);
981 ru_set_read(ru, Dc1);
982 ru_set_write(ru, Dc1);
983 ext = comp_get_iword((m68k_pc_offset+=2)-2);
984 Rn2 = (ext >> 12) & 15;
985 Du2 = (ext >> 6) & 7;
986 Dc2 = ext & 7;
987 ru_set_read(ru, Rn2);
988 ru_set_read(ru, Du2);
989 ru_set_write(ru, Dc2);
990 break;
991 }
992 case i_DIVL: case i_MULL:
993 m68k_pc_offset += 2;
994 break;
995 case i_LEA:
996 case i_MOVE: case i_MOVEA: case i_MOVE16:
997 rw_dest = false;
998 break;
999 case i_PACK: case i_UNPK:
1000 rw_dest = false;
1001 m68k_pc_offset += 2;
1002 break;
1003 case i_TRAPcc:
1004 m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1005 break;
1006 case i_RTR:
1007 /* do nothing, just for coverage debugging */
1008 break;
1009 /* TODO: handle EXG instruction */
1010 }
1011
1012 /* Handle A-Traps better */
1013 if ((real_opcode & 0xf000) == 0xa000) {
1014 handled = true;
1015 }
1016
1017 /* Handle EmulOps better */
1018 if ((real_opcode & 0xff00) == 0x7100) {
1019 handled = true;
1020 ru->rmask = 0xffff;
1021 ru->wmask = 0;
1022 }
1023
1024 if (dp->suse && !handled)
1025 ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1026
1027 if (dp->duse && !handled)
1028 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1029
1030 if (rw_dest)
1031 ru->rmask |= ru->wmask;
1032
1033 handled = handled || dp->suse || dp->duse;
1034
1035 /* Mark all registers as used/written if the instruction may trap */
1036 if (may_trap(opcode)) {
1037 handled = true;
1038 ru->rmask = 0xffff;
1039 ru->wmask = 0xffff;
1040 }
1041
1042 if (!handled) {
1043 write_log("ru_fill: %04x = { %04x, %04x }\n",
1044 real_opcode, ru->rmask, ru->wmask);
1045 abort();
1046 }
1047 }
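/* Usage sketch (illustrative): register numbering is D0-D7 = 0..7,
 * A0-A7 = 8..15 (see the Areg case in ru_fill_ea above). To ask whether
 * an opcode reads D2 and writes A0:
 *
 *   regusage ru;
 *   ru_fill(&ru, opcode);          // also advances m68k_pc_offset
 *   bool reads_d2  = ru_read_p(&ru, 2);
 *   bool writes_a0 = ru_write_p(&ru, 8);
 */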
1048
1049 /********************************************************************
1050 * register allocation per block logging *
1051 ********************************************************************/
1052
1053 static uae_s8 vstate[VREGS];
1054 static uae_s8 vwritten[VREGS];
1055 static uae_s8 nstate[N_REGS];
1056
1057 #define L_UNKNOWN -127
1058 #define L_UNAVAIL -1
1059 #define L_NEEDED -2
1060 #define L_UNNEEDED -3
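/* Lattice sketch (illustrative): every register starts a block as
 * L_UNKNOWN (log_startblock); the first read pins it to L_NEEDED, the
 * first clobber to L_UNNEEDED, and log_flush() pessimistically resolves
 * anything still unknown at the end of the block. */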
1061
1062 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1063 {
1064 int i;
1065
1066 for (i = 0; i < VREGS; i++)
1067 s->virt[i] = vstate[i];
1068 for (i = 0; i < N_REGS; i++)
1069 s->nat[i] = nstate[i];
1070 }
1071
1072 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1073 {
1074 int i;
1075 int reverse = 0;
1076
1077 for (i = 0; i < VREGS; i++) {
1078 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1079 return 1;
1080 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1081 reverse++;
1082 }
1083 for (i = 0; i < N_REGS; i++) {
1084 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1085 return 1;
1086 if (nstate[i] < 0 && s->nat[i] >= 0)
1087 reverse++;
1088 }
1089 if (reverse >= 2 && USE_MATCH)
1090 return 1; /* In this case, it might be worth recompiling the
1091 * callers */
1092 return 0;
1093 }
1094
1095 static __inline__ void log_startblock(void)
1096 {
1097 int i;
1098
1099 for (i = 0; i < VREGS; i++) {
1100 vstate[i] = L_UNKNOWN;
1101 vwritten[i] = 0;
1102 }
1103 for (i = 0; i < N_REGS; i++)
1104 nstate[i] = L_UNKNOWN;
1105 }
1106
1107 /* Using an n-reg for a temp variable */
1108 static __inline__ void log_isused(int n)
1109 {
1110 if (nstate[n] == L_UNKNOWN)
1111 nstate[n] = L_UNAVAIL;
1112 }
1113
1114 static __inline__ void log_visused(int r)
1115 {
1116 if (vstate[r] == L_UNKNOWN)
1117 vstate[r] = L_NEEDED;
1118 }
1119
1120 static __inline__ void do_load_reg(int n, int r)
1121 {
1122 if (r == FLAGTMP)
1123 raw_load_flagreg(n, r);
1124 else if (r == FLAGX)
1125 raw_load_flagx(n, r);
1126 else
1127 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1128 }
1129
1130 static __inline__ void check_load_reg(int n, int r)
1131 {
1132 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1133 }
1134
1135 static __inline__ void log_vwrite(int r)
1136 {
1137 vwritten[r] = 1;
1138 }
1139
1140 /* Using an n-reg to hold a v-reg */
1141 static __inline__ void log_isreg(int n, int r)
1142 {
1143 static int count = 0;
1144
1145 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1146 nstate[n] = r;
1147 else {
1148 do_load_reg(n, r);
1149 if (nstate[n] == L_UNKNOWN)
1150 nstate[n] = L_UNAVAIL;
1151 }
1152 if (vstate[r] == L_UNKNOWN)
1153 vstate[r] = L_NEEDED;
1154 }
1155
1156 static __inline__ void log_clobberreg(int r)
1157 {
1158 if (vstate[r] == L_UNKNOWN)
1159 vstate[r] = L_UNNEEDED;
1160 }
1161
1162 /* This ends all possibility of clever register allocation */
1163
1164 static __inline__ void log_flush(void)
1165 {
1166 int i;
1167
1168 for (i = 0; i < VREGS; i++)
1169 if (vstate[i] == L_UNKNOWN)
1170 vstate[i] = L_NEEDED;
1171 for (i = 0; i < N_REGS; i++)
1172 if (nstate[i] == L_UNKNOWN)
1173 nstate[i] = L_UNAVAIL;
1174 }
1175
1176 static __inline__ void log_dump(void)
1177 {
1178 int i;
1179
1180 return;
1181
1182 write_log("----------------------\n");
1183 for (i = 0; i < N_REGS; i++) {
1184 switch (nstate[i]) {
1185 case L_UNKNOWN:
1186 write_log("Nat %d : UNKNOWN\n", i);
1187 break;
1188 case L_UNAVAIL:
1189 write_log("Nat %d : UNAVAIL\n", i);
1190 break;
1191 default:
1192 write_log("Nat %d : %d\n", i, nstate[i]);
1193 break;
1194 }
1195 }
1196 for (i = 0; i < VREGS; i++) {
1197 if (vstate[i] == L_UNNEEDED)
1198 write_log("Virt %d: UNNEEDED\n", i);
1199 }
1200 }
1201
1202 /********************************************************************
1203 * register status handling. EMIT TIME! *
1204 ********************************************************************/
1205
1206 static __inline__ void set_status(int r, int status)
1207 {
1208 if (status == ISCONST)
1209 log_clobberreg(r);
1210 live.state[r].status=status;
1211 }
1212
1213 static __inline__ int isinreg(int r)
1214 {
1215 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1216 }
1217
1218 static __inline__ void adjust_nreg(int r, uae_u32 val)
1219 {
1220 if (!val)
1221 return;
1222 raw_lea_l_brr(r,r,val);
1223 }
1224
1225 static void tomem(int r)
1226 {
1227 int rr=live.state[r].realreg;
1228
1229 if (isinreg(r)) {
1230 if (live.state[r].val && live.nat[rr].nholds==1
1231 && !live.nat[rr].locked) {
1232 // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1233 // live.state[r].val,r,rr,target);
1234 adjust_nreg(rr,live.state[r].val);
1235 live.state[r].val=0;
1236 live.state[r].dirtysize=4;
1237 set_status(r,DIRTY);
1238 }
1239 }
1240
1241 if (live.state[r].status==DIRTY) {
1242 switch (live.state[r].dirtysize) {
1243 case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
1244 case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
1245 case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
1246 default: abort();
1247 }
1248 log_vwrite(r);
1249 set_status(r,CLEAN);
1250 live.state[r].dirtysize=0;
1251 }
1252 }
1253
1254 static __inline__ int isconst(int r)
1255 {
1256 return live.state[r].status==ISCONST;
1257 }
1258
1259 int is_const(int r)
1260 {
1261 return isconst(r);
1262 }
1263
1264 static __inline__ void writeback_const(int r)
1265 {
1266 if (!isconst(r))
1267 return;
1268 Dif (live.state[r].needflush==NF_HANDLER) {
1269 write_log("Trying to write back constant NF_HANDLER!\n");
1270 abort();
1271 }
1272
1273 raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
1274 log_vwrite(r);
1275 live.state[r].val=0;
1276 set_status(r,INMEM);
1277 }
1278
1279 static __inline__ void tomem_c(int r)
1280 {
1281 if (isconst(r)) {
1282 writeback_const(r);
1283 }
1284 else
1285 tomem(r);
1286 }
1287
1288 static void evict(int r)
1289 {
1290 int rr;
1291
1292 if (!isinreg(r))
1293 return;
1294 tomem(r);
1295 rr=live.state[r].realreg;
1296
1297 Dif (live.nat[rr].locked &&
1298 live.nat[rr].nholds==1) {
1299 write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1300 abort();
1301 }
1302
1303 live.nat[rr].nholds--;
1304 if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1305 int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1306 int thisind=live.state[r].realind;
1307
1308 live.nat[rr].holds[thisind]=topreg;
1309 live.state[topreg].realind=thisind;
1310 }
1311 live.state[r].realreg=-1;
1312 set_status(r,INMEM);
1313 }
1314
1315 static __inline__ void free_nreg(int r)
1316 {
1317 int i=live.nat[r].nholds;
1318
1319 while (i) {
1320 int vr;
1321
1322 --i;
1323 vr=live.nat[r].holds[i];
1324 evict(vr);
1325 }
1326 Dif (live.nat[r].nholds!=0) {
1327 write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1328 abort();
1329 }
1330 }
1331
1332 /* Use with care! */
1333 static __inline__ void isclean(int r)
1334 {
1335 if (!isinreg(r))
1336 return;
1337 live.state[r].validsize=4;
1338 live.state[r].dirtysize=0;
1339 live.state[r].val=0;
1340 set_status(r,CLEAN);
1341 }
1342
1343 static __inline__ void disassociate(int r)
1344 {
1345 isclean(r);
1346 evict(r);
1347 }
1348
1349 static __inline__ void set_const(int r, uae_u32 val)
1350 {
1351 disassociate(r);
1352 live.state[r].val=val;
1353 set_status(r,ISCONST);
1354 }
1355
1356 static __inline__ uae_u32 get_offset(int r)
1357 {
1358 return live.state[r].val;
1359 }
1360
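/* Allocation policy (illustrative summary): pick the least recently
   touched unlocked native register ('badness' is its touch stamp, zero
   if it holds nothing); the hint register gets a large bonus so it wins
   any near-tie, and a currently free register short-circuits the scan. */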
1361 static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1362 {
1363 int bestreg;
1364 uae_s32 when;
1365 int i;
1366 uae_s32 badness=0; /* to shut up gcc */
1367 bestreg=-1;
1368 when=2000000000;
1369
1370 for (i=N_REGS;i--;) {
1371 badness=live.nat[i].touched;
1372 if (live.nat[i].nholds==0)
1373 badness=0;
1374 if (i==hint)
1375 badness-=200000000;
1376 if (!live.nat[i].locked && badness<when) {
1377 if ((size==1 && live.nat[i].canbyte) ||
1378 (size==2 && live.nat[i].canword) ||
1379 (size==4)) {
1380 bestreg=i;
1381 when=badness;
1382 if (live.nat[i].nholds==0 && hint<0)
1383 break;
1384 if (i==hint)
1385 break;
1386 }
1387 }
1388 }
1389 Dif (bestreg==-1)
1390 abort();
1391
1392 if (live.nat[bestreg].nholds>0) {
1393 free_nreg(bestreg);
1394 }
1395 if (isinreg(r)) {
1396 int rr=live.state[r].realreg;
1397 /* This will happen if we read a partially dirty register at a
1398 bigger size */
1399 Dif (willclobber || live.state[r].validsize>=size)
1400 abort();
1401 Dif (live.nat[rr].nholds!=1)
1402 abort();
1403 if (size==4 && live.state[r].validsize==2) {
1404 log_isused(bestreg);
1405 log_visused(r);
1406 raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
1407 raw_bswap_32(bestreg);
1408 raw_zero_extend_16_rr(rr,rr);
1409 raw_zero_extend_16_rr(bestreg,bestreg);
1410 raw_bswap_32(bestreg);
1411 raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1412 live.state[r].validsize=4;
1413 live.nat[rr].touched=touchcnt++;
1414 return rr;
1415 }
1416 if (live.state[r].validsize==1) {
1417 /* Nothing yet */
1418 }
1419 evict(r);
1420 }
1421
1422 if (!willclobber) {
1423 if (live.state[r].status!=UNDEF) {
1424 if (isconst(r)) {
1425 raw_mov_l_ri(bestreg,live.state[r].val);
1426 live.state[r].val=0;
1427 live.state[r].dirtysize=4;
1428 set_status(r,DIRTY);
1429 log_isused(bestreg);
1430 }
1431 else {
1432 log_isreg(bestreg, r); /* This will also load it! */
1433 live.state[r].dirtysize=0;
1434 set_status(r,CLEAN);
1435 }
1436 }
1437 else {
1438 live.state[r].val=0;
1439 live.state[r].dirtysize=0;
1440 set_status(r,CLEAN);
1441 log_isused(bestreg);
1442 }
1443 live.state[r].validsize=4;
1444 }
1445 else { /* this is the easiest way, but not optimal. FIXME! */
1446 /* Now it's trickier, but hopefully still OK */
1447 if (!isconst(r) || size==4) {
1448 live.state[r].validsize=size;
1449 live.state[r].dirtysize=size;
1450 live.state[r].val=0;
1451 set_status(r,DIRTY);
1452 if (size == 4) {
1453 log_clobberreg(r);
1454 log_isused(bestreg);
1455 }
1456 else {
1457 log_visused(r);
1458 log_isused(bestreg);
1459 }
1460 }
1461 else {
1462 if (live.state[r].status!=UNDEF)
1463 raw_mov_l_ri(bestreg,live.state[r].val);
1464 live.state[r].val=0;
1465 live.state[r].validsize=4;
1466 live.state[r].dirtysize=4;
1467 set_status(r,DIRTY);
1468 log_isused(bestreg);
1469 }
1470 }
1471 live.state[r].realreg=bestreg;
1472 live.state[r].realind=live.nat[bestreg].nholds;
1473 live.nat[bestreg].touched=touchcnt++;
1474 live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1475 live.nat[bestreg].nholds++;
1476
1477 return bestreg;
1478 }
1479
1480 static int alloc_reg(int r, int size, int willclobber)
1481 {
1482 return alloc_reg_hinted(r,size,willclobber,-1);
1483 }
1484
1485 static void unlock2(int r)
1486 {
1487 Dif (!live.nat[r].locked)
1488 abort();
1489 live.nat[r].locked--;
1490 }
1491
1492 static void setlock(int r)
1493 {
1494 live.nat[r].locked++;
1495 }
1496
1497
1498 static void mov_nregs(int d, int s)
1499 {
1500 int ns=live.nat[s].nholds;
1501 int nd=live.nat[d].nholds;
1502 int i;
1503
1504 if (s==d)
1505 return;
1506
1507 if (nd>0)
1508 free_nreg(d);
1509
1510 log_isused(d);
1511 raw_mov_l_rr(d,s);
1512
1513 for (i=0;i<live.nat[s].nholds;i++) {
1514 int vs=live.nat[s].holds[i];
1515
1516 live.state[vs].realreg=d;
1517 live.state[vs].realind=i;
1518 live.nat[d].holds[i]=vs;
1519 }
1520 live.nat[d].nholds=live.nat[s].nholds;
1521
1522 live.nat[s].nholds=0;
1523 }
1524
1525
1526 static __inline__ void make_exclusive(int r, int size, int spec)
1527 {
1528 int clobber;
1529 reg_status oldstate;
1530 int rr=live.state[r].realreg;
1531 int nr;
1532 int nind;
1533 int ndirt=0;
1534 int i;
1535
1536 if (!isinreg(r))
1537 return;
1538 if (live.nat[rr].nholds==1)
1539 return;
1540 for (i=0;i<live.nat[rr].nholds;i++) {
1541 int vr=live.nat[rr].holds[i];
1542 if (vr!=r &&
1543 (live.state[vr].status==DIRTY || live.state[vr].val))
1544 ndirt++;
1545 }
1546 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1547 /* Everything else is clean, so let's keep this register */
1548 for (i=0;i<live.nat[rr].nholds;i++) {
1549 int vr=live.nat[rr].holds[i];
1550 if (vr!=r) {
1551 evict(vr);
1552 i--; /* Try that index again! */
1553 }
1554 }
1555 Dif (live.nat[rr].nholds!=1) {
1556 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1557 rr,live.nat[rr].nholds,r);
1558 abort();
1559 }
1560 return;
1561 }
1562
1563 /* We have to split the register */
1564 oldstate=live.state[r];
1565
1566 setlock(rr); /* Make sure this doesn't go away */
1567 /* Forget about r being in the register rr */
1568 disassociate(r);
1569 /* Get a new register, that we will clobber completely */
1570 if (oldstate.status==DIRTY) {
1571 /* If dirtysize is <4, we need a register that can handle the
1572 possibly smaller memory store! Thanks to Quake68k for exposing
1573 this detail ;-) */
1574 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1575 }
1576 else {
1577 nr=alloc_reg_hinted(r,4,1,spec);
1578 }
1579 nind=live.state[r].realind;
1580 live.state[r]=oldstate; /* Keep all the old state info */
1581 live.state[r].realreg=nr;
1582 live.state[r].realind=nind;
1583
1584 if (size<live.state[r].validsize) {
1585 if (live.state[r].val) {
1586 /* Might as well compensate for the offset now */
1587 raw_lea_l_brr(nr,rr,oldstate.val);
1588 live.state[r].val=0;
1589 live.state[r].dirtysize=4;
1590 set_status(r,DIRTY);
1591 }
1592 else
1593 raw_mov_l_rr(nr,rr); /* Make another copy */
1594 }
1595 unlock2(rr);
1596 }
1597
1598 static __inline__ void add_offset(int r, uae_u32 off)
1599 {
1600 live.state[r].val+=off;
1601 }
1602
1603 static __inline__ void remove_offset(int r, int spec)
1604 {
1605 reg_status oldstate;
1606 int rr;
1607
1608 if (isconst(r))
1609 return;
1610 if (live.state[r].val==0)
1611 return;
1612 if (isinreg(r) && live.state[r].validsize<4)
1613 evict(r);
1614
1615 if (!isinreg(r))
1616 alloc_reg_hinted(r,4,0,spec);
1617
1618 Dif (live.state[r].validsize!=4) {
1619 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1620 abort();
1621 }
1622 make_exclusive(r,0,-1);
1623 /* make_exclusive might have done the job already */
1624 if (live.state[r].val==0)
1625 return;
1626
1627 rr=live.state[r].realreg;
1628
1629 if (live.nat[rr].nholds==1) {
1630 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1631 // live.state[r].val,r,rr,target);
1632 adjust_nreg(rr,live.state[r].val);
1633 live.state[r].dirtysize=4;
1634 live.state[r].val=0;
1635 set_status(r,DIRTY);
1636 return;
1637 }
1638 write_log("Failed in remove_offset\n");
1639 abort();
1640 }
1641
1642 static __inline__ void remove_all_offsets(void)
1643 {
1644 int i;
1645
1646 for (i=0;i<VREGS;i++)
1647 remove_offset(i,-1);
1648 }
1649
1650 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1651 {
1652 int n;
1653 int answer=-1;
1654
1655 if (live.state[r].status==UNDEF) {
1656 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1657 }
1658 if (!can_offset)
1659 remove_offset(r,spec);
1660
1661 if (isinreg(r) && live.state[r].validsize>=size) {
1662 n=live.state[r].realreg;
1663 switch(size) {
1664 case 1:
1665 if (live.nat[n].canbyte || spec>=0) {
1666 answer=n;
1667 }
1668 break;
1669 case 2:
1670 if (live.nat[n].canword || spec>=0) {
1671 answer=n;
1672 }
1673 break;
1674 case 4:
1675 answer=n;
1676 break;
1677 default: abort();
1678 }
1679 if (answer<0)
1680 evict(r);
1681 }
1682 /* either the value was in memory to start with, or it was evicted and
1683 is in memory now */
1684 if (answer<0) {
1685 answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1686 }
1687
1688 if (spec>=0 && spec!=answer) {
1689 /* Too bad */
1690 mov_nregs(spec,answer);
1691 answer=spec;
1692 }
1693 live.nat[answer].locked++;
1694 live.nat[answer].touched=touchcnt++;
1695 return answer;
1696 }
1697
1698
1699
1700 static int readreg(int r, int size)
1701 {
1702 return readreg_general(r,size,-1,0);
1703 }
1704
1705 static int readreg_specific(int r, int size, int spec)
1706 {
1707 return readreg_general(r,size,spec,0);
1708 }
1709
1710 static int readreg_offset(int r, int size)
1711 {
1712 return readreg_general(r,size,-1,1);
1713 }
1714
1715 /* writereg_general(r, size, spec)
1716 *
1717 * INPUT
1718 * - r : mid-layer register
1719 * - size : requested size (1/2/4)
1720 * - spec : -1 to let the allocator pick a register, otherwise specifies
1721 * the physical register to use in any case
1722 *
1723 * OUTPUT
1724 * - hard (physical, x86 here) register allocated to virtual register r
1725 */
1726 static __inline__ int writereg_general(int r, int size, int spec)
1727 {
1728 int n;
1729 int answer=-1;
1730
1731 if (size<4) {
1732 remove_offset(r,spec);
1733 }
1734
1735 make_exclusive(r,size,spec);
1736 if (isinreg(r)) {
1737 int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1738 int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1739 n=live.state[r].realreg;
1740
1741 Dif (live.nat[n].nholds!=1)
1742 abort();
1743 switch(size) {
1744 case 1:
1745 if (live.nat[n].canbyte || spec>=0) {
1746 live.state[r].dirtysize=ndsize;
1747 live.state[r].validsize=nvsize;
1748 answer=n;
1749 }
1750 break;
1751 case 2:
1752 if (live.nat[n].canword || spec>=0) {
1753 live.state[r].dirtysize=ndsize;
1754 live.state[r].validsize=nvsize;
1755 answer=n;
1756 }
1757 break;
1758 case 4:
1759 live.state[r].dirtysize=ndsize;
1760 live.state[r].validsize=nvsize;
1761 answer=n;
1762 break;
1763 default: abort();
1764 }
1765 if (answer<0)
1766 evict(r);
1767 }
1768 /* either the value was in memory to start with, or it was evicted and
1769 is in memory now */
1770 if (answer<0) {
1771 answer=alloc_reg_hinted(r,size,1,spec);
1772 }
1773 if (spec>=0 && spec!=answer) {
1774 mov_nregs(spec,answer);
1775 answer=spec;
1776 }
1777 if (live.state[r].status==UNDEF)
1778 live.state[r].validsize=4;
1779 live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1780 live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1781
1782 live.nat[answer].locked++;
1783 live.nat[answer].touched=touchcnt++;
1784 if (size==4) {
1785 live.state[r].val=0;
1786 }
1787 else {
1788 Dif (live.state[r].val) {
1789 write_log("Problem with val\n");
1790 abort();
1791 }
1792 }
1793 set_status(r,DIRTY);
1794 return answer;
1795 }
1796
1797 static int writereg(int r, int size)
1798 {
1799 return writereg_general(r,size,-1);
1800 }
1801
1802 static int writereg_specific(int r, int size, int spec)
1803 {
1804 return writereg_general(r,size,spec);
1805 }
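/* Usage sketch (illustrative): the canonical emit-time sequence mirrors
 * the MIDFUNC bodies further down, e.g. loading a long into a virtual
 * register (see mov_l_rm):
 *
 *   d = writereg(d, 4);     // returns the hard (x86) register backing d
 *   raw_mov_l_rm(d, s);     // emit the actual x86 instruction
 *   unlock2(d);             // release the lock taken by writereg
 */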
1806
1807 static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1808 {
1809 int n;
1810 int answer=-1;
1811
1812 if (live.state[r].status==UNDEF) {
1813 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1814 }
1815 remove_offset(r,spec);
1816 make_exclusive(r,0,spec);
1817
1818 Dif (wsize<rsize) {
1819 write_log("Cannot handle wsize<rsize in rmw_general()\n");
1820 abort();
1821 }
1822 if (isinreg(r) && live.state[r].validsize>=rsize) {
1823 n=live.state[r].realreg;
1824 Dif (live.nat[n].nholds!=1)
1825 abort();
1826
1827 switch(rsize) {
1828 case 1:
1829 if (live.nat[n].canbyte || spec>=0) {
1830 answer=n;
1831 }
1832 break;
1833 case 2:
1834 if (live.nat[n].canword || spec>=0) {
1835 answer=n;
1836 }
1837 break;
1838 case 4:
1839 answer=n;
1840 break;
1841 default: abort();
1842 }
1843 if (answer<0)
1844 evict(r);
1845 }
1846 /* either the value was in memory to start with, or it was evicted and
1847 is in memory now */
1848 if (answer<0) {
1849 answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1850 }
1851
1852 if (spec>=0 && spec!=answer) {
1853 /* Too bad */
1854 mov_nregs(spec,answer);
1855 answer=spec;
1856 }
1857 if (wsize>live.state[r].dirtysize)
1858 live.state[r].dirtysize=wsize;
1859 if (wsize>live.state[r].validsize)
1860 live.state[r].validsize=wsize;
1861 set_status(r,DIRTY);
1862
1863 live.nat[answer].locked++;
1864 live.nat[answer].touched=touchcnt++;
1865
1866 Dif (live.state[r].val) {
1867 write_log("Problem with val(rmw)\n");
1868 abort();
1869 }
1870 return answer;
1871 }
1872
1873 static int rmw(int r, int wsize, int rsize)
1874 {
1875 return rmw_general(r,wsize,rsize,-1);
1876 }
1877
1878 static int rmw_specific(int r, int wsize, int rsize, int spec)
1879 {
1880 return rmw_general(r,wsize,rsize,spec);
1881 }
1882
1883
1884 /* needed for restoring the carry flag on non-P6 cores */
1885 static void bt_l_ri_noclobber(R4 r, IMM i)
1886 {
1887 int size=4;
1888 if (i<16)
1889 size=2;
1890 r=readreg(r,size);
1891 raw_bt_l_ri(r,i);
1892 unlock2(r);
1893 }
1894
1895 /********************************************************************
1896 * FPU register status handling. EMIT TIME! *
1897 ********************************************************************/
1898
1899 static void f_tomem(int r)
1900 {
1901 if (live.fate[r].status==DIRTY) {
1902 #if USE_LONG_DOUBLE
1903 raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1904 #else
1905 raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1906 #endif
1907 live.fate[r].status=CLEAN;
1908 }
1909 }
1910
1911 static void f_tomem_drop(int r)
1912 {
1913 if (live.fate[r].status==DIRTY) {
1914 #if USE_LONG_DOUBLE
1915 raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1916 #else
1917 raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1918 #endif
1919 live.fate[r].status=INMEM;
1920 }
1921 }
1922
1923
1924 static __inline__ int f_isinreg(int r)
1925 {
1926 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1927 }
1928
1929 static void f_evict(int r)
1930 {
1931 int rr;
1932
1933 if (!f_isinreg(r))
1934 return;
1935 rr=live.fate[r].realreg;
1936 if (live.fat[rr].nholds==1)
1937 f_tomem_drop(r);
1938 else
1939 f_tomem(r);
1940
1941 Dif (live.fat[rr].locked &&
1942 live.fat[rr].nholds==1) {
1943 write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
1944 abort();
1945 }
1946
1947 live.fat[rr].nholds--;
1948 if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
1949 int topreg=live.fat[rr].holds[live.fat[rr].nholds];
1950 int thisind=live.fate[r].realind;
1951 live.fat[rr].holds[thisind]=topreg;
1952 live.fate[topreg].realind=thisind;
1953 }
1954 live.fate[r].status=INMEM;
1955 live.fate[r].realreg=-1;
1956 }
1957
1958 static __inline__ void f_free_nreg(int r)
1959 {
1960 int i=live.fat[r].nholds;
1961
1962 while (i) {
1963 int vr;
1964
1965 --i;
1966 vr=live.fat[r].holds[i];
1967 f_evict(vr);
1968 }
1969 Dif (live.fat[r].nholds!=0) {
1970 write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
1971 abort();
1972 }
1973 }
1974
1975
1976 /* Use with care! */
1977 static __inline__ void f_isclean(int r)
1978 {
1979 if (!f_isinreg(r))
1980 return;
1981 live.fate[r].status=CLEAN;
1982 }
1983
1984 static __inline__ void f_disassociate(int r)
1985 {
1986 f_isclean(r);
1987 f_evict(r);
1988 }
1989
1990
1991
1992 static int f_alloc_reg(int r, int willclobber)
1993 {
1994 int bestreg;
1995 uae_s32 when;
1996 int i;
1997 uae_s32 badness;
1998 bestreg=-1;
1999 when=2000000000;
2000 for (i=N_FREGS;i--;) {
2001 badness=live.fat[i].touched;
2002 if (live.fat[i].nholds==0)
2003 badness=0;
2004
2005 if (!live.fat[i].locked && badness<when) {
2006 bestreg=i;
2007 when=badness;
2008 if (live.fat[i].nholds==0)
2009 break;
2010 }
2011 }
2012 Dif (bestreg==-1)
2013 abort();
2014
2015 if (live.fat[bestreg].nholds>0) {
2016 f_free_nreg(bestreg);
2017 }
2018 if (f_isinreg(r)) {
2019 f_evict(r);
2020 }
2021
2022 if (!willclobber) {
2023 if (live.fate[r].status!=UNDEF) {
2024 #if USE_LONG_DOUBLE
2025 raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
2026 #else
2027 raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
2028 #endif
2029 }
2030 live.fate[r].status=CLEAN;
2031 }
2032 else {
2033 live.fate[r].status=DIRTY;
2034 }
2035 live.fate[r].realreg=bestreg;
2036 live.fate[r].realind=live.fat[bestreg].nholds;
2037 live.fat[bestreg].touched=touchcnt++;
2038 live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2039 live.fat[bestreg].nholds++;
2040
2041 return bestreg;
2042 }
2043
2044 static void f_unlock(int r)
2045 {
2046 Dif (!live.fat[r].locked)
2047 abort();
2048 live.fat[r].locked--;
2049 }
2050
2051 static void f_setlock(int r)
2052 {
2053 live.fat[r].locked++;
2054 }
2055
2056 static __inline__ int f_readreg(int r)
2057 {
2058 int n;
2059 int answer=-1;
2060
2061 if (f_isinreg(r)) {
2062 n=live.fate[r].realreg;
2063 answer=n;
2064 }
2065 /* either the value was in memory to start with, or it was evicted and
2066 is in memory now */
2067 if (answer<0)
2068 answer=f_alloc_reg(r,0);
2069
2070 live.fat[answer].locked++;
2071 live.fat[answer].touched=touchcnt++;
2072 return answer;
2073 }
2074
2075 static __inline__ void f_make_exclusive(int r, int clobber)
2076 {
2077 freg_status oldstate;
2078 int rr=live.fate[r].realreg;
2079 int nr;
2080 int nind;
2081 int ndirt=0;
2082 int i;
2083
2084 if (!f_isinreg(r))
2085 return;
2086 if (live.fat[rr].nholds==1)
2087 return;
2088 for (i=0;i<live.fat[rr].nholds;i++) {
2089 int vr=live.fat[rr].holds[i];
2090 if (vr!=r && live.fate[vr].status==DIRTY)
2091 ndirt++;
2092 }
2093 if (!ndirt && !live.fat[rr].locked) {
2094 /* Everything else is clean, so let's keep this register */
2095 for (i=0;i<live.fat[rr].nholds;i++) {
2096 int vr=live.fat[rr].holds[i];
2097 if (vr!=r) {
2098 f_evict(vr);
2099 i--; /* Try that index again! */
2100 }
2101 }
2102 Dif (live.fat[rr].nholds!=1) {
2103 write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2104 for (i=0;i<live.fat[rr].nholds;i++) {
2105 write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2106 live.fate[live.fat[rr].holds[i]].realreg,
2107 live.fate[live.fat[rr].holds[i]].realind);
2108 }
2109 write_log("\n");
2110 abort();
2111 }
2112 return;
2113 }
2114
2115 /* We have to split the register */
2116 oldstate=live.fate[r];
2117
2118 f_setlock(rr); /* Make sure this doesn't go away */
2119 /* Forget about r being in the register rr */
2120 f_disassociate(r);
2121 /* Get a new register, that we will clobber completely */
2122 nr=f_alloc_reg(r,1);
2123 nind=live.fate[r].realind;
2124 if (!clobber)
2125 raw_fmov_rr(nr,rr); /* Make another copy */
2126 live.fate[r]=oldstate; /* Keep all the old state info */
2127 live.fate[r].realreg=nr;
2128 live.fate[r].realind=nind;
2129 f_unlock(rr);
2130 }
2131
2132
2133 static __inline__ int f_writereg(int r)
2134 {
2135 int n;
2136 int answer=-1;
2137
2138 f_make_exclusive(r,1);
2139 if (f_isinreg(r)) {
2140 n=live.fate[r].realreg;
2141 answer=n;
2142 }
2143 if (answer<0) {
2144 answer=f_alloc_reg(r,1);
2145 }
2146 live.fate[r].status=DIRTY;
2147 live.fat[answer].locked++;
2148 live.fat[answer].touched=touchcnt++;
2149 return answer;
2150 }
2151
2152 static int f_rmw(int r)
2153 {
2154 int n;
2155
2156 f_make_exclusive(r,0);
2157 if (f_isinreg(r)) {
2158 n=live.fate[r].realreg;
2159 }
2160 else
2161 n=f_alloc_reg(r,0);
2162 live.fate[r].status=DIRTY;
2163 live.fat[n].locked++;
2164 live.fat[n].touched=touchcnt++;
2165 return n;
2166 }
2167
2168 static void fflags_into_flags_internal(uae_u32 tmp)
2169 {
2170 int r;
2171
2172 clobber_flags();
2173 r=f_readreg(FP_RESULT);
2174 if (FFLAG_NREG_CLOBBER_CONDITION) {
2175 int tmp2=tmp;
2176 tmp=writereg_specific(tmp,4,FFLAG_NREG);
2177 raw_fflags_into_flags(r);
2178 unlock2(tmp);
2179 forget_about(tmp2);
2180 }
2181 else
2182 raw_fflags_into_flags(r);
2183 f_unlock(r);
2184 live_flags();
2185 }
2186
2187
2188
2189
2190 /********************************************************************
2191 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2192 ********************************************************************/
2193
2194 /*
2195 * RULES FOR HANDLING REGISTERS:
2196 *
2197 * * In the function headers, order the parameters
2198 * - 1st registers written to
2199 * - 2nd read/modify/write registers
2200 * - 3rd registers read from
2201 * * Before calling raw_*, you must call readreg, writereg or rmw for
2202 * each register
2203 * * The order for this is
2204 * - 1st call remove_offset for all registers written to with size<4
2205 * - 2nd call readreg for all registers read without offset
2206 * - 3rd call rmw for all rmw registers
2207 * - 4th call readreg_offset for all registers that can handle offsets
2208 * - 5th call get_offset for all the registers from the previous step
2209 * - 6th call writereg for all written-to registers
2210 * - 7th call raw_*
2211 * - 8th unlock2 all registers that were locked
2212 */
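/* Worked example (illustrative): a two-operand read/modify instruction
 * following the rules above, modelled on the bts_l_rr handler below:
 *
 *   b = readreg(b, 4);      // 3rd: plain source read
 *   r = rmw(r, 4, 4);       // 2nd: read/modify/write operand
 *   raw_bts_l_rr(r, b);     // emit
 *   unlock2(r);             // unlock everything that was locked
 *   unlock2(b);
 */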
2213
2214 MIDFUNC(0,live_flags,(void))
2215 {
2216 live.flags_on_stack=TRASH;
2217 live.flags_in_flags=VALID;
2218 live.flags_are_important=1;
2219 }
2220 MENDFUNC(0,live_flags,(void))
2221
2222 MIDFUNC(0,dont_care_flags,(void))
2223 {
2224 live.flags_are_important=0;
2225 }
2226 MENDFUNC(0,dont_care_flags,(void))
2227
2228
2229 MIDFUNC(0,duplicate_carry,(void))
2230 {
2231 evict(FLAGX);
2232 make_flags_live_internal();
2233 COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
2234 log_vwrite(FLAGX);
2235 }
2236 MENDFUNC(0,duplicate_carry,(void))
2237
2238 MIDFUNC(0,restore_carry,(void))
2239 {
2240 if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2241 bt_l_ri_noclobber(FLAGX,0);
2242 }
2243 else { /* Avoid the stall the above creates.
2244 This is slow on non-P6, though.
2245 */
2246 COMPCALL(rol_b_ri)(FLAGX,8);
2247 isclean(FLAGX);
2248 }
2249 }
2250 MENDFUNC(0,restore_carry,(void))
2251
2252 MIDFUNC(0,start_needflags,(void))
2253 {
2254 needflags=1;
2255 }
2256 MENDFUNC(0,start_needflags,(void))
2257
2258 MIDFUNC(0,end_needflags,(void))
2259 {
2260 needflags=0;
2261 }
2262 MENDFUNC(0,end_needflags,(void))
2263
2264 MIDFUNC(0,make_flags_live,(void))
2265 {
2266 make_flags_live_internal();
2267 }
2268 MENDFUNC(0,make_flags_live,(void))
2269
2270 MIDFUNC(1,fflags_into_flags,(W2 tmp))
2271 {
2272 clobber_flags();
2273 fflags_into_flags_internal(tmp);
2274 }
2275 MENDFUNC(1,fflags_into_flags,(W2 tmp))
2276
2277
2278 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2279 {
2280 int size=4;
2281 if (i<16)
2282 size=2;
2283 CLOBBER_BT;
2284 r=readreg(r,size);
2285 raw_bt_l_ri(r,i);
2286 unlock2(r);
2287 }
2288 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2289
2290 MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2291 {
2292 CLOBBER_BT;
2293 r=readreg(r,4);
2294 b=readreg(b,4);
2295 raw_bt_l_rr(r,b);
2296 unlock2(r);
2297 unlock2(b);
2298 }
2299 MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2300
2301 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2302 {
2303 int size=4;
2304 if (i<16)
2305 size=2;
2306 CLOBBER_BT;
2307 r=rmw(r,size,size);
2308 raw_btc_l_ri(r,i);
2309 unlock2(r);
2310 }
2311 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2312
2313 MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2314 {
2315 CLOBBER_BT;
2316 b=readreg(b,4);
2317 r=rmw(r,4,4);
2318 raw_btc_l_rr(r,b);
2319 unlock2(r);
2320 unlock2(b);
2321 }
2322 MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2323
2324
2325 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2326 {
2327 int size=4;
2328 if (i<16)
2329 size=2;
2330 CLOBBER_BT;
2331 r=rmw(r,size,size);
2332 raw_btr_l_ri(r,i);
2333 unlock2(r);
2334 }
2335 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2336
2337 MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2338 {
2339 CLOBBER_BT;
2340 b=readreg(b,4);
2341 r=rmw(r,4,4);
2342 raw_btr_l_rr(r,b);
2343 unlock2(r);
2344 unlock2(b);
2345 }
2346 MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2347
2348
2349 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2350 {
2351 int size=4;
2352 if (i<16)
2353 size=2;
2354 CLOBBER_BT;
2355 r=rmw(r,size,size);
2356 raw_bts_l_ri(r,i);
2357 unlock2(r);
2358 }
2359 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2360
2361 MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2362 {
2363 CLOBBER_BT;
2364 b=readreg(b,4);
2365 r=rmw(r,4,4);
2366 raw_bts_l_rr(r,b);
2367 unlock2(r);
2368 unlock2(b);
2369 }
2370 MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2371
2372 MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2373 {
2374 CLOBBER_MOV;
2375 d=writereg(d,4);
2376 raw_mov_l_rm(d,s);
2377 unlock2(d);
2378 }
2379 MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2380
2381
2382 MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2383 {
2384 r=readreg(r,4);
2385 raw_call_r(r);
2386 unlock2(r);
2387 }
2388 MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2389
2390 MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2391 {
2392 CLOBBER_SUB;
2393 raw_sub_l_mi(d,s) ;
2394 }
2395 MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2396
2397 MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2398 {
2399 CLOBBER_MOV;
2400 raw_mov_l_mi(d,s) ;
2401 }
2402 MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2403
2404 MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2405 {
2406 CLOBBER_MOV;
2407 raw_mov_w_mi(d,s) ;
2408 }
2409 MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2410
2411 MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2412 {
2413 CLOBBER_MOV;
2414 raw_mov_b_mi(d,s) ;
2415 }
2416 MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2417
2418 MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2419 {
2420 if (!i && !needflags)
2421 return;
2422 CLOBBER_ROL;
2423 r=rmw(r,1,1);
2424 raw_rol_b_ri(r,i);
2425 unlock2(r);
2426 }
2427 MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2428
2429 MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2430 {
2431 if (!i && !needflags)
2432 return;
2433 CLOBBER_ROL;
2434 r=rmw(r,2,2);
2435 raw_rol_w_ri(r,i);
2436 unlock2(r);
2437 }
2438 MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2439
2440 MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2441 {
2442 if (!i && !needflags)
2443 return;
2444 CLOBBER_ROL;
2445 r=rmw(r,4,4);
2446 raw_rol_l_ri(r,i);
2447 unlock2(r);
2448 }
2449 MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2450
2451 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2452 {
2453 if (isconst(r)) {
2454 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2455 return;
2456 }
2457 CLOBBER_ROL;
2458 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2459 d=rmw(d,4,4);
2460 Dif (r!=1) {
2461 write_log("Illegal register %d in raw_rol_b\n",r);
2462 abort();
2463 }
2464 raw_rol_l_rr(d,r) ;
2465 unlock2(r);
2466 unlock2(d);
2467 }
2468 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2469
2470 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2471 { /* Can only do this with r==1, i.e. cl */
2472
2473 if (isconst(r)) {
2474 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2475 return;
2476 }
2477 CLOBBER_ROL;
2478 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2479 d=rmw(d,2,2);
2480 Dif (r!=1) {
2481 write_log("Illegal register %d in raw_rol_b\n",r);
2482 abort();
2483 }
2484 raw_rol_w_rr(d,r) ;
2485 unlock2(r);
2486 unlock2(d);
2487 }
2488 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2489
2490 MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2491 { /* Can only do this with r==1, i.e. cl */
2492
2493 if (isconst(r)) {
2494 COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2495 return;
2496 }
2497
2498 CLOBBER_ROL;
2499 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2500 d=rmw(d,1,1);
2501 Dif (r!=1) {
2502 write_log("Illegal register %d in raw_rol_b\n",r);
2503 abort();
2504 }
2505 raw_rol_b_rr(d,r) ;
2506 unlock2(r);
2507 unlock2(d);
2508 }
2509 MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2510
2511
2512 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2513 {
2514 if (isconst(r)) {
2515 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2516 return;
2517 }
2518 CLOBBER_SHLL;
2519 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2520 d=rmw(d,4,4);
2521 Dif (r!=1) {
2522 write_log("Illegal register %d in raw_rol_b\n",r);
2523 abort();
2524 }
2525 raw_shll_l_rr(d,r) ;
2526 unlock2(r);
2527 unlock2(d);
2528 }
2529 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2530
2531 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2532 { /* Can only do this with r==1, i.e. cl */
2533
2534 if (isconst(r)) {
2535 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2536 return;
2537 }
2538 CLOBBER_SHLL;
2539 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2540 d=rmw(d,2,2);
2541 Dif (r!=1) {
2542 write_log("Illegal register %d in raw_shll_b\n",r);
2543 abort();
2544 }
2545 raw_shll_w_rr(d,r) ;
2546 unlock2(r);
2547 unlock2(d);
2548 }
2549 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2550
2551 MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2552 { /* Can only do this with r==1, i.e. cl */
2553
2554 if (isconst(r)) {
2555 COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2556 return;
2557 }
2558
2559 CLOBBER_SHLL;
2560 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2561 d=rmw(d,1,1);
2562 Dif (r!=1) {
2563 write_log("Illegal register %d in raw_shll_b\n",r);
2564 abort();
2565 }
2566 raw_shll_b_rr(d,r) ;
2567 unlock2(r);
2568 unlock2(d);
2569 }
2570 MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2571
2572
2573 MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2574 {
2575 if (!i && !needflags)
2576 return;
2577 CLOBBER_ROR;
2578 r=rmw(r,1,1);
2579 raw_ror_b_ri(r,i);
2580 unlock2(r);
2581 }
2582 MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2583
2584 MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2585 {
2586 if (!i && !needflags)
2587 return;
2588 CLOBBER_ROR;
2589 r=rmw(r,2,2);
2590 raw_ror_w_ri(r,i);
2591 unlock2(r);
2592 }
2593 MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2594
2595 MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2596 {
2597 if (!i && !needflags)
2598 return;
2599 CLOBBER_ROR;
2600 r=rmw(r,4,4);
2601 raw_ror_l_ri(r,i);
2602 unlock2(r);
2603 }
2604 MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2605
2606 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2607 {
2608 if (isconst(r)) {
2609 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2610 return;
2611 }
2612 CLOBBER_ROR;
2613 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2614 d=rmw(d,4,4);
2615 raw_ror_l_rr(d,r) ;
2616 unlock2(r);
2617 unlock2(d);
2618 }
2619 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2620
2621 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2622 {
2623 if (isconst(r)) {
2624 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2625 return;
2626 }
2627 CLOBBER_ROR;
2628 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2629 d=rmw(d,2,2);
2630 raw_ror_w_rr(d,r) ;
2631 unlock2(r);
2632 unlock2(d);
2633 }
2634 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2635
2636 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2637 {
2638 if (isconst(r)) {
2639 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2640 return;
2641 }
2642
2643 CLOBBER_ROR;
2644 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2645 d=rmw(d,1,1);
2646 raw_ror_b_rr(d,r) ;
2647 unlock2(r);
2648 unlock2(d);
2649 }
2650 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2651
2652 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2653 {
2654 if (isconst(r)) {
2655 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2656 return;
2657 }
2658 CLOBBER_SHRL;
2659 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2660 d=rmw(d,4,4);
2661 Dif (r!=1) {
2662 write_log("Illegal register %d in raw_rol_b\n",r);
2663 abort();
2664 }
2665 raw_shrl_l_rr(d,r) ;
2666 unlock2(r);
2667 unlock2(d);
2668 }
2669 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2670
2671 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2672 { /* Can only do this with r==1, i.e. cl */
2673
2674 if (isconst(r)) {
2675 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2676 return;
2677 }
2678 CLOBBER_SHRL;
2679 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2680 d=rmw(d,2,2);
2681 Dif (r!=1) {
2682 write_log("Illegal register %d in raw_shrl_b\n",r);
2683 abort();
2684 }
2685 raw_shrl_w_rr(d,r) ;
2686 unlock2(r);
2687 unlock2(d);
2688 }
2689 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2690
2691 MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2692 { /* Can only do this with r==1, i.e. cl */
2693
2694 if (isconst(r)) {
2695 COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2696 return;
2697 }
2698
2699 CLOBBER_SHRL;
2700 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2701 d=rmw(d,1,1);
2702 Dif (r!=1) {
2703 write_log("Illegal register %d in raw_shrl_b\n",r);
2704 abort();
2705 }
2706 raw_shrl_b_rr(d,r) ;
2707 unlock2(r);
2708 unlock2(d);
2709 }
2710 MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2711
2712
2713
2714 MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2715 {
2716 if (!i && !needflags)
2717 return;
2718 if (isconst(r) && !needflags) {
2719 live.state[r].val<<=i;
2720 return;
2721 }
2722 CLOBBER_SHLL;
2723 r=rmw(r,4,4);
2724 raw_shll_l_ri(r,i);
2725 unlock2(r);
2726 }
2727 MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
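/* Note on the isconst() shortcut above: when r holds a known constant
 * and the 68k flags are dead, the shift costs no host code at all; it is
 * folded into the recorded value and only materialized when consumed.
 * A hypothetical sequence (values made up for illustration):
 *
 *   mov_l_ri(r,0x100);    // r becomes constant, val==0x100
 *   shll_l_ri(r,4);       // folds to val==0x1000, emits nothing
 *   mov_l_mr(0xf000,r);   // compiles to one move-immediate-to-memory
 */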
2728
2729 MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2730 {
2731 if (!i && !needflags)
2732 return;
2733 CLOBBER_SHLL;
2734 r=rmw(r,2,2);
2735 raw_shll_w_ri(r,i);
2736 unlock2(r);
2737 }
2738 MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2739
2740 MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2741 {
2742 if (!i && !needflags)
2743 return;
2744 CLOBBER_SHLL;
2745 r=rmw(r,1,1);
2746 raw_shll_b_ri(r,i);
2747 unlock2(r);
2748 }
2749 MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2750
2751 MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2752 {
2753 if (!i && !needflags)
2754 return;
2755 if (isconst(r) && !needflags) {
2756 live.state[r].val>>=i;
2757 return;
2758 }
2759 CLOBBER_SHRL;
2760 r=rmw(r,4,4);
2761 raw_shrl_l_ri(r,i);
2762 unlock2(r);
2763 }
2764 MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2765
2766 MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2767 {
2768 if (!i && !needflags)
2769 return;
2770 CLOBBER_SHRL;
2771 r=rmw(r,2,2);
2772 raw_shrl_w_ri(r,i);
2773 unlock2(r);
2774 }
2775 MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2776
2777 MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2778 {
2779 if (!i && !needflags)
2780 return;
2781 CLOBBER_SHRL;
2782 r=rmw(r,1,1);
2783 raw_shrl_b_ri(r,i);
2784 unlock2(r);
2785 }
2786 MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2787
2788 MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2789 {
2790 if (!i && !needflags)
2791 return;
2792 CLOBBER_SHRA;
2793 r=rmw(r,4,4);
2794 raw_shra_l_ri(r,i);
2795 unlock2(r);
2796 }
2797 MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2798
2799 MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2800 {
2801 if (!i && !needflags)
2802 return;
2803 CLOBBER_SHRA;
2804 r=rmw(r,2,2);
2805 raw_shra_w_ri(r,i);
2806 unlock2(r);
2807 }
2808 MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2809
2810 MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2811 {
2812 if (!i && !needflags)
2813 return;
2814 CLOBBER_SHRA;
2815 r=rmw(r,1,1);
2816 raw_shra_b_ri(r,i);
2817 unlock2(r);
2818 }
2819 MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2820
2821 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2822 {
2823 if (isconst(r)) {
2824 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2825 return;
2826 }
2827 CLOBBER_SHRA;
2828 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2829 d=rmw(d,4,4);
2830 Dif (r!=1) {
2831 write_log("Illegal register %d in raw_rol_b\n",r);
2832 abort();
2833 }
2834 raw_shra_l_rr(d,r) ;
2835 unlock2(r);
2836 unlock2(d);
2837 }
2838 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2839
2840 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2841 { /* Can only do this with r==1, i.e. cl */
2842
2843 if (isconst(r)) {
2844 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2845 return;
2846 }
2847 CLOBBER_SHRA;
2848 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2849 d=rmw(d,2,2);
2850 Dif (r!=1) {
2851 write_log("Illegal register %d in raw_shra_b\n",r);
2852 abort();
2853 }
2854 raw_shra_w_rr(d,r) ;
2855 unlock2(r);
2856 unlock2(d);
2857 }
2858 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2859
2860 MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2861 { /* Can only do this with r==1, i.e. cl */
2862
2863 if (isconst(r)) {
2864 COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2865 return;
2866 }
2867
2868 CLOBBER_SHRA;
2869 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2870 d=rmw(d,1,1);
2871 Dif (r!=1) {
2872 write_log("Illegal register %d in raw_shra_b\n",r);
2873 abort();
2874 }
2875 raw_shra_b_rr(d,r) ;
2876 unlock2(r);
2877 unlock2(d);
2878 }
2879 MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2880
2881
2882 MIDFUNC(2,setcc,(W1 d, IMM cc))
2883 {
2884 CLOBBER_SETCC;
2885 d=writereg(d,1);
2886 raw_setcc(d,cc);
2887 unlock2(d);
2888 }
2889 MENDFUNC(2,setcc,(W1 d, IMM cc))
2890
2891 MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2892 {
2893 CLOBBER_SETCC;
2894 raw_setcc_m(d,cc);
2895 }
2896 MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2897
2898 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2899 {
2900 if (d==s)
2901 return;
2902 CLOBBER_CMOV;
2903 s=readreg(s,4);
2904 d=rmw(d,4,4);
2905 raw_cmov_l_rr(d,s,cc);
2906 unlock2(s);
2907 unlock2(d);
2908 }
2909 MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2910
2911 MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2912 {
2913 CLOBBER_CMOV;
2914 d=rmw(d,4,4);
2915 raw_cmov_l_rm(d,s,cc);
2916 unlock2(d);
2917 }
2918 MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2919
2920 MIDFUNC(1,setzflg_l,(RW4 r))
2921 {
2922 if (setzflg_uses_bsf) {
2923 CLOBBER_BSF;
2924 r=rmw(r,4,4);
2925 raw_bsf_l_rr(r,r);
2926 unlock2(r);
2927 }
2928 else {
2929 Dif (live.flags_in_flags!=VALID) {
2930 write_log("setzflg() wanted flags in native flags, they are %d\n",
2931 live.flags_in_flags);
2932 abort();
2933 }
2934 r=readreg(r,4);
2935 int f=writereg(S11,4);
2936 int t=writereg(S12,4);
2937 raw_flags_set_zero(f,r,t);
2938 unlock2(f);
2939 unlock2(r);
2940 unlock2(t);
2941 }
2942 }
2943 MENDFUNC(1,setzflg_l,(RW4 r))
2944
2945 MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
2946 {
2947 CLOBBER_MUL;
2948 s=readreg(s,4);
2949 d=rmw(d,4,4);
2950 raw_imul_32_32(d,s);
2951 unlock2(s);
2952 unlock2(d);
2953 }
2954 MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
2955
2956 MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2957 {
2958 CLOBBER_MUL;
2959 s=rmw_specific(s,4,4,MUL_NREG2);
2960 d=rmw_specific(d,4,4,MUL_NREG1);
2961 raw_imul_64_32(d,s);
2962 unlock2(s);
2963 unlock2(d);
2964 }
2965 MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
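/* imul_64_32 and mul_64_32 (below) use rmw_specific() rather than rmw()
 * because the one-operand x86 multiply delivers its 64-bit product in the
 * fixed EDX:EAX pair; MUL_NREG1/MUL_NREG2 are presumed to name that pair
 * in the raw backend. Pinning the virtual registers there at allocation
 * time avoids a register shuffle after the raw_*mul_64_32() call. */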
2966
2967 MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2968 {
2969 CLOBBER_MUL;
2970 s=rmw_specific(s,4,4,MUL_NREG2);
2971 d=rmw_specific(d,4,4,MUL_NREG1);
2972 raw_mul_64_32(d,s);
2973 unlock2(s);
2974 unlock2(d);
2975 }
2976 MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2977
2978 MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
2979 {
2980 CLOBBER_MUL;
2981 s=readreg(s,4);
2982 d=rmw(d,4,4);
2983 raw_mul_32_32(d,s);
2984 unlock2(s);
2985 unlock2(d);
2986 }
2987 MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2988
2989 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
2990 {
2991 int isrmw;
2992
2993 if (isconst(s)) {
2994 set_const(d,(uae_s32)(uae_s16)live.state[s].val);
2995 return;
2996 }
2997
2998 CLOBBER_SE16;
2999 isrmw=(s==d);
3000 if (!isrmw) {
3001 s=readreg(s,2);
3002 d=writereg(d,4);
3003 }
3004 else { /* If we try to lock this twice, with different sizes, we
3005 are in trouble! */
3006 s=d=rmw(s,4,2);
3007 }
3008 raw_sign_extend_16_rr(d,s);
3009 if (!isrmw) {
3010 unlock2(d);
3011 unlock2(s);
3012 }
3013 else {
3014 unlock2(s);
3015 }
3016 }
3017 MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
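/* The isrmw distinction above is not cosmetic: calling readreg(s,2) and
 * writereg(d,4) when s and d are the same virtual register would lock it
 * twice with different sizes, which the allocator cannot handle. The
 * single rmw(s,4,2) lock covers both roles in that case; the same pattern
 * recurs in the other extend functions below. */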
3018
3019 MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3020 {
3021 int isrmw;
3022
3023 if (isconst(s)) {
3024 set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3025 return;
3026 }
3027
3028 isrmw=(s==d);
3029 CLOBBER_SE8;
3030 if (!isrmw) {
3031 s=readreg(s,1);
3032 d=writereg(d,4);
3033 }
3034 else { /* If we try to lock this twice, with different sizes, we
3035 are in trouble! */
3036 s=d=rmw(s,4,1);
3037 }
3038
3039 raw_sign_extend_8_rr(d,s);
3040
3041 if (!isrmw) {
3042 unlock2(d);
3043 unlock2(s);
3044 }
3045 else {
3046 unlock2(s);
3047 }
3048 }
3049 MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3050
3051
3052 MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3053 {
3054 int isrmw;
3055
3056 if (isconst(s)) {
3057 set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3058 return;
3059 }
3060
3061 isrmw=(s==d);
3062 CLOBBER_ZE16;
3063 if (!isrmw) {
3064 s=readreg(s,2);
3065 d=writereg(d,4);
3066 }
3067 else { /* If we try to lock this twice, with different sizes, we
3068 are in trouble! */
3069 s=d=rmw(s,4,2);
3070 }
3071 raw_zero_extend_16_rr(d,s);
3072 if (!isrmw) {
3073 unlock2(d);
3074 unlock2(s);
3075 }
3076 else {
3077 unlock2(s);
3078 }
3079 }
3080 MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3081
3082 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3083 {
3084 int isrmw;
3085 if (isconst(s)) {
3086 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3087 return;
3088 }
3089
3090 isrmw=(s==d);
3091 CLOBBER_ZE8;
3092 if (!isrmw) {
3093 s=readreg(s,1);
3094 d=writereg(d,4);
3095 }
3096 else { /* If we try to lock this twice, with different sizes, we
3097 are in trouble! */
3098 s=d=rmw(s,4,1);
3099 }
3100
3101 raw_zero_extend_8_rr(d,s);
3102
3103 if (!isrmw) {
3104 unlock2(d);
3105 unlock2(s);
3106 }
3107 else {
3108 unlock2(s);
3109 }
3110 }
3111 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3112
3113 MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3114 {
3115 if (d==s)
3116 return;
3117 if (isconst(s)) {
3118 COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3119 return;
3120 }
3121
3122 CLOBBER_MOV;
3123 s=readreg(s,1);
3124 d=writereg(d,1);
3125 raw_mov_b_rr(d,s);
3126 unlock2(d);
3127 unlock2(s);
3128 }
3129 MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3130
3131 MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3132 {
3133 if (d==s)
3134 return;
3135 if (isconst(s)) {
3136 COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3137 return;
3138 }
3139
3140 CLOBBER_MOV;
3141 s=readreg(s,2);
3142 d=writereg(d,2);
3143 raw_mov_w_rr(d,s);
3144 unlock2(d);
3145 unlock2(s);
3146 }
3147 MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3148
3149
3150 MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3151 {
3152 CLOBBER_MOV;
3153 baser=readreg(baser,4);
3154 index=readreg(index,4);
3155 d=writereg(d,4);
3156
3157 raw_mov_l_rrm_indexed(d,baser,index,factor);
3158 unlock2(d);
3159 unlock2(baser);
3160 unlock2(index);
3161 }
3162 MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3163
3164 MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3165 {
3166 CLOBBER_MOV;
3167 baser=readreg(baser,4);
3168 index=readreg(index,4);
3169 d=writereg(d,2);
3170
3171 raw_mov_w_rrm_indexed(d,baser,index,factor);
3172 unlock2(d);
3173 unlock2(baser);
3174 unlock2(index);
3175 }
3176 MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3177
3178 MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3179 {
3180 CLOBBER_MOV;
3181 baser=readreg(baser,4);
3182 index=readreg(index,4);
3183 d=writereg(d,1);
3184
3185 raw_mov_b_rrm_indexed(d,baser,index,factor);
3186
3187 unlock2(d);
3188 unlock2(baser);
3189 unlock2(index);
3190 }
3191 MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3192
3193
3194 MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3195 {
3196 CLOBBER_MOV;
3197 baser=readreg(baser,4);
3198 index=readreg(index,4);
3199 s=readreg(s,4);
3200
3201 Dif (baser==s || index==s)
3202 abort();
3203
3204
3205 raw_mov_l_mrr_indexed(baser,index,factor,s);
3206 unlock2(s);
3207 unlock2(baser);
3208 unlock2(index);
3209 }
3210 MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3211
3212 MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3213 {
3214 CLOBBER_MOV;
3215 baser=readreg(baser,4);
3216 index=readreg(index,4);
3217 s=readreg(s,2);
3218
3219 raw_mov_w_mrr_indexed(baser,index,factor,s);
3220 unlock2(s);
3221 unlock2(baser);
3222 unlock2(index);
3223 }
3224 MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3225
3226 MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3227 {
3228 CLOBBER_MOV;
3229 s=readreg(s,1);
3230 baser=readreg(baser,4);
3231 index=readreg(index,4);
3232
3233 raw_mov_b_mrr_indexed(baser,index,factor,s);
3234 unlock2(s);
3235 unlock2(baser);
3236 unlock2(index);
3237 }
3238 MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3239
3240
3241 MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3242 {
3243 int basereg=baser;
3244 int indexreg=index;
3245
3246 CLOBBER_MOV;
3247 s=readreg(s,4);
3248 baser=readreg_offset(baser,4);
3249 index=readreg_offset(index,4);
3250
3251 base+=get_offset(basereg);
3252 base+=factor*get_offset(indexreg);
3253
3254 raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3255 unlock2(s);
3256 unlock2(baser);
3257 unlock2(index);
3258 }
3259 MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3260
3261 MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3262 {
3263 int basereg=baser;
3264 int indexreg=index;
3265
3266 CLOBBER_MOV;
3267 s=readreg(s,2);
3268 baser=readreg_offset(baser,4);
3269 index=readreg_offset(index,4);
3270
3271 base+=get_offset(basereg);
3272 base+=factor*get_offset(indexreg);
3273
3274 raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3275 unlock2(s);
3276 unlock2(baser);
3277 unlock2(index);
3278 }
3279 MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3280
3281 MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3282 {
3283 int basereg=baser;
3284 int indexreg=index;
3285
3286 CLOBBER_MOV;
3287 s=readreg(s,1);
3288 baser=readreg_offset(baser,4);
3289 index=readreg_offset(index,4);
3290
3291 base+=get_offset(basereg);
3292 base+=factor*get_offset(indexreg);
3293
3294 raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3295 unlock2(s);
3296 unlock2(baser);
3297 unlock2(index);
3298 }
3299 MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3300
3301
3302
3303 /* Read a long from base+baser+factor*index */
3304 MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3305 {
3306 int basereg=baser;
3307 int indexreg=index;
3308
3309 CLOBBER_MOV;
3310 baser=readreg_offset(baser,4);
3311 index=readreg_offset(index,4);
3312 base+=get_offset(basereg);
3313 base+=factor*get_offset(indexreg);
3314 d=writereg(d,4);
3315 raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3316 unlock2(d);
3317 unlock2(baser);
3318 unlock2(index);
3319 }
3320 MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
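/* Pattern used throughout the *_brrm_indexed family: readreg_offset()
 * maps a register while leaving any pending constant offset attached, and
 * get_offset() folds that offset into the immediate displacement:
 *
 *   baser=readreg_offset(baser,4);
 *   base+=get_offset(basereg);           // pending offset of baser
 *   base+=factor*get_offset(indexreg);   // scaled pending offset of index
 *
 * so pointers adjusted by lazy lea/add operations cost nothing extra at
 * access time. */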
3321
3322
3323 MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3324 {
3325 int basereg=baser;
3326 int indexreg=index;
3327
3328 CLOBBER_MOV;
3329 remove_offset(d,-1);
3330 baser=readreg_offset(baser,4);
3331 index=readreg_offset(index,4);
3332 base+=get_offset(basereg);
3333 base+=factor*get_offset(indexreg);
3334 d=writereg(d,2);
3335 raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3336 unlock2(d);
3337 unlock2(baser);
3338 unlock2(index);
3339 }
3340 MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3341
3342
3343 MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3344 {
3345 int basereg=baser;
3346 int indexreg=index;
3347
3348 CLOBBER_MOV;
3349 remove_offset(d,-1);
3350 baser=readreg_offset(baser,4);
3351 index=readreg_offset(index,4);
3352 base+=get_offset(basereg);
3353 base+=factor*get_offset(indexreg);
3354 d=writereg(d,1);
3355 raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3356 unlock2(d);
3357 unlock2(baser);
3358 unlock2(index);
3359 }
3360 MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3361
3362 /* Read a long from base+factor*index */
3363 MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3364 {
3365 int indexreg=index;
3366
3367 if (isconst(index)) {
3368 COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3369 return;
3370 }
3371
3372 CLOBBER_MOV;
3373 index=readreg_offset(index,4);
3374 base+=get_offset(indexreg)*factor;
3375 d=writereg(d,4);
3376
3377 raw_mov_l_rm_indexed(d,base,index,factor);
3378 unlock2(index);
3379 unlock2(d);
3380 }
3381 MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3382
3383
3384 /* read the long at the address contained in s+offset and store in d */
3385 MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3386 {
3387 if (isconst(s)) {
3388 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3389 return;
3390 }
3391 CLOBBER_MOV;
3392 s=readreg(s,4);
3393 d=writereg(d,4);
3394
3395 raw_mov_l_rR(d,s,offset);
3396 unlock2(d);
3397 unlock2(s);
3398 }
3399 MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3400
3401 /* read the word at the address contained in s+offset and store in d */
3402 MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3403 {
3404 if (isconst(s)) {
3405 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3406 return;
3407 }
3408 CLOBBER_MOV;
3409 s=readreg(s,4);
3410 d=writereg(d,2);
3411
3412 raw_mov_w_rR(d,s,offset);
3413 unlock2(d);
3414 unlock2(s);
3415 }
3416 MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3417
3418 /* read the byte at the address contained in s+offset and store in d */
3419 MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3420 {
3421 if (isconst(s)) {
3422 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3423 return;
3424 }
3425 CLOBBER_MOV;
3426 s=readreg(s,4);
3427 d=writereg(d,1);
3428
3429 raw_mov_b_rR(d,s,offset);
3430 unlock2(d);
3431 unlock2(s);
3432 }
3433 MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3434
3435 /* read the long at the address contained in s+offset and store in d */
3436 MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3437 {
3438 int sreg=s;
3439 if (isconst(s)) {
3440 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3441 return;
3442 }
3443 CLOBBER_MOV;
3444 s=readreg_offset(s,4);
3445 offset+=get_offset(sreg);
3446 d=writereg(d,4);
3447
3448 raw_mov_l_brR(d,s,offset);
3449 unlock2(d);
3450 unlock2(s);
3451 }
3452 MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3453
3454 /* read the word at the address contained in s+offset and store in d */
3455 MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3456 {
3457 int sreg=s;
3458 if (isconst(s)) {
3459 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3460 return;
3461 }
3462 CLOBBER_MOV;
3463 remove_offset(d,-1);
3464 s=readreg_offset(s,4);
3465 offset+=get_offset(sreg);
3466 d=writereg(d,2);
3467
3468 raw_mov_w_brR(d,s,offset);
3469 unlock2(d);
3470 unlock2(s);
3471 }
3472 MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3473
3474 /* read the byte at the address contained in s+offset and store in d */
3475 MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3476 {
3477 int sreg=s;
3478 if (isconst(s)) {
3479 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3480 return;
3481 }
3482 CLOBBER_MOV;
3483 remove_offset(d,-1);
3484 s=readreg_offset(s,4);
3485 offset+=get_offset(sreg);
3486 d=writereg(d,1);
3487
3488 raw_mov_b_brR(d,s,offset);
3489 unlock2(d);
3490 unlock2(s);
3491 }
3492 MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3493
3494 MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3495 {
3496 int dreg=d;
3497 if (isconst(d)) {
3498 COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3499 return;
3500 }
3501
3502 CLOBBER_MOV;
3503 d=readreg_offset(d,4);
3504 offset+=get_offset(dreg);
3505 raw_mov_l_Ri(d,i,offset);
3506 unlock2(d);
3507 }
3508 MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3509
3510 MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3511 {
3512 int dreg=d;
3513 if (isconst(d)) {
3514 COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3515 return;
3516 }
3517
3518 CLOBBER_MOV;
3519 d=readreg_offset(d,4);
3520 offset+=get_offset(dreg);
3521 raw_mov_w_Ri(d,i,offset);
3522 unlock2(d);
3523 }
3524 MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3525
3526 MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3527 {
3528 int dreg=d;
3529 if (isconst(d)) {
3530 COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3531 return;
3532 }
3533
3534 CLOBBER_MOV;
3535 d=readreg_offset(d,4);
3536 offset+=get_offset(dreg);
3537 raw_mov_b_Ri(d,i,offset);
3538 unlock2(d);
3539 }
3540 MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3541
3542 /* Warning! OFFSET is byte sized only! */
3543 MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3544 {
3545 if (isconst(d)) {
3546 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3547 return;
3548 }
3549 if (isconst(s)) {
3550 COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3551 return;
3552 }
3553
3554 CLOBBER_MOV;
3555 s=readreg(s,4);
3556 d=readreg(d,4);
3557
3558 raw_mov_l_Rr(d,s,offset);
3559 unlock2(d);
3560 unlock2(s);
3561 }
3562 MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3563
3564 MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3565 {
3566 if (isconst(d)) {
3567 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3568 return;
3569 }
3570 if (isconst(s)) {
3571 COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3572 return;
3573 }
3574
3575 CLOBBER_MOV;
3576 s=readreg(s,2);
3577 d=readreg(d,4);
3578 raw_mov_w_Rr(d,s,offset);
3579 unlock2(d);
3580 unlock2(s);
3581 }
3582 MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3583
3584 MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3585 {
3586 if (isconst(d)) {
3587 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3588 return;
3589 }
3590 if (isconst(s)) {
3591 COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3592 return;
3593 }
3594
3595 CLOBBER_MOV;
3596 s=readreg(s,1);
3597 d=readreg(d,4);
3598 raw_mov_b_Rr(d,s,offset);
3599 unlock2(d);
3600 unlock2(s);
3601 }
3602 MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3603
3604 MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3605 {
3606 if (isconst(s)) {
3607 COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3608 return;
3609 }
3610 #if USE_OFFSET
3611 if (d==s) {
3612 add_offset(d,offset);
3613 return;
3614 }
3615 #endif
3616 CLOBBER_LEA;
3617 s=readreg(s,4);
3618 d=writereg(d,4);
3619 raw_lea_l_brr(d,s,offset);
3620 unlock2(d);
3621 unlock2(s);
3622 }
3623 MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
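/* With USE_OFFSET, lea_l_brr(d,d,offset) above emits no code at all:
 * add_offset() merely records the displacement against d. The offset is
 * later folded into a memory operand (via get_offset(), as in mov_l_brR)
 * or materialized by remove_offset() when the exact value is required.
 * A sketch:
 *
 *   lea_l_brr(r,r,4);    // book-keeping only
 *   mov_l_brR(d,r,0);    // folds the 4 into the displacement
 */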
3624
3625 MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3626 {
3627 if (!offset) {
3628 COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3629 return;
3630 }
3631 CLOBBER_LEA;
3632 s=readreg(s,4);
3633 index=readreg(index,4);
3634 d=writereg(d,4);
3635
3636 raw_lea_l_brr_indexed(d,s,index,factor,offset);
3637 unlock2(d);
3638 unlock2(index);
3639 unlock2(s);
3640 }
3641 MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3642
3643 MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3644 {
3645 CLOBBER_LEA;
3646 s=readreg(s,4);
3647 index=readreg(index,4);
3648 d=writereg(d,4);
3649
3650 raw_lea_l_rr_indexed(d,s,index,factor);
3651 unlock2(d);
3652 unlock2(index);
3653 unlock2(s);
3654 }
3655 MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3656
3657 /* write the long in s to the address contained in d+offset */
3658 MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3659 {
3660 int dreg=d;
3661 if (isconst(d)) {
3662 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3663 return;
3664 }
3665
3666 CLOBBER_MOV;
3667 s=readreg(s,4);
3668 d=readreg_offset(d,4);
3669 offset+=get_offset(dreg);
3670
3671 raw_mov_l_bRr(d,s,offset);
3672 unlock2(d);
3673 unlock2(s);
3674 }
3675 MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3676
3677 /* write the word in s to the address contained in d+offset */
3678 MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3679 {
3680 int dreg=d;
3681
3682 if (isconst(d)) {
3683 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3684 return;
3685 }
3686
3687 CLOBBER_MOV;
3688 s=readreg(s,2);
3689 d=readreg_offset(d,4);
3690 offset+=get_offset(dreg);
3691 raw_mov_w_bRr(d,s,offset);
3692 unlock2(d);
3693 unlock2(s);
3694 }
3695 MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3696
3697 MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3698 {
3699 int dreg=d;
3700 if (isconst(d)) {
3701 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3702 return;
3703 }
3704
3705 CLOBBER_MOV;
3706 s=readreg(s,1);
3707 d=readreg_offset(d,4);
3708 offset+=get_offset(dreg);
3709 raw_mov_b_bRr(d,s,offset);
3710 unlock2(d);
3711 unlock2(s);
3712 }
3713 MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3714
3715 MIDFUNC(1,bswap_32,(RW4 r))
3716 {
3717 int reg=r;
3718
3719 if (isconst(r)) {
3720 uae_u32 oldv=live.state[r].val;
3721 live.state[r].val=reverse32(oldv);
3722 return;
3723 }
3724
3725 CLOBBER_SW32;
3726 r=rmw(r,4,4);
3727 raw_bswap_32(r);
3728 unlock2(r);
3729 }
3730 MENDFUNC(1,bswap_32,(RW4 r))
3731
3732 MIDFUNC(1,bswap_16,(RW2 r))
3733 {
3734 if (isconst(r)) {
3735 uae_u32 oldv=live.state[r].val;
3736 live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3737 (oldv&0xffff0000);
3738 return;
3739 }
3740
3741 CLOBBER_SW16;
3742 r=rmw(r,2,2);
3743
3744 raw_bswap_16(r);
3745 unlock2(r);
3746 }
3747 MENDFUNC(1,bswap_16,(RW2 r))
3748
3749
3750
3751 MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3752 {
3753 int olds;
3754
3755 if (d==s) { /* How pointless! */
3756 return;
3757 }
3758 if (isconst(s)) {
3759 COMPCALL(mov_l_ri)(d,live.state[s].val);
3760 return;
3761 }
3762 olds=s;
3763 disassociate(d);
3764 s=readreg_offset(s,4);
3765 live.state[d].realreg=s;
3766 live.state[d].realind=live.nat[s].nholds;
3767 live.state[d].val=live.state[olds].val;
3768 live.state[d].validsize=4;
3769 live.state[d].dirtysize=4;
3770 set_status(d,DIRTY);
3771
3772 live.nat[s].holds[live.nat[s].nholds]=d;
3773 live.nat[s].nholds++;
3774 log_clobberreg(d);
3775 /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3776 d,s,live.state[d].realind,live.nat[s].nholds); */
3777 unlock2(s);
3778 }
3779 MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
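/* mov_l_rr is the register-aliasing feature reported by compiler_init():
 * no host "mov" is emitted; d is simply added to the set of virtual
 * registers held by s's host register (live.nat[s].holds[]) and marked
 * DIRTY so it is written back on eviction. The copy only turns into real
 * code if one of the aliases is later written through writereg()/rmw(),
 * which must make it exclusive first. */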
3780
3781 MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3782 {
3783 if (isconst(s)) {
3784 COMPCALL(mov_l_mi)(d,live.state[s].val);
3785 return;
3786 }
3787 CLOBBER_MOV;
3788 s=readreg(s,4);
3789
3790 raw_mov_l_mr(d,s);
3791 unlock2(s);
3792 }
3793 MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3794
3795
3796 MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3797 {
3798 if (isconst(s)) {
3799 COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3800 return;
3801 }
3802 CLOBBER_MOV;
3803 s=readreg(s,2);
3804
3805 raw_mov_w_mr(d,s);
3806 unlock2(s);
3807 }
3808 MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3809
3810 MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3811 {
3812 CLOBBER_MOV;
3813 d=writereg(d,2);
3814
3815 raw_mov_w_rm(d,s);
3816 unlock2(d);
3817 }
3818 MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3819
3820 MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3821 {
3822 if (isconst(s)) {
3823 COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3824 return;
3825 }
3826
3827 CLOBBER_MOV;
3828 s=readreg(s,1);
3829
3830 raw_mov_b_mr(d,s);
3831 unlock2(s);
3832 }
3833 MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3834
3835 MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3836 {
3837 CLOBBER_MOV;
3838 d=writereg(d,1);
3839
3840 raw_mov_b_rm(d,s);
3841 unlock2(d);
3842 }
3843 MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3844
3845 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3846 {
3847 set_const(d,s);
3848 return;
3849 }
3850 MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
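/* mov_l_ri is pure book-keeping: set_const() records the value and emits
 * nothing. The isconst() shortcuts in the midfuncs above then propagate
 * such constants, e.g. (hypothetical values):
 *
 *   mov_l_ri(r,0x2000);   // r is now a known constant
 *   mov_l_rR(d,r,8);      // becomes mov_l_rm(d,0x2008), an absolute load
 */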
3851
3852 MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3853 {
3854 CLOBBER_MOV;
3855 d=writereg(d,2);
3856
3857 raw_mov_w_ri(d,s);
3858 unlock2(d);
3859 }
3860 MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3861
3862 MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3863 {
3864 CLOBBER_MOV;
3865 d=writereg(d,1);
3866
3867 raw_mov_b_ri(d,s);
3868 unlock2(d);
3869 }
3870 MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3871
3872
3873 MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3874 {
3875 CLOBBER_ADD;
3876 raw_add_l_mi(d,s) ;
3877 }
3878 MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3879
3880 MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3881 {
3882 CLOBBER_ADD;
3883 raw_add_w_mi(d,s) ;
3884 }
3885 MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3886
3887 MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3888 {
3889 CLOBBER_ADD;
3890 raw_add_b_mi(d,s) ;
3891 }
3892 MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3893
3894
3895 MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3896 {
3897 CLOBBER_TEST;
3898 d=readreg(d,4);
3899
3900 raw_test_l_ri(d,i);
3901 unlock2(d);
3902 }
3903 MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3904
3905 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3906 {
3907 CLOBBER_TEST;
3908 d=readreg(d,4);
3909 s=readreg(s,4);
3910
3911 raw_test_l_rr(d,s);
3912 unlock2(d);
3913 unlock2(s);
3914 }
3915 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3916
3917 MIDFUNC(2,test_w_rr,(R2 d, R2 s))
3918 {
3919 CLOBBER_TEST;
3920 d=readreg(d,2);
3921 s=readreg(s,2);
3922
3923 raw_test_w_rr(d,s);
3924 unlock2(d);
3925 unlock2(s);
3926 }
3927 MENDFUNC(2,test_w_rr,(R2 d, R2 s))
3928
3929 MIDFUNC(2,test_b_rr,(R1 d, R1 s))
3930 {
3931 CLOBBER_TEST;
3932 d=readreg(d,1);
3933 s=readreg(s,1);
3934
3935 raw_test_b_rr(d,s);
3936 unlock2(d);
3937 unlock2(s);
3938 }
3939 MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3940
3941
3942 MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
3943 {
3944 if (isconst(d) && !needflags) {
3945 live.state[d].val &= i;
3946 return;
3947 }
3948
3949 CLOBBER_AND;
3950 d=rmw(d,4,4);
3951
3952 raw_and_l_ri(d,i);
3953 unlock2(d);
3954 }
3955 MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
3956
3957 MIDFUNC(2,and_l,(RW4 d, R4 s))
3958 {
3959 CLOBBER_AND;
3960 s=readreg(s,4);
3961 d=rmw(d,4,4);
3962
3963 raw_and_l(d,s);
3964 unlock2(d);
3965 unlock2(s);
3966 }
3967 MENDFUNC(2,and_l,(RW4 d, R4 s))
3968
3969 MIDFUNC(2,and_w,(RW2 d, R2 s))
3970 {
3971 CLOBBER_AND;
3972 s=readreg(s,2);
3973 d=rmw(d,2,2);
3974
3975 raw_and_w(d,s);
3976 unlock2(d);
3977 unlock2(s);
3978 }
3979 MENDFUNC(2,and_w,(RW2 d, R2 s))
3980
3981 MIDFUNC(2,and_b,(RW1 d, R1 s))
3982 {
3983 CLOBBER_AND;
3984 s=readreg(s,1);
3985 d=rmw(d,1,1);
3986
3987 raw_and_b(d,s);
3988 unlock2(d);
3989 unlock2(s);
3990 }
3991 MENDFUNC(2,and_b,(RW1 d, R1 s))
3992
3993 // gb-- used for making an fpcr value in compemu_fpp.cpp
3994 MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
3995 {
3996 CLOBBER_OR;
3997 d=rmw(d,4,4);
3998
3999 raw_or_l_rm(d,s);
4000 unlock2(d);
4001 }
4002 MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4003
4004 MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4005 {
4006 if (isconst(d) && !needflags) {
4007 live.state[d].val|=i;
4008 return;
4009 }
4010 CLOBBER_OR;
4011 d=rmw(d,4,4);
4012
4013 raw_or_l_ri(d,i);
4014 unlock2(d);
4015 }
4016 MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4017
4018 MIDFUNC(2,or_l,(RW4 d, R4 s))
4019 {
4020 if (isconst(d) && isconst(s) && !needflags) {
4021 live.state[d].val|=live.state[s].val;
4022 return;
4023 }
4024 CLOBBER_OR;
4025 s=readreg(s,4);
4026 d=rmw(d,4,4);
4027
4028 raw_or_l(d,s);
4029 unlock2(d);
4030 unlock2(s);
4031 }
4032 MENDFUNC(2,or_l,(RW4 d, R4 s))
4033
4034 MIDFUNC(2,or_w,(RW2 d, R2 s))
4035 {
4036 CLOBBER_OR;
4037 s=readreg(s,2);
4038 d=rmw(d,2,2);
4039
4040 raw_or_w(d,s);
4041 unlock2(d);
4042 unlock2(s);
4043 }
4044 MENDFUNC(2,or_w,(RW2 d, R2 s))
4045
4046 MIDFUNC(2,or_b,(RW1 d, R1 s))
4047 {
4048 CLOBBER_OR;
4049 s=readreg(s,1);
4050 d=rmw(d,1,1);
4051
4052 raw_or_b(d,s);
4053 unlock2(d);
4054 unlock2(s);
4055 }
4056 MENDFUNC(2,or_b,(RW1 d, R1 s))
4057
4058 MIDFUNC(2,adc_l,(RW4 d, R4 s))
4059 {
4060 CLOBBER_ADC;
4061 s=readreg(s,4);
4062 d=rmw(d,4,4);
4063
4064 raw_adc_l(d,s);
4065
4066 unlock2(d);
4067 unlock2(s);
4068 }
4069 MENDFUNC(2,adc_l,(RW4 d, R4 s))
4070
4071 MIDFUNC(2,adc_w,(RW2 d, R2 s))
4072 {
4073 CLOBBER_ADC;
4074 s=readreg(s,2);
4075 d=rmw(d,2,2);
4076
4077 raw_adc_w(d,s);
4078 unlock2(d);
4079 unlock2(s);
4080 }
4081 MENDFUNC(2,adc_w,(RW2 d, R2 s))
4082
4083 MIDFUNC(2,adc_b,(RW1 d, R1 s))
4084 {
4085 CLOBBER_ADC;
4086 s=readreg(s,1);
4087 d=rmw(d,1,1);
4088
4089 raw_adc_b(d,s);
4090 unlock2(d);
4091 unlock2(s);
4092 }
4093 MENDFUNC(2,adc_b,(RW1 d, R1 s))
4094
4095 MIDFUNC(2,add_l,(RW4 d, R4 s))
4096 {
4097 if (isconst(s)) {
4098 COMPCALL(add_l_ri)(d,live.state[s].val);
4099 return;
4100 }
4101
4102 CLOBBER_ADD;
4103 s=readreg(s,4);
4104 d=rmw(d,4,4);
4105
4106 raw_add_l(d,s);
4107
4108 unlock2(d);
4109 unlock2(s);
4110 }
4111 MENDFUNC(2,add_l,(RW4 d, R4 s))
4112
4113 MIDFUNC(2,add_w,(RW2 d, R2 s))
4114 {
4115 if (isconst(s)) {
4116 COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4117 return;
4118 }
4119
4120 CLOBBER_ADD;
4121 s=readreg(s,2);
4122 d=rmw(d,2,2);
4123
4124 raw_add_w(d,s);
4125 unlock2(d);
4126 unlock2(s);
4127 }
4128 MENDFUNC(2,add_w,(RW2 d, R2 s))
4129
4130 MIDFUNC(2,add_b,(RW1 d, R1 s))
4131 {
4132 if (isconst(s)) {
4133 COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4134 return;
4135 }
4136
4137 CLOBBER_ADD;
4138 s=readreg(s,1);
4139 d=rmw(d,1,1);
4140
4141 raw_add_b(d,s);
4142 unlock2(d);
4143 unlock2(s);
4144 }
4145 MENDFUNC(2,add_b,(RW1 d, R1 s))
4146
4147 MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4148 {
4149 if (!i && !needflags)
4150 return;
4151 if (isconst(d) && !needflags) {
4152 live.state[d].val-=i;
4153 return;
4154 }
4155 #if USE_OFFSET
4156 if (!needflags) {
4157 add_offset(d,-i);
4158 return;
4159 }
4160 #endif
4161
4162 CLOBBER_SUB;
4163 d=rmw(d,4,4);
4164
4165 raw_sub_l_ri(d,i);
4166 unlock2(d);
4167 }
4168 MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4169
4170 MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4171 {
4172 if (!i && !needflags)
4173 return;
4174
4175 CLOBBER_SUB;
4176 d=rmw(d,2,2);
4177
4178 raw_sub_w_ri(d,i);
4179 unlock2(d);
4180 }
4181 MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4182
4183 MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4184 {
4185 if (!i && !needflags)
4186 return;
4187
4188 CLOBBER_SUB;
4189 d=rmw(d,1,1);
4190
4191 raw_sub_b_ri(d,i);
4192
4193 unlock2(d);
4194 }
4195 MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4196
4197 MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4198 {
4199 if (!i && !needflags)
4200 return;
4201 if (isconst(d) && !needflags) {
4202 live.state[d].val+=i;
4203 return;
4204 }
4205 #if USE_OFFSET
4206 if (!needflags) {
4207 add_offset(d,i);
4208 return;
4209 }
4210 #endif
4211 CLOBBER_ADD;
4212 d=rmw(d,4,4);
4213 raw_add_l_ri(d,i);
4214 unlock2(d);
4215 }
4216 MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
4217
4218 MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4219 {
4220 if (!i && !needflags)
4221 return;
4222
4223 CLOBBER_ADD;
4224 d=rmw(d,2,2);
4225
4226 raw_add_w_ri(d,i);
4227 unlock2(d);
4228 }
4229 MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4230
4231 MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4232 {
4233 if (!i && !needflags)
4234 return;
4235
4236 CLOBBER_ADD;
4237 d=rmw(d,1,1);
4238
4239 raw_add_b_ri(d,i);
4240
4241 unlock2(d);
4242 }
4243 MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4244
4245 MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4246 {
4247 CLOBBER_SBB;
4248 s=readreg(s,4);
4249 d=rmw(d,4,4);
4250
4251 raw_sbb_l(d,s);
4252 unlock2(d);
4253 unlock2(s);
4254 }
4255 MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4256
4257 MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4258 {
4259 CLOBBER_SBB;
4260 s=readreg(s,2);
4261 d=rmw(d,2,2);
4262
4263 raw_sbb_w(d,s);
4264 unlock2(d);
4265 unlock2(s);
4266 }
4267 MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4268
4269 MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4270 {
4271 CLOBBER_SBB;
4272 s=readreg(s,1);
4273 d=rmw(d,1,1);
4274
4275 raw_sbb_b(d,s);
4276 unlock2(d);
4277 unlock2(s);
4278 }
4279 MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4280
4281 MIDFUNC(2,sub_l,(RW4 d, R4 s))
4282 {
4283 if (isconst(s)) {
4284 COMPCALL(sub_l_ri)(d,live.state[s].val);
4285 return;
4286 }
4287
4288 CLOBBER_SUB;
4289 s=readreg(s,4);
4290 d=rmw(d,4,4);
4291
4292 raw_sub_l(d,s);
4293 unlock2(d);
4294 unlock2(s);
4295 }
4296 MENDFUNC(2,sub_l,(RW4 d, R4 s))
4297
4298 MIDFUNC(2,sub_w,(RW2 d, R2 s))
4299 {
4300 if (isconst(s)) {
4301 COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4302 return;
4303 }
4304
4305 CLOBBER_SUB;
4306 s=readreg(s,2);
4307 d=rmw(d,2,2);
4308
4309 raw_sub_w(d,s);
4310 unlock2(d);
4311 unlock2(s);
4312 }
4313 MENDFUNC(2,sub_w,(RW2 d, R2 s))
4314
4315 MIDFUNC(2,sub_b,(RW1 d, R1 s))
4316 {
4317 if (isconst(s)) {
4318 COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4319 return;
4320 }
4321
4322 CLOBBER_SUB;
4323 s=readreg(s,1);
4324 d=rmw(d,1,1);
4325
4326 raw_sub_b(d,s);
4327 unlock2(d);
4328 unlock2(s);
4329 }
4330 MENDFUNC(2,sub_b,(RW1 d, R1 s))
4331
4332 MIDFUNC(2,cmp_l,(R4 d, R4 s))
4333 {
4334 CLOBBER_CMP;
4335 s=readreg(s,4);
4336 d=readreg(d,4);
4337
4338 raw_cmp_l(d,s);
4339 unlock2(d);
4340 unlock2(s);
4341 }
4342 MENDFUNC(2,cmp_l,(R4 d, R4 s))
4343
4344 MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4345 {
4346 CLOBBER_CMP;
4347 r=readreg(r,4);
4348
4349 raw_cmp_l_ri(r,i);
4350 unlock2(r);
4351 }
4352 MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4353
4354 MIDFUNC(2,cmp_w,(R2 d, R2 s))
4355 {
4356 CLOBBER_CMP;
4357 s=readreg(s,2);
4358 d=readreg(d,2);
4359
4360 raw_cmp_w(d,s);
4361 unlock2(d);
4362 unlock2(s);
4363 }
4364 MENDFUNC(2,cmp_w,(R2 d, R2 s))
4365
4366 MIDFUNC(2,cmp_b,(R1 d, R1 s))
4367 {
4368 CLOBBER_CMP;
4369 s=readreg(s,1);
4370 d=readreg(d,1);
4371
4372 raw_cmp_b(d,s);
4373 unlock2(d);
4374 unlock2(s);
4375 }
4376 MENDFUNC(2,cmp_b,(R1 d, R1 s))
4377
4378
4379 MIDFUNC(2,xor_l,(RW4 d, R4 s))
4380 {
4381 CLOBBER_XOR;
4382 s=readreg(s,4);
4383 d=rmw(d,4,4);
4384
4385 raw_xor_l(d,s);
4386 unlock2(d);
4387 unlock2(s);
4388 }
4389 MENDFUNC(2,xor_l,(RW4 d, R4 s))
4390
4391 MIDFUNC(2,xor_w,(RW2 d, R2 s))
4392 {
4393 CLOBBER_XOR;
4394 s=readreg(s,2);
4395 d=rmw(d,2,2);
4396
4397 raw_xor_w(d,s);
4398 unlock2(d);
4399 unlock2(s);
4400 }
4401 MENDFUNC(2,xor_w,(RW2 d, R2 s))
4402
4403 MIDFUNC(2,xor_b,(RW1 d, R1 s))
4404 {
4405 CLOBBER_XOR;
4406 s=readreg(s,1);
4407 d=rmw(d,1,1);
4408
4409 raw_xor_b(d,s);
4410 unlock2(d);
4411 unlock2(s);
4412 }
4413 MENDFUNC(2,xor_b,(RW1 d, R1 s))
4414
4415 MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4416 {
4417 clobber_flags();
4418 remove_all_offsets();
4419 if (osize==4) {
4420 if (out1!=in1 && out1!=r) {
4421 COMPCALL(forget_about)(out1);
4422 }
4423 }
4424 else {
4425 tomem_c(out1);
4426 }
4427
4428 in1=readreg_specific(in1,isize,REG_PAR1);
4429 r=readreg(r,4);
4430 prepare_for_call_1(); /* This should ensure that there won't be
4431 any need for swapping nregs in prepare_for_call_2
4432 */
4433 #if USE_NORMAL_CALLING_CONVENTION
4434 raw_push_l_r(in1);
4435 #endif
4436 unlock2(in1);
4437 unlock2(r);
4438
4439 prepare_for_call_2();
4440 raw_call_r(r);
4441
4442 #if USE_NORMAL_CALLING_CONVENTION
4443 raw_inc_sp(4);
4444 #endif
4445
4446
4447 live.nat[REG_RESULT].holds[0]=out1;
4448 live.nat[REG_RESULT].nholds=1;
4449 live.nat[REG_RESULT].touched=touchcnt++;
4450
4451 live.state[out1].realreg=REG_RESULT;
4452 live.state[out1].realind=0;
4453 live.state[out1].val=0;
4454 live.state[out1].validsize=osize;
4455 live.state[out1].dirtysize=osize;
4456 set_status(out1,DIRTY);
4457 }
4458 MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
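/* Note that call_r_11 never copies the return value: it rebinds the
 * virtual register out1 directly onto REG_RESULT (the host register the
 * calling convention returns in) and marks it DIRTY, so the result is
 * only spilled to out1's memory slot if and when that becomes necessary. */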
4459
4460 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4461 {
4462 clobber_flags();
4463 remove_all_offsets();
4464 in1=readreg_specific(in1,isize1,REG_PAR1);
4465 in2=readreg_specific(in2,isize2,REG_PAR2);
4466 r=readreg(r,4);
4467 prepare_for_call_1(); /* This should ensure that there won't be
4468 any need for swapping nregs in prepare_for_call_2
4469 */
4470 #if USE_NORMAL_CALLING_CONVENTION
4471 raw_push_l_r(in2);
4472 raw_push_l_r(in1);
4473 #endif
4474 unlock2(r);
4475 unlock2(in1);
4476 unlock2(in2);
4477 prepare_for_call_2();
4478 raw_call_r(r);
4479 #if USE_NORMAL_CALLING_CONVENTION
4480 raw_inc_sp(8);
4481 #endif
4482 }
4483 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4484
4485 /* forget_about() takes a mid-layer register */
4486 MIDFUNC(1,forget_about,(W4 r))
4487 {
4488 if (isinreg(r))
4489 disassociate(r);
4490 live.state[r].val=0;
4491 set_status(r,UNDEF);
4492 }
4493 MENDFUNC(1,forget_about,(W4 r))
4494
4495 MIDFUNC(0,nop,(void))
4496 {
4497 raw_nop();
4498 }
4499 MENDFUNC(0,nop,(void))
4500
4501
4502 MIDFUNC(1,f_forget_about,(FW r))
4503 {
4504 if (f_isinreg(r))
4505 f_disassociate(r);
4506 live.fate[r].status=UNDEF;
4507 }
4508 MENDFUNC(1,f_forget_about,(FW r))
4509
4510 MIDFUNC(1,fmov_pi,(FW r))
4511 {
4512 r=f_writereg(r);
4513 raw_fmov_pi(r);
4514 f_unlock(r);
4515 }
4516 MENDFUNC(1,fmov_pi,(FW r))
4517
4518 MIDFUNC(1,fmov_log10_2,(FW r))
4519 {
4520 r=f_writereg(r);
4521 raw_fmov_log10_2(r);
4522 f_unlock(r);
4523 }
4524 MENDFUNC(1,fmov_log10_2,(FW r))
4525
4526 MIDFUNC(1,fmov_log2_e,(FW r))
4527 {
4528 r=f_writereg(r);
4529 raw_fmov_log2_e(r);
4530 f_unlock(r);
4531 }
4532 MENDFUNC(1,fmov_log2_e,(FW r))
4533
4534 MIDFUNC(1,fmov_loge_2,(FW r))
4535 {
4536 r=f_writereg(r);
4537 raw_fmov_loge_2(r);
4538 f_unlock(r);
4539 }
4540 MENDFUNC(1,fmov_loge_2,(FW r))
4541
4542 MIDFUNC(1,fmov_1,(FW r))
4543 {
4544 r=f_writereg(r);
4545 raw_fmov_1(r);
4546 f_unlock(r);
4547 }
4548 MENDFUNC(1,fmov_1,(FW r))
4549
4550 MIDFUNC(1,fmov_0,(FW r))
4551 {
4552 r=f_writereg(r);
4553 raw_fmov_0(r);
4554 f_unlock(r);
4555 }
4556 MENDFUNC(1,fmov_0,(FW r))
4557
4558 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4559 {
4560 r=f_writereg(r);
4561 raw_fmov_rm(r,m);
4562 f_unlock(r);
4563 }
4564 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4565
4566 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4567 {
4568 r=f_writereg(r);
4569 raw_fmovi_rm(r,m);
4570 f_unlock(r);
4571 }
4572 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4573
4574 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4575 {
4576 r=f_readreg(r);
4577 raw_fmovi_mr(m,r);
4578 f_unlock(r);
4579 }
4580 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4581
4582 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4583 {
4584 r=f_writereg(r);
4585 raw_fmovs_rm(r,m);
4586 f_unlock(r);
4587 }
4588 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4589
4590 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4591 {
4592 r=f_readreg(r);
4593 raw_fmovs_mr(m,r);
4594 f_unlock(r);
4595 }
4596 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4597
4598 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4599 {
4600 r=f_readreg(r);
4601 raw_fmov_ext_mr(m,r);
4602 f_unlock(r);
4603 }
4604 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4605
4606 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4607 {
4608 r=f_readreg(r);
4609 raw_fmov_mr(m,r);
4610 f_unlock(r);
4611 }
4612 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4613
4614 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4615 {
4616 r=f_writereg(r);
4617 raw_fmov_ext_rm(r,m);
4618 f_unlock(r);
4619 }
4620 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4621
4622 MIDFUNC(2,fmov_rr,(FW d, FR s))
4623 {
4624 if (d==s) { /* How pointless! */
4625 return;
4626 }
4627 #if USE_F_ALIAS
4628 f_disassociate(d);
4629 s=f_readreg(s);
4630 live.fate[d].realreg=s;
4631 live.fate[d].realind=live.fat[s].nholds;
4632 live.fate[d].status=DIRTY;
4633 live.fat[s].holds[live.fat[s].nholds]=d;
4634 live.fat[s].nholds++;
4635 f_unlock(s);
4636 #else
4637 s=f_readreg(s);
4638 d=f_writereg(d);
4639 raw_fmov_rr(d,s);
4640 f_unlock(s);
4641 f_unlock(d);
4642 #endif
4643 }
4644 MENDFUNC(2,fmov_rr,(FW d, FR s))
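/* With USE_F_ALIAS, fmov_rr mirrors the integer mov_l_rr trick: d is
 * attached to s's host FP register through live.fat[s].holds[] and marked
 * DIRTY. The actual raw_fmov_rr copy is deferred until f_make_exclusive()
 * (above) has to split the aliases because one of them is written. */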
4645
4646 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4647 {
4648 index=readreg(index,4);
4649
4650 raw_fldcw_m_indexed(index,base);
4651 unlock2(index);
4652 }
4653 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4654
4655 MIDFUNC(1,ftst_r,(FR r))
4656 {
4657 r=f_readreg(r);
4658 raw_ftst_r(r);
4659 f_unlock(r);
4660 }
4661 MENDFUNC(1,ftst_r,(FR r))
4662
4663 MIDFUNC(0,dont_care_fflags,(void))
4664 {
4665 f_disassociate(FP_RESULT);
4666 }
4667 MENDFUNC(0,dont_care_fflags,(void))
4668
4669 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4670 {
4671 s=f_readreg(s);
4672 d=f_writereg(d);
4673 raw_fsqrt_rr(d,s);
4674 f_unlock(s);
4675 f_unlock(d);
4676 }
4677 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4678
4679 MIDFUNC(2,fabs_rr,(FW d, FR s))
4680 {
4681 s=f_readreg(s);
4682 d=f_writereg(d);
4683 raw_fabs_rr(d,s);
4684 f_unlock(s);
4685 f_unlock(d);
4686 }
4687 MENDFUNC(2,fabs_rr,(FW d, FR s))
4688
4689 MIDFUNC(2,fsin_rr,(FW d, FR s))
4690 {
4691 s=f_readreg(s);
4692 d=f_writereg(d);
4693 raw_fsin_rr(d,s);
4694 f_unlock(s);
4695 f_unlock(d);
4696 }
4697 MENDFUNC(2,fsin_rr,(FW d, FR s))
4698
4699 MIDFUNC(2,fcos_rr,(FW d, FR s))
4700 {
4701 s=f_readreg(s);
4702 d=f_writereg(d);
4703 raw_fcos_rr(d,s);
4704 f_unlock(s);
4705 f_unlock(d);
4706 }
4707 MENDFUNC(2,fcos_rr,(FW d, FR s))
4708
4709 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4710 {
4711 s=f_readreg(s);
4712 d=f_writereg(d);
4713 raw_ftwotox_rr(d,s);
4714 f_unlock(s);
4715 f_unlock(d);
4716 }
4717 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4718
4719 MIDFUNC(2,fetox_rr,(FW d, FR s))
4720 {
4721 s=f_readreg(s);
4722 d=f_writereg(d);
4723 raw_fetox_rr(d,s);
4724 f_unlock(s);
4725 f_unlock(d);
4726 }
4727 MENDFUNC(2,fetox_rr,(FW d, FR s))
4728
4729 MIDFUNC(2,frndint_rr,(FW d, FR s))
4730 {
4731 s=f_readreg(s);
4732 d=f_writereg(d);
4733 raw_frndint_rr(d,s);
4734 f_unlock(s);
4735 f_unlock(d);
4736 }
4737 MENDFUNC(2,frndint_rr,(FW d, FR s))
4738
4739 MIDFUNC(2,flog2_rr,(FW d, FR s))
4740 {
4741 s=f_readreg(s);
4742 d=f_writereg(d);
4743 raw_flog2_rr(d,s);
4744 f_unlock(s);
4745 f_unlock(d);
4746 }
4747 MENDFUNC(2,flog2_rr,(FW d, FR s))
4748
4749 MIDFUNC(2,fneg_rr,(FW d, FR s))
4750 {
4751 s=f_readreg(s);
4752 d=f_writereg(d);
4753 raw_fneg_rr(d,s);
4754 f_unlock(s);
4755 f_unlock(d);
4756 }
4757 MENDFUNC(2,fneg_rr,(FW d, FR s))
4758
4759 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4760 {
4761 s=f_readreg(s);
4762 d=f_rmw(d);
4763 raw_fadd_rr(d,s);
4764 f_unlock(s);
4765 f_unlock(d);
4766 }
4767 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4768
4769 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4770 {
4771 s=f_readreg(s);
4772 d=f_rmw(d);
4773 raw_fsub_rr(d,s);
4774 f_unlock(s);
4775 f_unlock(d);
4776 }
4777 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4778
4779 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4780 {
4781 d=f_readreg(d);
4782 s=f_readreg(s);
4783 raw_fcmp_rr(d,s);
4784 f_unlock(s);
4785 f_unlock(d);
4786 }
4787 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4788
4789 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4790 {
4791 s=f_readreg(s);
4792 d=f_rmw(d);
4793 raw_fdiv_rr(d,s);
4794 f_unlock(s);
4795 f_unlock(d);
4796 }
4797 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4798
4799 MIDFUNC(2,frem_rr,(FRW d, FR s))
4800 {
4801 s=f_readreg(s);
4802 d=f_rmw(d);
4803 raw_frem_rr(d,s);
4804 f_unlock(s);
4805 f_unlock(d);
4806 }
4807 MENDFUNC(2,frem_rr,(FRW d, FR s))
4808
4809 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4810 {
4811 s=f_readreg(s);
4812 d=f_rmw(d);
4813 raw_frem1_rr(d,s);
4814 f_unlock(s);
4815 f_unlock(d);
4816 }
4817 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4818
4819 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4820 {
4821 s=f_readreg(s);
4822 d=f_rmw(d);
4823 raw_fmul_rr(d,s);
4824 f_unlock(s);
4825 f_unlock(d);
4826 }
4827 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4828
4829 /********************************************************************
4830 * Support functions exposed to gencomp. CREATE time *
4831 ********************************************************************/
4832
4833 int kill_rodent(int r)
4834 {
4835 return KILLTHERAT &&
4836 have_rat_stall &&
4837 (live.state[r].status==INMEM ||
4838 live.state[r].status==CLEAN ||
4839 live.state[r].status==ISCONST ||
4840 live.state[r].dirtysize==4);
4841 }
4842
4843 uae_u32 get_const(int r)
4844 {
4845 Dif (!isconst(r)) {
4846 write_log("Register %d should be constant, but isn't\n",r);
4847 abort();
4848 }
4849 return live.state[r].val;
4850 }
4851
4852 void sync_m68k_pc(void)
4853 {
4854 if (m68k_pc_offset) {
4855 add_l_ri(PC_P,m68k_pc_offset);
4856 comp_pc_p+=m68k_pc_offset;
4857 m68k_pc_offset=0;
4858 }
4859 }
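/* The emulated PC is maintained lazily: m68k_pc_offset accumulates the
 * bytes decoded since the last sync, and sync_m68k_pc() folds the total
 * into PC_P with one add_l_ri(). Since add_l_ri() itself defers to the
 * offset machinery when flags are dead, even that add usually stays
 * book-keeping until PC_P is actually stored or compared. */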
4860
4861 /********************************************************************
4862 * Scratch registers management *
4863 ********************************************************************/
4864
4865 struct scratch_t {
4866 uae_u32 regs[VREGS];
4867 fpu_register fregs[VFREGS];
4868 };
4869
4870 static scratch_t scratch;
4871
4872 /********************************************************************
4873 * Support functions exposed to newcpu *
4874 ********************************************************************/
4875
4876 static inline const char *str_on_off(bool b)
4877 {
4878 return b ? "on" : "off";
4879 }
4880
4881 void compiler_init(void)
4882 {
4883 static bool initialized = false;
4884 if (initialized)
4885 return;
4886
4887 #ifndef WIN32
4888 // Open /dev/zero
4889 zero_fd = open("/dev/zero", O_RDWR);
4890 if (zero_fd < 0) {
4891 char str[200];
4892 sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
4893 ErrorAlert(str);
4894 QuitEmulator();
4895 }
4896 #endif
4897
4898 #if JIT_DEBUG
4899 // JIT debug mode ?
4900 JITDebug = PrefsFindBool("jitdebug");
4901 #endif
4902 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4903
4904 #ifdef USE_JIT_FPU
4905 // Use JIT compiler for FPU instructions ?
4906 avoid_fpu = !PrefsFindBool("jitfpu");
4907 #else
4908 // JIT FPU is always disabled
4909 avoid_fpu = true;
4910 #endif
4911 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4912
4913 // Get size of the translation cache (in KB)
4914 cache_size = PrefsFindInt32("jitcachesize");
4915 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
4916
4917 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4918 raw_init_cpu();
4919 setzflg_uses_bsf = target_check_bsf();
4920 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4921 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4922 write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4923
4924 // Translation cache flush mechanism
4925 lazy_flush = PrefsFindBool("jitlazyflush");
4926 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
4927 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
4928
4929 // Compiler features
4930 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4931 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4932 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4933 write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
4934 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4935
4936 // Build compiler tables
4937 build_comp();
4938
4939 initialized = true;
4940
4941 #if PROFILE_UNTRANSLATED_INSNS
4942 write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
4943 #endif
4944
4945 #if PROFILE_COMPILE_TIME
4946 write_log("<JIT compiler> : gather statistics on translation time\n");
4947 emul_start_time = clock();
4948 #endif
4949 }
4950
4951 void compiler_exit(void)
4952 {
4953 #if PROFILE_COMPILE_TIME
4954 emul_end_time = clock();
4955 #endif
4956
4957 // Deallocate translation cache
4958 if (compiled_code) {
4959 vm_release(compiled_code, cache_size * 1024);
4960 compiled_code = 0;
4961 }
4962
4963 #ifndef WIN32
4964 // Close /dev/zero
4965 if (zero_fd > 0)
4966 close(zero_fd);
4967 #endif
4968
4969 #if PROFILE_COMPILE_TIME
4970 write_log("### Compile Block statistics\n");
4971 write_log("Number of calls to compile_block : %d\n", compile_count);
4972 uae_u32 emul_time = emul_end_time - emul_start_time;
4973 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
4974 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
4975 100.0*double(compile_time)/double(emul_time));
4976 write_log("\n");
4977 #endif
4978
4979 #if PROFILE_UNTRANSLATED_INSNS
4980 uae_u64 untranslated_count = 0;
4981 for (int i = 0; i < 65536; i++) {
4982 opcode_nums[i] = i;
4983 untranslated_count += raw_cputbl_count[i];
4984 }
4985 write_log("Sorting out untranslated instructions count...\n");
4986 qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
4987 write_log("\nRank Opc Count Name\n");
4988 for (int i = 0; i < untranslated_top_ten; i++) {
4989 uae_u32 count = raw_cputbl_count[opcode_nums[i]];
4990 struct instr *dp;
4991 struct mnemolookup *lookup;
4992 if (!count)
4993 break;
4994 dp = table68k + opcode_nums[i];
4995 for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
4996 ;
4997 write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
4998 }
4999 #endif
5000 }
5001
5002 bool compiler_use_jit(void)
5003 {
5004 // Check for the "jit" prefs item
5005 if (!PrefsFindBool("jit"))
5006 return false;
5007
5008 // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5009 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5010 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5011 return false;
5012 }
5013
5014 // FIXME: there are currently problems with JIT compilation and anything below a 68040
5015 if (CPUType < 4) {
5016 write_log("<JIT compiler> : 68040 emulation is required, but 680%d0 is configured. Disabling JIT.\n", CPUType);
5017 return false;
5018 }
5019
5020 return true;
5021 }
5022
5023 void init_comp(void)
5024 {
5025 int i;
5026 uae_s8* cb=can_byte;
5027 uae_s8* cw=can_word;
5028 uae_s8* au=always_used;
5029
5030 for (i=0;i<VREGS;i++) {
5031 live.state[i].realreg=-1;
5032 live.state[i].needflush=NF_SCRATCH;
5033 live.state[i].val=0;
5034 set_status(i,UNDEF);
5035 }
5036
5037 for (i=0;i<VFREGS;i++) {
5038 live.fate[i].status=UNDEF;
5039 live.fate[i].realreg=-1;
5040 live.fate[i].needflush=NF_SCRATCH;
5041 }
5042
5043 for (i=0;i<VREGS;i++) {
5044 if (i<16) { /* First 16 registers map to 68k registers */
5045 live.state[i].mem=((uae_u32*)&regs)+i;
5046 live.state[i].needflush=NF_TOMEM;
5047 set_status(i,INMEM);
5048 }
5049 else
5050 live.state[i].mem=scratch.regs+i;
5051 }
5052 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5053 live.state[PC_P].needflush=NF_TOMEM;
5054 set_const(PC_P,(uae_u32)comp_pc_p);
5055
5056 live.state[FLAGX].mem=&(regflags.x);
5057 live.state[FLAGX].needflush=NF_TOMEM;
5058 set_status(FLAGX,INMEM);
5059
5060 live.state[FLAGTMP].mem=&(regflags.cznv);
5061 live.state[FLAGTMP].needflush=NF_TOMEM;
5062 set_status(FLAGTMP,INMEM);
5063
5064 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5065 set_status(NEXT_HANDLER,UNDEF);
5066
5067 for (i=0;i<VFREGS;i++) {
5068 if (i<8) { /* First 8 registers map to 68k FPU registers */
5069 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5070 live.fate[i].needflush=NF_TOMEM;
5071 live.fate[i].status=INMEM;
5072 }
5073 else if (i==FP_RESULT) {
5074 live.fate[i].mem=(uae_u32*)(&fpu.result);
5075 live.fate[i].needflush=NF_TOMEM;
5076 live.fate[i].status=INMEM;
5077 }
5078 else
5079 live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
5080 }
5081
5082
5083 for (i=0;i<N_REGS;i++) {
5084 live.nat[i].touched=0;
5085 live.nat[i].nholds=0;
5086 live.nat[i].locked=0;
5087 if (*cb==i) {
5088 live.nat[i].canbyte=1; cb++;
5089 } else live.nat[i].canbyte=0;
5090 if (*cw==i) {
5091 live.nat[i].canword=1; cw++;
5092 } else live.nat[i].canword=0;
5093 if (*au==i) {
5094 live.nat[i].locked=1; au++;
5095 }
5096 }
5097
5098 for (i=0;i<N_FREGS;i++) {
5099 live.fat[i].touched=0;
5100 live.fat[i].nholds=0;
5101 live.fat[i].locked=0;
5102 }
5103
5104 touchcnt=1;
5105 m68k_pc_offset=0;
5106 live.flags_in_flags=TRASH;
5107 live.flags_on_stack=VALID;
5108 live.flags_are_important=1;
5109
5110 raw_fp_init();
5111 }
5112
5113 /* Only do this if you really mean it! The next call should be to init! */
5114 void flush(int save_regs)
5115 {
5116 int i;
5117
5118 log_flush();
5119 flush_flags(); /* low level */
5120 sync_m68k_pc(); /* mid level */
5121
5122 if (save_regs) {
5123 for (i=0;i<VFREGS;i++) {
5124 if (live.fate[i].needflush==NF_SCRATCH ||
5125 live.fate[i].status==CLEAN) {
5126 f_disassociate(i);
5127 }
5128 }
5129 for (i=0;i<VREGS;i++) {
5130 if (live.state[i].needflush==NF_TOMEM) {
5131 switch(live.state[i].status) {
5132 case INMEM:
5133 if (live.state[i].val) {
5134 raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
5135 log_vwrite(i);
5136 live.state[i].val=0;
5137 }
5138 break;
5139 case CLEAN:
5140 case DIRTY:
5141 remove_offset(i,-1); tomem(i); break;
5142 case ISCONST:
5143 if (i!=PC_P)
5144 writeback_const(i);
5145 break;
5146 default: break;
5147 }
5148 Dif (live.state[i].val && i!=PC_P) {
5149 write_log("Register %d still has val %x\n",
5150 i,live.state[i].val);
5151 }
5152 }
5153 }
5154 for (i=0;i<VFREGS;i++) {
5155 if (live.fate[i].needflush==NF_TOMEM &&
5156 live.fate[i].status==DIRTY) {
5157 f_evict(i);
5158 }
5159 }
5160 raw_fp_cleanup_drop();
5161 }
5162 if (needflags) {
5163 write_log("Warning! flush with needflags=1!\n");
5164 }
5165 }
5166
5167 static void flush_keepflags(void)
5168 {
5169 int i;
5170
5171 for (i=0;i<VFREGS;i++) {
5172 if (live.fate[i].needflush==NF_SCRATCH ||
5173 live.fate[i].status==CLEAN) {
5174 f_disassociate(i);
5175 }
5176 }
5177 for (i=0;i<VREGS;i++) {
5178 if (live.state[i].needflush==NF_TOMEM) {
5179 switch(live.state[i].status) {
5180 case INMEM:
5181 /* Can't adjust the offset here --- that needs "add" */
5182 break;
5183 case CLEAN:
5184 case DIRTY:
5185 remove_offset(i,-1); tomem(i); break;
5186 case ISCONST:
5187 if (i!=PC_P)
5188 writeback_const(i);
5189 break;
5190 default: break;
5191 }
5192 }
5193 }
5194 for (i=0;i<VFREGS;i++) {
5195 if (live.fate[i].needflush==NF_TOMEM &&
5196 live.fate[i].status==DIRTY) {
5197 f_evict(i);
5198 }
5199 }
5200 raw_fp_cleanup_drop();
5201 }
5202
5203 void freescratch(void)
5204 {
5205 int i;
5206 for (i=0;i<N_REGS;i++)
5207 if (live.nat[i].locked && i!=4)
5208 write_log("Warning! %d is locked\n",i);
5209
5210 for (i=0;i<VREGS;i++)
5211 if (live.state[i].needflush==NF_SCRATCH) {
5212 forget_about(i);
5213 }
5214
5215 for (i=0;i<VFREGS;i++)
5216 if (live.fate[i].needflush==NF_SCRATCH) {
5217 f_forget_about(i);
5218 }
5219 }
5220
5221 /********************************************************************
5222 * Support functions, internal *
5223 ********************************************************************/
5224
5225
5226 static void align_target(uae_u32 a)
5227 {
5228 if (!a || !(((uae_u32)target) & (a - 1)))
5229 return; /* nothing to do, or already aligned */
5230
5231 if (tune_nop_fillers)
5232 raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
5233 else {
5234 /* Fill with NOPs --- makes debugging with gdb easier */
5235 while ((uae_u32)target&(a-1))
5236 *target++=0x90;
5237 }
5238 }
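
/* Worked example (illustrative): with target at 0x1003 and a==16,
   (uae_u32)target & (a-1) == 3, so 16-3 == 13 filler bytes move the next
   emitted instruction to the 16-byte boundary at 0x1010. */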
5239
5240 static __inline__ int isinrom(uintptr addr)
5241 {
5242 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5243 }
5244
5245 static void flush_all(void)
5246 {
5247 int i;
5248
5249 log_flush();
5250 for (i=0;i<VREGS;i++)
5251 if (live.state[i].status==DIRTY) {
5252 if (!call_saved[live.state[i].realreg]) {
5253 tomem(i);
5254 }
5255 }
5256 for (i=0;i<VFREGS;i++)
5257 if (f_isinreg(i))
5258 f_evict(i);
5259 raw_fp_cleanup_drop();
5260 }
5261
5262 /* Make sure all registers that will get clobbered by a call are
5263 safe and sound in memory */
5264 static void prepare_for_call_1(void)
5265 {
5266 flush_all(); /* If there are registers that don't get clobbered,
5267 * we should be a bit more selective here */
5268 }
5269
5270 /* We will call a C routine in a moment. That will clobber all registers,
5271 so we need to disassociate everything */
5272 static void prepare_for_call_2(void)
5273 {
5274 int i;
5275 for (i=0;i<N_REGS;i++)
5276 if (!call_saved[i] && live.nat[i].nholds>0)
5277 free_nreg(i);
5278
5279 for (i=0;i<N_FREGS;i++)
5280 if (live.fat[i].nholds>0)
5281 f_free_nreg(i);
5282
5283 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5284 flags at the very start of the call_r
5285 functions! */
5286 }
5287
5288 /********************************************************************
5289 * Memory access and related functions, CREATE time *
5290 ********************************************************************/
5291
5292 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5293 {
5294 next_pc_p=not_taken;
5295 taken_pc_p=taken;
5296 branch_cc=cond;
5297 }
5298
5299
5300 static uae_u32 get_handler_address(uae_u32 addr)
5301 {
5302 uae_u32 cl=cacheline(addr);
5303 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5304 return (uae_u32)&(bi->direct_handler_to_use);
5305 }
5306
5307 static uae_u32 get_handler(uae_u32 addr)
5308 {
5309 uae_u32 cl=cacheline(addr);
5310 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5311 return (uae_u32)bi->direct_handler_to_use;
5312 }
5313
5314 static void load_handler(int reg, uae_u32 addr)
5315 {
5316 mov_l_rm(reg,get_handler_address(addr));
5317 }
5318
5319 /* This version assumes that it is writing *real* memory, and *will* fail
5320 * if that assumption is wrong! No branches, no second chances, just
5321 * straight go-for-it attitude */
5322
5323 static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
5324 {
5325 int f=tmp;
5326
5327 if (clobber)
5328 f=source;
5329 switch(size) {
5330 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5331 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5332 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5333 }
5334 forget_about(tmp);
5335 forget_about(f);
5336 }
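
/* A minimal sketch of what the long store above boils down to, assuming a
   little-endian x86 host and direct addressing (illustrative only): */
#if 0
static void example_store_long(void)
{
	uae_u32 v = 0x12345678;		/* value in 68k (big-endian) order */
	uae_u8 *host = (uae_u8 *)(MEMBaseDiff + 0x1000);	/* guest addr 0x1000 */
	/* bswap_32 + mov_l_bRr leave these bytes in guest memory: */
	host[0] = v >> 24;	/* 0x12 */
	host[1] = v >> 16;	/* 0x34 */
	host[2] = v >> 8;	/* 0x56 */
	host[3] = (uae_u8)v;	/* 0x78 */
}
#endif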
5337
5338 void writebyte(int address, int source, int tmp)
5339 {
5340 writemem_real(address,source,20,1,tmp,0);
5341 }
5342
5343 static __inline__ void writeword_general(int address, int source, int tmp,
5344 int clobber)
5345 {
5346 writemem_real(address,source,16,2,tmp,clobber);
5347 }
5348
5349 void writeword_clobber(int address, int source, int tmp)
5350 {
5351 writeword_general(address,source,tmp,1);
5352 }
5353
5354 void writeword(int address, int source, int tmp)
5355 {
5356 writeword_general(address,source,tmp,0);
5357 }
5358
5359 static __inline__ void writelong_general(int address, int source, int tmp,
5360 int clobber)
5361 {
5362 writemem_real(address,source,12,4,tmp,clobber);
5363 }
5364
5365 void writelong_clobber(int address, int source, int tmp)
5366 {
5367 writelong_general(address,source,tmp,1);
5368 }
5369
5370 void writelong(int address, int source, int tmp)
5371 {
5372 writelong_general(address,source,tmp,0);
5373 }
5374
5375
5376
5377 /* This version assumes that it is reading *real* memory, and *will* fail
5378 * if that assumption is wrong! No branches, no second chances, just
5379 * straight go-for-it attitude */
5380
5381 static void readmem_real(int address, int dest, int offset, int size, int tmp)
5382 {
5383 int f=tmp;
5384
5385 if (size==4 && address!=dest)
5386 f=dest;
5387
5388 switch(size) {
5389 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5390 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5391 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5392 }
5393 forget_about(tmp);
5394 }
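
/* The mirror of the write path: fetch little-endian host bytes at
   MEMBaseDiff+address, then bswap back into 68k order (single bytes need
   no swap). The f/tmp bookkeeping above is vestigial --- the direct
   addressing path never uses f. */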
5395
5396 void readbyte(int address, int dest, int tmp)
5397 {
5398 readmem_real(address,dest,8,1,tmp);
5399 }
5400
5401 void readword(int address, int dest, int tmp)
5402 {
5403 readmem_real(address,dest,4,2,tmp);
5404 }
5405
5406 void readlong(int address, int dest, int tmp)
5407 {
5408 readmem_real(address,dest,0,4,tmp);
5409 }
5410
5411 void get_n_addr(int address, int dest, int tmp)
5412 {
5413 // a is the register containing the virtual address
5414 // after the offset has been fetched
5415 int a=tmp;
5416
5417 // f is the register that will contain the offset
5418 int f=tmp;
5419
5420 // a == f == tmp if (address == dest)
5421 if (address!=dest) {
5422 a=address;
5423 f=dest;
5424 }
5425
5426 #if REAL_ADDRESSING
5427 mov_l_rr(dest, address);
5428 #elif DIRECT_ADDRESSING
5429 lea_l_brr(dest,address,MEMBaseDiff);
5430 #endif
5431 forget_about(tmp);
5432 }
5433
5434 void get_n_addr_jmp(int address, int dest, int tmp)
5435 {
5436 /* For this, we need to get the same address as the rest of UAE
5437 would --- otherwise we end up translating everything twice */
5438 get_n_addr(address,dest,tmp);
5439 }
5440
5441
5442 /* base is a register, but dp is an actual value.
5443 target is a register, as is tmp */
5444 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5445 {
5446 int reg = (dp >> 12) & 15;
5447 int regd_shift=(dp >> 9) & 3;
5448
5449 if (dp & 0x100) {
5450 int ignorebase=(dp&0x80);
5451 int ignorereg=(dp&0x40);
5452 int addbase=0;
5453 int outer=0;
5454
5455 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5456 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5457
5458 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5459 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5460
5461 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5462 if (!ignorereg) {
5463 if ((dp & 0x800) == 0)
5464 sign_extend_16_rr(target,reg);
5465 else
5466 mov_l_rr(target,reg);
5467 shll_l_ri(target,regd_shift);
5468 }
5469 else
5470 mov_l_ri(target,0);
5471
5472 /* target is now regd */
5473 if (!ignorebase)
5474 add_l(target,base);
5475 add_l_ri(target,addbase);
5476 if (dp&0x03) readlong(target,target,tmp);
5477 } else { /* do the getlong first, then add regd */
5478 if (!ignorebase) {
5479 mov_l_rr(target,base);
5480 add_l_ri(target,addbase);
5481 }
5482 else
5483 mov_l_ri(target,addbase);
5484 if (dp&0x03) readlong(target,target,tmp);
5485
5486 if (!ignorereg) {
5487 if ((dp & 0x800) == 0)
5488 sign_extend_16_rr(tmp,reg);
5489 else
5490 mov_l_rr(tmp,reg);
5491 shll_l_ri(tmp,regd_shift);
5492 /* tmp is now regd */
5493 add_l(target,tmp);
5494 }
5495 }
5496 add_l_ri(target,outer);
5497 }
5498 else { /* 68000 version */
5499 if ((dp & 0x800) == 0) { /* Sign extend */
5500 sign_extend_16_rr(target,reg);
5501 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5502 }
5503 else {
5504 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5505 }
5506 }
5507 forget_about(tmp);
5508 }
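
/* Worked example (illustrative): the brief extension word dp=0x7204
   decodes as reg=(dp>>12)&15 == 7, dp&0x800 == 0 (sign-extended word
   index), regd_shift=(dp>>9)&3 == 1 (scale *2), dp&0x100 == 0 (brief
   form), so the 68000-style path computes
   target = base + ((uae_s16)reg7)*2 + (uae_s8)0x04. */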
5509
5510
5511
5512
5513
5514 void set_cache_state(int enabled)
5515 {
5516 if (enabled!=letit)
5517 flush_icache_hard(77);
5518 letit=enabled;
5519 }
5520
5521 int get_cache_state(void)
5522 {
5523 return letit;
5524 }
5525
5526 uae_u32 get_jitted_size(void)
5527 {
5528 if (compiled_code)
5529 return current_compile_p-compiled_code;
5530 return 0;
5531 }
5532
5533 void alloc_cache(void)
5534 {
5535 if (compiled_code) {
5536 flush_icache_hard(6);
5537 vm_release(compiled_code, cache_size * 1024);
5538 compiled_code = 0;
5539 }
5540
5541 if (cache_size == 0)
5542 return;
5543
5544 while (!compiled_code && cache_size) {
5545 if ((compiled_code = (uae_u8 *)vm_acquire(cache_size * 1024)) == VM_MAP_FAILED) {
5546 compiled_code = 0;
5547 cache_size /= 2;
5548 }
5549 }
5550 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5551
5552 if (compiled_code) {
5553 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5554 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5555 current_compile_p = compiled_code;
5556 current_cache_size = 0;
5557 }
5558 }
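
/* Illustrative: if the user asked for 8192 KB but the host can only map
   4096 KB contiguously, the loop above settles for 4096 KB and the actual
   size is logged --- the JIT degrades gracefully instead of failing. */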
5559
5560
5561
5562 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5563
5564 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5565 {
5566 uae_u32 k1 = 0;
5567 uae_u32 k2 = 0;
5568
5569 #if USE_CHECKSUM_INFO
5570 checksum_info *csi = bi->csi;
5571 Dif(!csi) abort();
5572 while (csi) {
5573 uae_s32 len = csi->length;
5574 uae_u32 tmp = (uae_u32)csi->start_p;
5575 #else
5576 uae_s32 len = bi->len;
5577 uae_u32 tmp = (uae_u32)bi->min_pcp;
5578 #endif
5579 uae_u32*pos;
5580
5581 len += (tmp & 3);
5582 tmp &= ~3;
5583 pos = (uae_u32 *)tmp;
5584
5585 if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5586 while (len > 0) {
5587 k1 += *pos;
5588 k2 ^= *pos;
5589 pos++;
5590 len -= 4;
5591 }
5592 }
5593
5594 #if USE_CHECKSUM_INFO
5595 csi = csi->next;
5596 }
5597 #endif
5598
5599 *c1 = k1;
5600 *c2 = k2;
5601 }
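
/* Minimal standalone sketch of the same scheme (illustrative only): an
   additive and an XOR checksum over aligned longwords; requiring both to
   match on revalidation makes accidental collisions unlikely. */
#if 0
static void example_checksum(const uae_u32 *pos, uae_s32 len,
			     uae_u32 *c1, uae_u32 *c2)
{
	uae_u32 k1 = 0, k2 = 0;
	while (len > 0) {
		k1 += *pos;
		k2 ^= *pos;
		pos++;
		len -= 4;
	}
	*c1 = k1;
	*c2 = k2;
}
#endif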
5602
5603 #if 0
5604 static void show_checksum(CSI_TYPE* csi)
5605 {
5606 uae_u32 k1=0;
5607 uae_u32 k2=0;
5608 uae_s32 len=CSI_LENGTH(csi);
5609 uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5610 uae_u32* pos;
5611
5612 len+=(tmp&3);
5613 tmp&=(~3);
5614 pos=(uae_u32*)tmp;
5615
5616 if (len<0 || len>MAX_CHECKSUM_LEN) {
5617 return;
5618 }
5619 else {
5620 while (len>0) {
5621 write_log("%08x ",*pos);
5622 pos++;
5623 len-=4;
5624 }
5625 write_log(" bla\n");
5626 }
5627 }
5628 #endif
5629
5630
5631 int check_for_cache_miss(void)
5632 {
5633 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5634
5635 if (bi) {
5636 int cl=cacheline(regs.pc_p);
5637 if (bi!=cache_tags[cl+1].bi) {
5638 raise_in_cl_list(bi);
5639 return 1;
5640 }
5641 }
5642 return 0;
5643 }
5644
5645
5646 static void recompile_block(void)
5647 {
5648 /* An existing block's countdown code has expired. We need to make
5649 sure that execute_normal doesn't refuse to recompile due to a
5650 perceived cache miss... */
5651 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5652
5653 Dif (!bi)
5654 abort();
5655 raise_in_cl_list(bi);
5656 execute_normal();
5657 return;
5658 }
5659 static void cache_miss(void)
5660 {
5661 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5662 uae_u32 cl=cacheline(regs.pc_p);
5663 blockinfo* bi2=get_blockinfo(cl);
5664
5665 if (!bi) {
5666 execute_normal(); /* Compile this block now */
5667 return;
5668 }
5669 Dif (!bi2 || bi==bi2) {
5670 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5671 abort();
5672 }
5673 raise_in_cl_list(bi);
5674 return;
5675 }
5676
5677 static int called_check_checksum(blockinfo* bi);
5678
5679 static inline int block_check_checksum(blockinfo* bi)
5680 {
5681 uae_u32 c1,c2;
5682 bool isgood;
5683
5684 if (bi->status!=BI_NEED_CHECK)
5685 return 1; /* This block is in a checked state */
5686
5687 checksum_count++;
5688
5689 if (bi->c1 || bi->c2)
5690 calc_checksum(bi,&c1,&c2);
5691 else {
5692 c1=c2=1; /* Make sure it doesn't match */
5693 }
5694
5695 isgood=(c1==bi->c1 && c2==bi->c2);
5696
5697 if (isgood) {
5698 /* This block is still OK. So we reactivate. Of course, that
5699 means we have to move it into the needs-to-be-flushed list */
5700 bi->handler_to_use=bi->handler;
5701 set_dhtu(bi,bi->direct_handler);
5702 bi->status=BI_CHECKING;
5703 isgood=called_check_checksum(bi);
5704 }
5705 if (isgood) {
5706 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5707 c1,c2,bi->c1,bi->c2);*/
5708 remove_from_list(bi);
5709 add_to_active(bi);
5710 raise_in_cl_list(bi);
5711 bi->status=BI_ACTIVE;
5712 }
5713 else {
5714 /* This block actually changed. We need to invalidate it,
5715 and set it up to be recompiled */
5716 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5717 c1,c2,bi->c1,bi->c2); */
5718 invalidate_block(bi);
5719 raise_in_cl_list(bi);
5720 }
5721 return isgood;
5722 }
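
/* In short (illustrative summary): after a lazy flush every block sits in
   BI_NEED_CHECK; its first reentry recomputes (c1,c2), and only a match
   --- propagated through direct-jump dependencies by
   called_check_checksum() --- promotes it back to BI_ACTIVE. Anything
   else is invalidated and recompiled on demand. */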
5723
5724 static int called_check_checksum(blockinfo* bi)
5725 {
5726 dependency* x=bi->deplist;
5727 int isgood=1;
5728 int i;
5729
5730 for (i=0;i<2 && isgood;i++) {
5731 if (bi->dep[i].jmp_off) {
5732 isgood=block_check_checksum(bi->dep[i].target);
5733 }
5734 }
5735 return isgood;
5736 }
5737
5738 static void check_checksum(void)
5739 {
5740 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5741 uae_u32 cl=cacheline(regs.pc_p);
5742 blockinfo* bi2=get_blockinfo(cl);
5743
5744 /* These are not the droids you are looking for... */
5745 if (!bi) {
5746 /* Whoever is the primary target is in a dormant state, but
5747 calling it was accidental, and we should just compile this
5748 new block */
5749 execute_normal();
5750 return;
5751 }
5752 if (bi!=bi2) {
5753 /* The block was hit accidentally, but it does exist. Cache miss */
5754 cache_miss();
5755 return;
5756 }
5757
5758 if (!block_check_checksum(bi))
5759 execute_normal();
5760 }
5761
5762 static __inline__ void match_states(blockinfo* bi)
5763 {
5764 int i;
5765 smallstate* s=&(bi->env);
5766
5767 if (bi->status==BI_NEED_CHECK) {
5768 block_check_checksum(bi);
5769 }
5770 if (bi->status==BI_ACTIVE ||
5771 bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5772 block makes (about not using
5773 certain vregs) */
5774 for (i=0;i<16;i++) {
5775 if (s->virt[i]==L_UNNEEDED) {
5776 // write_log("unneeded reg %d at %p\n",i,target);
5777 COMPCALL(forget_about)(i); // FIXME
5778 }
5779 }
5780 }
5781 flush(1);
5782
5783 /* And now deal with the *demands* the block makes */
5784 for (i=0;i<N_REGS;i++) {
5785 int v=s->nat[i];
5786 if (v>=0) {
5787 // printf("Loading reg %d into %d at %p\n",v,i,target);
5788 readreg_specific(v,4,i);
5789 // do_load_reg(i,v);
5790 // setlock(i);
5791 }
5792 }
5793 for (i=0;i<N_REGS;i++) {
5794 int v=s->nat[i];
5795 if (v>=0) {
5796 unlock2(i);
5797 }
5798 }
5799 }
5800
5801 static uae_u8 popallspace[1024]; /* That should be enough space */
5802
5803 static __inline__ void create_popalls(void)
5804 {
5805 int i,r;
5806
5807 current_compile_p=popallspace;
5808 set_target(current_compile_p);
5809 #if USE_PUSH_POP
5810 /* If we can't use gcc inline assembly, we need to pop some
5811 registers before jumping back to the various get-out routines.
5812 This generates the code for it.
5813 */
5814 align_target(align_jumps);
5815 popall_do_nothing=get_target();
5816 for (i=0;i<N_REGS;i++) {
5817 if (need_to_preserve[i])
5818 raw_pop_l_r(i);
5819 }
5820 raw_jmp((uae_u32)do_nothing);
5821
5822 align_target(align_jumps);
5823 popall_execute_normal=get_target();
5824 for (i=0;i<N_REGS;i++) {
5825 if (need_to_preserve[i])
5826 raw_pop_l_r(i);
5827 }
5828 raw_jmp((uae_u32)execute_normal);
5829
5830 align_target(align_jumps);
5831 popall_cache_miss=get_target();
5832 for (i=0;i<N_REGS;i++) {
5833 if (need_to_preserve[i])
5834 raw_pop_l_r(i);
5835 }
5836 raw_jmp((uae_u32)cache_miss);
5837
5838 align_target(align_jumps);
5839 popall_recompile_block=get_target();
5840 for (i=0;i<N_REGS;i++) {
5841 if (need_to_preserve[i])
5842 raw_pop_l_r(i);
5843 }
5844 raw_jmp((uae_u32)recompile_block);
5845
5846 align_target(align_jumps);
5847 popall_exec_nostats=get_target();
5848 for (i=0;i<N_REGS;i++) {
5849 if (need_to_preserve[i])
5850 raw_pop_l_r(i);
5851 }
5852 raw_jmp((uae_u32)exec_nostats);
5853
5854 align_target(align_jumps);
5855 popall_check_checksum=get_target();
5856 for (i=0;i<N_REGS;i++) {
5857 if (need_to_preserve[i])
5858 raw_pop_l_r(i);
5859 }
5860 raw_jmp((uae_u32)check_checksum);
5861
5862 align_target(align_jumps);
5863 current_compile_p=get_target();
5864 #else
5865 popall_exec_nostats=(void *)exec_nostats;
5866 popall_execute_normal=(void *)execute_normal;
5867 popall_cache_miss=(void *)cache_miss;
5868 popall_recompile_block=(void *)recompile_block;
5869 popall_do_nothing=(void *)do_nothing;
5870 popall_check_checksum=(void *)check_checksum;
5871 #endif
5872
5873 /* And now, the code to do the matching pushes and then jump
5874 into a handler routine */
5875 pushall_call_handler=get_target();
5876 #if USE_PUSH_POP
5877 for (i=N_REGS;i--;) {
5878 if (need_to_preserve[i])
5879 raw_push_l_r(i);
5880 }
5881 #endif
5882 r=REG_PC_TMP;
5883 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5884 raw_and_l_ri(r,TAGMASK);
5885 raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
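/* The handler emitted just above is, in effect (illustrative): an indirect
   jump through the cache_tags table indexed by the masked emulated PC.
   Every slot starts out as popall_execute_normal (see build_comp), so a
   PC with no translated block falls straight into the compiler. */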
5886
5887 #ifdef X86_ASSEMBLY
5888 align_target(align_jumps);
5889 m68k_compile_execute = (void (*)(void))get_target();
5890 for (i=N_REGS;i--;) {
5891 if (need_to_preserve[i])
5892 raw_push_l_r(i);
5893 }
5894 align_target(align_loops);
5895 uae_u32 dispatch_loop = (uae_u32)get_target();
5896 r=REG_PC_TMP;
5897 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5898 raw_and_l_ri(r,TAGMASK);
5899 raw_call_m_indexed((uae_u32)cache_tags,r,4);
5900 raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5901 raw_jcc_b_oponly(NATIVE_CC_EQ);
5902 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5903 raw_call((uae_u32)m68k_do_specialties);
5904 raw_test_l_rr(REG_RESULT,REG_RESULT);
5905 raw_jcc_b_oponly(NATIVE_CC_EQ);
5906 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5907 raw_cmp_b_mi((uae_u32)&quit_program,0);
5908 raw_jcc_b_oponly(NATIVE_CC_EQ);
5909 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5910 for (i=0;i<N_REGS;i++) {
5911 if (need_to_preserve[i])
5912 raw_pop_l_r(i);
5913 }
5914 raw_ret();
5915 #endif
5916 }
5917
5918 static __inline__ void reset_lists(void)
5919 {
5920 int i;
5921
5922 for (i=0;i<MAX_HOLD_BI;i++)
5923 hold_bi[i]=NULL;
5924 active=NULL;
5925 dormant=NULL;
5926 }
5927
5928 static void prepare_block(blockinfo* bi)
5929 {
5930 int i;
5931
5932 set_target(current_compile_p);
5933 align_target(align_jumps);
5934 bi->direct_pen=(cpuop_func *)get_target();
5935 raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5936 raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5937 raw_jmp((uae_u32)popall_execute_normal);
5938
5939 align_target(align_jumps);
5940 bi->direct_pcc=(cpuop_func *)get_target();
5941 raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5942 raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5943 raw_jmp((uae_u32)popall_check_checksum);
5944 current_compile_p=get_target();
5945
5946 bi->deplist=NULL;
5947 for (i=0;i<2;i++) {
5948 bi->dep[i].prev_p=NULL;
5949 bi->dep[i].next=NULL;
5950 }
5951 bi->env=default_ss;
5952 bi->status=BI_INVALID;
5953 bi->havestate=0;
5954 //bi->env=empty_ss;
5955 }
5956
5957 static bool avoid_opcode(uae_u32 opcode)
5958 {
5959 #if JIT_DEBUG
5960 struct instr *dp = &table68k[opcode];
5961 // filter opcodes per type, integral value, or whatever
5962 #endif
5963 return false;
5964 }
5965
5966 void build_comp(void)
5967 {
5968 int i;
5969 int jumpcount=0;
5970 unsigned long opcode;
5971 struct comptbl* tbl=op_smalltbl_0_comp_ff;
5972 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
5973 int count;
5974 int cpu_level = 0; // 68000 (default)
5975 if (CPUType == 4)
5976 cpu_level = 4; // 68040 with FPU
5977 else {
5978 if (FPUType)
5979 cpu_level = 3; // 68020 with FPU
5980 else if (CPUType >= 2)
5981 cpu_level = 2; // 68020
5982 else if (CPUType == 1)
5983 cpu_level = 1; // 68010
5984 }
5985 struct cputbl *nfctbl = (
5986 cpu_level == 4 ? op_smalltbl_0_nf
5987 : cpu_level == 3 ? op_smalltbl_1_nf
5988 : cpu_level == 2 ? op_smalltbl_2_nf
5989 : cpu_level == 1 ? op_smalltbl_3_nf
5990 : op_smalltbl_4_nf);
5991
5992 write_log ("<JIT compiler> : building compiler function tables\n");
5993
5994 for (opcode = 0; opcode < 65536; opcode++) {
5995 nfcpufunctbl[opcode] = op_illg_1;
5996 compfunctbl[opcode] = NULL;
5997 nfcompfunctbl[opcode] = NULL;
5998 prop[opcode].use_flags = 0x1f;
5999 prop[opcode].set_flags = 0x1f;
6000 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6001 }
6002
6003 for (i = 0; tbl[i].opcode < 65536; i++) {
6004 int cflow = table68k[tbl[i].opcode].cflow;
6005 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6006 cflow = fl_const_jump;
6007 else
6008 cflow &= ~fl_const_jump;
6009 prop[cft_map(tbl[i].opcode)].cflow = cflow;
6010
6011 int uses_fpu = tbl[i].specific & 32;
6012 if ((uses_fpu && avoid_fpu) || avoid_opcode(tbl[i].opcode))
6013 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6014 else
6015 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6016 }
6017
6018 for (i = 0; nftbl[i].opcode < 65536; i++) {
6019 int uses_fpu = nftbl[i].specific & 32;
6020 if ((uses_fpu && avoid_fpu) || avoid_opcode(nftbl[i].opcode))
6021 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6022 else
6023 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6024
6025 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6026 }
6027
6028 for (i = 0; nfctbl[i].handler; i++) {
6029 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6030 }
6031
6032 for (opcode = 0; opcode < 65536; opcode++) {
6033 compop_func *f;
6034 compop_func *nff;
6035 cpuop_func *nfcf;
6036 int isaddx,cflow;
6037
6038 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6039 continue;
6040
6041 if (table68k[opcode].handler != -1) {
6042 f = compfunctbl[cft_map(table68k[opcode].handler)];
6043 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6044 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6045 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6046 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6047 prop[cft_map(opcode)].cflow = cflow;
6048 prop[cft_map(opcode)].is_addx = isaddx;
6049 compfunctbl[cft_map(opcode)] = f;
6050 nfcompfunctbl[cft_map(opcode)] = nff;
6051 Dif (nfcf == op_illg_1)
6052 abort();
6053 nfcpufunctbl[cft_map(opcode)] = nfcf;
6054 }
6055 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6056 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6057 }
6058 for (i = 0; nfctbl[i].handler != NULL; i++) {
6059 if (nfctbl[i].specific)
6060 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6061 }
6062
6063 count=0;
6064 for (opcode = 0; opcode < 65536; opcode++) {
6065 if (compfunctbl[cft_map(opcode)])
6066 count++;
6067 }
6068 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6069
6070 /* Initialise state */
6071 create_popalls();
6072 alloc_cache();
6073 reset_lists();
6074
6075 for (i=0;i<TAGSIZE;i+=2) {
6076 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6077 cache_tags[i+1].bi=NULL;
6078 }
6079
6080 #if 0
6081 for (i=0;i<N_REGS;i++) {
6082 empty_ss.nat[i].holds=-1;
6083 empty_ss.nat[i].validsize=0;
6084 empty_ss.nat[i].dirtysize=0;
6085 }
6086 #endif
6087 for (i=0;i<VREGS;i++) {
6088 empty_ss.virt[i]=L_NEEDED;
6089 }
6090 for (i=0;i<N_REGS;i++) {
6091 empty_ss.nat[i]=L_UNKNOWN;
6092 }
6093 default_ss=empty_ss;
6094 }
6095
6096
6097 static void flush_icache_none(int n)
6098 {
6099 /* Nothing to do. */
6100 }
6101
6102 static void flush_icache_hard(int n)
6103 {
6104 uae_u32 i;
6105 blockinfo* bi, *dbi;
6106
6107 hard_flush_count++;
6108 #if 0
6109 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6110 n,regs.pc,regs.pc_p,current_cache_size/1024);
6111 current_cache_size = 0;
6112 #endif
6113 bi=active;
6114 while(bi) {
6115 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6116 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6117 dbi=bi; bi=bi->next;
6118 free_blockinfo(dbi);
6119 }
6120 bi=dormant;
6121 while(bi) {
6122 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6123 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6124 dbi=bi; bi=bi->next;
6125 free_blockinfo(dbi);
6126 }
6127
6128 reset_lists();
6129 if (!compiled_code)
6130 return;
6131 current_compile_p=compiled_code;
6132 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6133 }
6134
6135
6136 /* "Soft flushing" --- instead of actually throwing everything away,
6137 we simply mark everything as "needs to be checked".
6138 */
6139
6140 static inline void flush_icache_lazy(int n)
6141 {
6142 uae_u32 i;
6143 blockinfo* bi;
6144 blockinfo* bi2;
6145
6146 soft_flush_count++;
6147 if (!active)
6148 return;
6149
6150 bi=active;
6151 while (bi) {
6152 uae_u32 cl=cacheline(bi->pc_p);
6153 if (bi->status==BI_INVALID ||
6154 bi->status==BI_NEED_RECOMP) {
6155 if (bi==cache_tags[cl+1].bi)
6156 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6157 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6158 set_dhtu(bi,bi->direct_pen);
6159 bi->status=BI_INVALID;
6160 }
6161 else {
6162 if (bi==cache_tags[cl+1].bi)
6163 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6164 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6165 set_dhtu(bi,bi->direct_pcc);
6166 bi->status=BI_NEED_CHECK;
6167 }
6168 bi2=bi;
6169 bi=bi->next;
6170 }
6171 /* bi2 is now the last entry in the active list */
6172 bi2->next=dormant;
6173 if (dormant)
6174 dormant->prev_p=&(bi2->next);
6175
6176 dormant=active;
6177 active->prev_p=&dormant;
6178 active=NULL;
6179 }
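
/* Compared to flush_icache_hard(), nothing is freed here: the whole active
   list is spliced onto the dormant list, and each block pays one checksum
   pass on its next entry instead of a full recompilation. */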
6180
6181 static void catastrophe(void)
6182 {
6183 abort();
6184 }
6185
6186 int failure;
6187
6188 #define TARGET_M68K 0
6189 #define TARGET_POWERPC 1
6190 #define TARGET_X86 2
6191 #if defined(i386) || defined(__i386__)
6192 #define TARGET_NATIVE TARGET_X86
6193 #endif
6194 #if defined(powerpc) || defined(__powerpc__)
6195 #define TARGET_NATIVE TARGET_POWERPC
6196 #endif
6197
6198 #ifdef ENABLE_MON
6199 static uae_u32 mon_read_byte_jit(uae_u32 addr)
6200 {
6201 uae_u8 *m = (uae_u8 *)addr;
6202 return (uae_u32)(*m);
6203 }
6204
6205 static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
6206 {
6207 uae_u8 *m = (uae_u8 *)addr;
6208 *m = b;
6209 }
6210 #endif
6211
6212 void disasm_block(int target, uint8 * start, size_t length)
6213 {
6214 if (!JITDebug)
6215 return;
6216
6217 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6218 char disasm_str[200];
6219 sprintf(disasm_str, "%s $%x $%x",
6220 target == TARGET_M68K ? "d68" :
6221 target == TARGET_X86 ? "d86" :
6222 target == TARGET_POWERPC ? "d" : "x",
6223 start, start + length - 1);
6224
6225 uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
6226 void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;
6227
6228 mon_read_byte = mon_read_byte_jit;
6229 mon_write_byte = mon_write_byte_jit;
6230
6231 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6232 mon(4, arg);
6233
6234 mon_read_byte = old_mon_read_byte;
6235 mon_write_byte = old_mon_write_byte;
6236 #endif
6237 }
6238
6239 static inline void disasm_native_block(uint8 *start, size_t length)
6240 {
6241 disasm_block(TARGET_NATIVE, start, length);
6242 }
6243
6244 static inline void disasm_m68k_block(uint8 *start, size_t length)
6245 {
6246 disasm_block(TARGET_M68K, start, length);
6247 }
6248
6249 #ifdef HAVE_GET_WORD_UNSWAPPED
6250 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6251 #else
6252 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6253 #endif
6254
6255 #if JIT_DEBUG
6256 static uae_u8 *last_regs_pc_p = 0;
6257 static uae_u8 *last_compiled_block_addr = 0;
6258
6259 void compiler_dumpstate(void)
6260 {
6261 if (!JITDebug)
6262 return;
6263
6264 write_log("### Host addresses\n");
6265 write_log("MEM_BASE : %x\n", MEMBaseDiff);
6266 write_log("PC_P : %p\n", &regs.pc_p);
6267 write_log("SPCFLAGS : %p\n", &regs.spcflags);
6268 write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6269 write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6270 write_log("\n");
6271
6272 write_log("### M68k processor state\n");
6273 m68k_dumpstate(0);
6274 write_log("\n");
6275
6276 write_log("### Block in Mac address space\n");
6277 write_log("M68K block : %p\n",
6278 (void *)get_virtual_address(last_regs_pc_p));
6279 write_log("Native block : %p (%d bytes)\n",
6280 (void *)get_virtual_address(last_compiled_block_addr),
6281 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6282 write_log("\n");
6283 }
6284 #endif
6285
6286 static void compile_block(cpu_history* pc_hist, int blocklen)
6287 {
6288 if (letit && compiled_code) {
6289 #if PROFILE_COMPILE_TIME
6290 compile_count++;
6291 clock_t start_time = clock();
6292 #endif
6293 #if JIT_DEBUG
6294 bool disasm_block = false;
6295 #endif
6296
6297 /* OK, here we need to 'compile' a block */
6298 int i;
6299 int r;
6300 int was_comp=0;
6301 uae_u8 liveflags[MAXRUN+1];
6302 #if USE_CHECKSUM_INFO
6303 bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6304 uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
6305 uae_u32 min_pcp=max_pcp;
6306 #else
6307 uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
6308 uae_u32 min_pcp=max_pcp;
6309 #endif
6310 uae_u32 cl=cacheline(pc_hist[0].location);
6311 void* specflags=(void*)&regs.spcflags;
6312 blockinfo* bi=NULL;
6313 blockinfo* bi2;
6314 int extra_len=0;
6315
6316 redo_current_block=0;
6317 if (current_compile_p>=max_compile_start)
6318 flush_icache_hard(7);
6319
6320 alloc_blockinfos();
6321
6322 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6323 bi2=get_blockinfo(cl);
6324
6325 optlev=bi->optlevel;
6326 if (bi->status!=BI_INVALID) {
6327 Dif (bi!=bi2) {
6328 /* I don't think it can happen anymore. Shouldn't, in
6329 any case. So let's make sure... */
6330 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6331 bi->count,bi->optlevel,bi->handler_to_use,
6332 cache_tags[cl].handler);
6333 abort();
6334 }
6335
6336 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6337 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6338 /* What the heck? We are not supposed to be here! */
6339 abort();
6340 }
6341 }
6342 if (bi->count==-1) {
6343 optlev++;
6344 while (!optcount[optlev])
6345 optlev++;
6346 bi->count=optcount[optlev]-1;
6347 }
6348 current_block_pc_p=(uae_u32)pc_hist[0].location;
6349
6350 remove_deps(bi); /* We are about to create new code */
6351 bi->optlevel=optlev;
6352 bi->pc_p=(uae_u8*)pc_hist[0].location;
6353 #if USE_CHECKSUM_INFO
6354 free_checksum_info_chain(bi->csi);
6355 bi->csi = NULL;
6356 #endif
6357
6358 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6359 i=blocklen;
6360 while (i--) {
6361 uae_u16* currpcp=pc_hist[i].location;
6362 uae_u32 op=DO_GET_OPCODE(currpcp);
6363
6364 #if USE_CHECKSUM_INFO
6365 trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6366 #if USE_INLINING
6367 if (is_const_jump(op)) {
6368 checksum_info *csi = alloc_checksum_info();
6369 csi->start_p = (uae_u8 *)min_pcp;
6370 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6371 csi->next = bi->csi;
6372 bi->csi = csi;
6373 max_pcp = (uae_u32)currpcp;
6374 }
6375 #endif
6376 min_pcp = (uae_u32)currpcp;
6377 #else
6378 if ((uae_u32)currpcp<min_pcp)
6379 min_pcp=(uae_u32)currpcp;
6380 if ((uae_u32)currpcp>max_pcp)
6381 max_pcp=(uae_u32)currpcp;
6382 #endif
6383
6384 liveflags[i]=((liveflags[i+1]&
6385 (~prop[op].set_flags))|
6386 prop[op].use_flags);
6387 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6388 liveflags[i]&= ~FLAG_Z;
6389 }
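/* Worked example of the recurrence above (illustrative): if instruction
   i+1 still needs C and Z, while instruction i sets all of CZNV and uses
   only X, then liveflags[i] = ((C|Z) & ~CZNV) | X = X --- the flags that
   i writes cancel the demand from i+1, and i's own reads remain. */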
6390
6391 #if USE_CHECKSUM_INFO
6392 checksum_info *csi = alloc_checksum_info();
6393 csi->start_p = (uae_u8 *)min_pcp;
6394 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6395 csi->next = bi->csi;
6396 bi->csi = csi;
6397 #endif
6398
6399 bi->needed_flags=liveflags[0];
6400
6401 align_target(align_loops);
6402 was_comp=0;
6403
6404 bi->direct_handler=(cpuop_func *)get_target();
6405 set_dhtu(bi,bi->direct_handler);
6406 bi->status=BI_COMPILING;
6407 current_block_start_target=(uae_u32)get_target();
6408
6409 log_startblock();
6410
6411 if (bi->count>=0) { /* Need to generate countdown code */
6412 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6413 raw_sub_l_mi((uae_u32)&(bi->count),1);
6414 raw_jl((uae_u32)popall_recompile_block);
6415 }
6416 if (optlev==0) { /* No need to actually translate */
6417 /* Execute normally without keeping stats */
6418 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6419 raw_jmp((uae_u32)popall_exec_nostats);
6420 }
6421 else {
6422 reg_alloc_run=0;
6423 next_pc_p=0;
6424 taken_pc_p=0;
6425 branch_cc=0;
6426
6427 comp_pc_p=(uae_u8*)pc_hist[0].location;
6428 init_comp();
6429 was_comp=1;
6430
6431 #if JIT_DEBUG
6432 if (JITDebug) {
6433 raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
6434 raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
6435 }
6436 #endif
6437
6438 for (i=0;i<blocklen &&
6439 get_target_noopt()<max_compile_start;i++) {
6440 cpuop_func **cputbl;
6441 compop_func **comptbl;
6442 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6443 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6444 if (!needed_flags) {
6445 cputbl=nfcpufunctbl;
6446 comptbl=nfcompfunctbl;
6447 }
6448 else {
6449 cputbl=cpufunctbl;
6450 comptbl=compfunctbl;
6451 }
6452
6453 failure = 1; // gb-- defaults to failure state
6454 if (comptbl[opcode] && optlev>1) {
6455 failure=0;
6456 if (!was_comp) {
6457 comp_pc_p=(uae_u8*)pc_hist[i].location;
6458 init_comp();
6459 }
6460 was_comp=1;
6461
6462 comptbl[opcode](opcode);
6463 freescratch();
6464 if (!(liveflags[i+1] & FLAG_CZNV)) {
6465 /* We can forget about flags */
6466 dont_care_flags();
6467 }
6468 #if INDIVIDUAL_INST
6469 flush(1);
6470 nop();
6471 flush(1);
6472 was_comp=0;
6473 #endif
6474 }
6475
6476 if (failure) {
6477 if (was_comp) {
6478 flush(1);
6479 was_comp=0;
6480 }
6481 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6482 #if USE_NORMAL_CALLING_CONVENTION
6483 raw_push_l_r(REG_PAR1);
6484 #endif
6485 raw_mov_l_mi((uae_u32)&regs.pc_p,
6486 (uae_u32)pc_hist[i].location);
6487 raw_call((uae_u32)cputbl[opcode]);
6488 #if PROFILE_UNTRANSLATED_INSNS
6489 // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6490 raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
6491 #endif
6492 #if USE_NORMAL_CALLING_CONVENTION
6493 raw_inc_sp(4);
6494 #endif
6495
6496 if (i < blocklen - 1) {
6497 uae_s8* branchadd;
6498
6499 raw_mov_l_rm(0,(uae_u32)specflags);
6500 raw_test_l_rr(0,0);
6501 raw_jz_b_oponly();
6502 branchadd=(uae_s8 *)get_target();
6503 emit_byte(0);
6504 raw_jmp((uae_u32)popall_do_nothing);
6505 *branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
6506 }
6507 }
6508 }
6509 #if 1 /* This isn't completely kosher yet; it really needs to
6510 be integrated into a general inter-block-dependency scheme */
6511 if (next_pc_p && taken_pc_p &&
6512 was_comp && taken_pc_p==current_block_pc_p) {
6513 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6514 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6515 uae_u8 x=bi1->needed_flags;
6516
6517 if (x==0xff || 1) { /* To be on the safe side */
6518 uae_u16* next=(uae_u16*)next_pc_p;
6519 uae_u32 op=DO_GET_OPCODE(next);
6520
6521 x=0x1f;
6522 x&=(~prop[op].set_flags);
6523 x|=prop[op].use_flags;
6524 }
6525
6526 x|=bi2->needed_flags;
6527 if (!(x & FLAG_CZNV)) {
6528 /* We can forget about flags */
6529 dont_care_flags();
6530 extra_len+=2; /* The next instruction now is part of this
6531 block */
6532 }
6533
6534 }
6535 #endif
6536 log_flush();
6537
6538 if (next_pc_p) { /* A branch was registered */
6539 uae_u32 t1=next_pc_p;
6540 uae_u32 t2=taken_pc_p;
6541 int cc=branch_cc;
6542
6543 uae_u32* branchadd;
6544 uae_u32* tba;
6545 bigstate tmp;
6546 blockinfo* tbi;
6547
6548 if (taken_pc_p<next_pc_p) {
6549 /* backward branch. Optimize for the "taken" case ---
6550 which means the raw_jcc should fall through when
6551 the 68k branch is taken. */
6552 t1=taken_pc_p;
6553 t2=next_pc_p;
6554 cc=branch_cc^1;
6555 }
6556
6557 tmp=live; /* ouch! This is big... */
6558 raw_jcc_l_oponly(cc);
6559 branchadd=(uae_u32*)get_target();
6560 emit_long(0);
6561
6562 /* predicted outcome */
6563 tbi=get_blockinfo_addr_new((void*)t1,1);
6564 match_states(tbi);
6565 raw_cmp_l_mi((uae_u32)specflags,0);
6566 raw_jcc_l_oponly(NATIVE_CC_EQ);
6567 tba=(uae_u32*)get_target();
6568 emit_long(get_handler(t1)-((uae_u32)tba+4));
6569 raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
6570 raw_jmp((uae_u32)popall_do_nothing);
6571 create_jmpdep(bi,0,tba,t1);
6572
6573 align_target(align_jumps);
6574 /* not-predicted outcome */
6575 *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6576 live=tmp; /* Ouch again */
6577 tbi=get_blockinfo_addr_new((void*)t2,1);
6578 match_states(tbi);
6579
6580 //flush(1); /* Can only get here if was_comp==1 */
6581 raw_cmp_l_mi((uae_u32)specflags,0);
6582 raw_jcc_l_oponly(NATIVE_CC_EQ);
6583 tba=(uae_u32*)get_target();
6584 emit_long(get_handler(t2)-((uae_u32)tba+4));
6585 raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
6586 raw_jmp((uae_u32)popall_do_nothing);
6587 create_jmpdep(bi,1,tba,t2);
6588 }
6589 else
6590 {
6591 if (was_comp) {
6592 flush(1);
6593 }
6594
6595 /* Let's find out where next_handler is... */
6596 if (was_comp && isinreg(PC_P)) {
6597 r=live.state[PC_P].realreg;
6598 raw_and_l_ri(r,TAGMASK);
6599 int r2 = (r==0) ? 1 : 0;
6600 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6601 raw_cmp_l_mi((uae_u32)specflags,0);
6602 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6603 raw_jmp_r(r2);
6604 }
6605 else if (was_comp && isconst(PC_P)) {
6606 uae_u32 v=live.state[PC_P].val;
6607 uae_u32* tba;
6608 blockinfo* tbi;
6609
6610 tbi=get_blockinfo_addr_new((void*)v,1);
6611 match_states(tbi);
6612
6613 raw_cmp_l_mi((uae_u32)specflags,0);
6614 raw_jcc_l_oponly(NATIVE_CC_EQ);
6615 tba=(uae_u32*)get_target();
6616 emit_long(get_handler(v)-((uae_u32)tba+4));
6617 raw_mov_l_mi((uae_u32)&regs.pc_p,v);
6618 raw_jmp((uae_u32)popall_do_nothing);
6619 create_jmpdep(bi,0,tba,v);
6620 }
6621 else {
6622 r=REG_PC_TMP;
6623 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
6624 raw_and_l_ri(r,TAGMASK);
6625 int r2 = (r==0) ? 1 : 0;
6626 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6627 raw_cmp_l_mi((uae_u32)specflags,0);
6628 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6629 raw_jmp_r(r2);
6630 }
6631 }
6632 }
6633
6634 #if USE_MATCH
6635 if (callers_need_recompile(&live,&(bi->env))) {
6636 mark_callers_recompile(bi);
6637 }
6638
6639 big_to_small_state(&live,&(bi->env));
6640 #endif
6641
6642 #if USE_CHECKSUM_INFO
6643 remove_from_list(bi);
6644 if (trace_in_rom) {
6645 // No need to checksum that block trace on cache invalidation
6646 free_checksum_info_chain(bi->csi);
6647 bi->csi = NULL;
6648 add_to_dormant(bi);
6649 }
6650 else {
6651 calc_checksum(bi,&(bi->c1),&(bi->c2));
6652 add_to_active(bi);
6653 }
6654 #else
6655 if (next_pc_p+extra_len>=max_pcp &&
6656 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6657 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6658 else
6659 max_pcp+=LONGEST_68K_INST;
6660
6661 bi->len=max_pcp-min_pcp;
6662 bi->min_pcp=min_pcp;
6663
6664 remove_from_list(bi);
6665 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6666 add_to_dormant(bi); /* No need to checksum it on cache flush.
6667 Please don't start changing ROMs in
6668 flight! */
6669 }
6670 else {
6671 calc_checksum(bi,&(bi->c1),&(bi->c2));
6672 add_to_active(bi);
6673 }
6674 #endif
6675
6676 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6677
6678 #if JIT_DEBUG
6679 if (JITDebug)
6680 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6681
6682 if (JITDebug && disasm_block) {
6683 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6684 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6685 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6686 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6687 D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6688 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6689 getchar();
6690 }
6691 #endif
6692
6693 log_dump();
6694 align_target(align_jumps);
6695
6696 /* This is the non-direct handler */
6697 bi->handler=
6698 bi->handler_to_use=(cpuop_func *)get_target();
6699 raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6700 raw_jnz((uae_u32)popall_cache_miss);
6701 comp_pc_p=(uae_u8*)pc_hist[0].location;
6702
6703 bi->status=BI_FINALIZING;
6704 init_comp();
6705 match_states(bi);
6706 flush(1);
6707
6708 raw_jmp((uae_u32)bi->direct_handler);
6709
6710 current_compile_p=get_target();
6711 raise_in_cl_list(bi);
6712
6713 /* We will flush soon, anyway, so let's do it now */
6714 if (current_compile_p>=max_compile_start)
6715 flush_icache_hard(7);
6716
6717 bi->status=BI_ACTIVE;
6718 if (redo_current_block)
6719 block_need_recompile(bi);
6720
6721 #if PROFILE_COMPILE_TIME
6722 compile_time += (clock() - start_time);
6723 #endif
6724 }
6725 }
6726
6727 void do_nothing(void)
6728 {
6729 /* What did you expect this to do? */
6730 }
6731
6732 void exec_nostats(void)
6733 {
6734 for (;;) {
6735 uae_u32 opcode = GET_OPCODE;
6736 (*cpufunctbl[opcode])(opcode);
6737 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6738 return; /* We will deal with the spcflags in the caller */
6739 }
6740 }
6741 }
6742
6743 void execute_normal(void)
6744 {
6745 if (!check_for_cache_miss()) {
6746 cpu_history pc_hist[MAXRUN];
6747 int blocklen = 0;
6748 #if REAL_ADDRESSING || DIRECT_ADDRESSING
6749 start_pc_p = regs.pc_p;
6750 start_pc = get_virtual_address(regs.pc_p);
6751 #else
6752 start_pc_p = regs.pc_oldp;
6753 start_pc = regs.pc;
6754 #endif
6755 for (;;) { /* Take note: This is the do-it-normal loop */
6756 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6757 uae_u32 opcode = GET_OPCODE;
6758 #if FLIGHT_RECORDER
6759 m68k_record_step(m68k_getpc());
6760 #endif
6761 (*cpufunctbl[opcode])(opcode);
6762 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6763 compile_block(pc_hist, blocklen);
6764 return; /* We will deal with the spcflags in the caller */
6765 }
6766 /* No need to check regs.spcflags, because if they were set,
6767 we'd have ended up inside that "if" */
6768 }
6769 }
6770 }
6771
6772 typedef void (*compiled_handler)(void);
6773
6774 #ifdef X86_ASSEMBLY
6775 void (*m68k_compile_execute)(void) = NULL;
6776 #else
6777 void m68k_do_compile_execute(void)
6778 {
6779 for (;;) {
6780 ((compiled_handler)(pushall_call_handler))();
6781 /* Whenever we return from that, we should check spcflags */
6782 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6783 if (m68k_do_specialties ())
6784 return;
6785 }
6786 }
6787 }
6788 #endif