root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.18
Committed: 2003-03-21T19:12:44Z by gbeauche
Branch: MAIN
CVS Tags: nigel-build-12, nigel-build-13
Changes since 1.17: +250 -21 lines
Log Message:
Remove some dead code. Start implementation of optimized calls to interpretive
fallbacks for untranslatable instruction handlers. Disabled for now since
call_m_01() is not correctly implemented yet.

File Contents

1 /*
2 * compiler/compemu_support.cpp - Core dynamic translation engine
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2002
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2002 Christian Bauer
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28 #endif
29
30 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 #endif
33
34 #define USE_MATCH 0
35
36 /* kludge for Brian, so he can compile under MSVC++ */
37 #define USE_NORMAL_CALLING_CONVENTION 0
38
39 #ifndef WIN32
40 #include <sys/types.h>
41 #include <sys/mman.h>
42 #endif
43
44 #include <stdlib.h>
45 #include <fcntl.h>
46 #include <errno.h>
47
48 #include "sysdeps.h"
49 #include "cpu_emulation.h"
50 #include "main.h"
51 #include "prefs.h"
52 #include "user_strings.h"
53 #include "vm_alloc.h"
54
55 #include "m68k.h"
56 #include "memory.h"
57 #include "readcpu.h"
58 #include "newcpu.h"
59 #include "comptbl.h"
60 #include "compiler/compemu.h"
61 #include "fpu/fpu.h"
62 #include "fpu/flags.h"
63
64 #define DEBUG 1
65 #include "debug.h"
66
67 #ifdef ENABLE_MON
68 #include "mon.h"
69 #endif
70
71 #ifndef WIN32
72 #define PROFILE_COMPILE_TIME 1
73 #define PROFILE_UNTRANSLATED_INSNS 1
74 #endif
75
76 #ifdef WIN32
77 #undef write_log
78 #define write_log dummy_write_log
79 static void dummy_write_log(const char *, ...) { }
80 #endif
81
82 #if JIT_DEBUG
83 #undef abort
84 #define abort() do { \
85 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
86 exit(EXIT_FAILURE); \
87 } while (0)
88 #endif
89
90 #if PROFILE_COMPILE_TIME
91 #include <time.h>
92 static uae_u32 compile_count = 0;
93 static clock_t compile_time = 0;
94 static clock_t emul_start_time = 0;
95 static clock_t emul_end_time = 0;
96 #endif
97
98 #if PROFILE_UNTRANSLATED_INSNS
99 const int untranslated_top_ten = 20;
100 static uae_u32 raw_cputbl_count[65536] = { 0, };
101 static uae_u16 opcode_nums[65536];
102
103 static int untranslated_compfn(const void *e1, const void *e2)
104 {
105 return (raw_cputbl_count[*(const uae_u16 *)e2] > raw_cputbl_count[*(const uae_u16 *)e1]) - (raw_cputbl_count[*(const uae_u16 *)e2] < raw_cputbl_count[*(const uae_u16 *)e1]); /* descending; qsort expects a three-way result */
106 }
107 #endif
108
109 compop_func *compfunctbl[65536];
110 compop_func *nfcompfunctbl[65536];
111 cpuop_func *nfcpufunctbl[65536];
112 uae_u8* comp_pc_p;
113
114 // From newcpu.cpp
115 extern bool quit_program;
116
117 // gb-- Extra data for Basilisk II/JIT
118 #if JIT_DEBUG
119 static bool JITDebug = false; // Enable runtime disassemblers through mon?
120 #else
121 const bool JITDebug = false; // Don't use JIT debug mode at all
122 #endif
123
124 const uae_u32 MIN_CACHE_SIZE = 2048; // Minimal translation cache size (2048 KB)
125 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
126 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
127 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
128 static bool avoid_fpu = true; // Flag: avoid compiling FPU instructions?
129 static bool have_cmov = false; // target has CMOV instructions ?
130 static bool have_rat_stall = true; // target has partial register stalls ?
131 const bool tune_alignment = true; // Tune code alignments for running CPU ?
132 const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
133 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
134 static int align_loops = 32; // Align the start of loops
135 static int align_jumps = 32; // Align the start of jumps
136 static int zero_fd = -1;
137 static int optcount[10] = {
138 10, // How often a block has to be executed before it is translated
139 0, // How often to use naive translation
140 0, 0, 0, 0,
141 -1, -1, -1, -1
142 };
143
144 struct op_properties {
145 uae_u8 use_flags;
146 uae_u8 set_flags;
147 uae_u8 is_addx;
148 uae_u8 cflow;
149 };
150 static op_properties prop[65536];
151
152 static inline int end_block(uae_u32 opcode)
153 {
154 return (prop[opcode].cflow & fl_end_block);
155 }
156
157 static inline bool is_const_jump(uae_u32 opcode)
158 {
159 return (prop[opcode].cflow == fl_const_jump);
160 }
161
162 static inline bool may_trap(uae_u32 opcode)
163 {
164 return (prop[opcode].cflow & fl_trap);
165 }
166
167 static inline unsigned int cft_map (unsigned int f)
168 {
169 #ifndef HAVE_GET_WORD_UNSWAPPED
170 return f;
171 #else
172 return ((f >> 8) & 255) | ((f & 255) << 8);
173 #endif
174 }
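/* Example (added note, hedged): with HAVE_GET_WORD_UNSWAPPED defined,
 * opcode words live byte-swapped in memory, so a lookup for RTS (0x4e75)
 * must index the tables with cft_map(0x4e75) == 0x754e. */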
175
176 uae_u8* start_pc_p;
177 uae_u32 start_pc;
178 uae_u32 current_block_pc_p;
179 uae_u32 current_block_start_target;
180 uae_u32 needed_flags;
181 static uae_u32 next_pc_p;
182 static uae_u32 taken_pc_p;
183 static int branch_cc;
184 static int redo_current_block;
185
186 int segvcount=0;
187 int soft_flush_count=0;
188 int hard_flush_count=0;
189 int checksum_count=0;
190 static uae_u8* current_compile_p=NULL;
191 static uae_u8* max_compile_start;
192 static uae_u8* compiled_code=NULL;
193 static uae_s32 reg_alloc_run;
194
195 void* pushall_call_handler=NULL;
196 static void* popall_do_nothing=NULL;
197 static void* popall_exec_nostats=NULL;
198 static void* popall_execute_normal=NULL;
199 static void* popall_cache_miss=NULL;
200 static void* popall_recompile_block=NULL;
201 static void* popall_check_checksum=NULL;
202
203 /* The 68k only ever executes from even addresses. So right now, we
204 * waste half the entries in this array.
205 * UPDATE: We now use those entries to store the start of the linked
206 * lists that we maintain for each hash result.
207 */
208 cacheline cache_tags[TAGSIZE];
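/* Sketch (added, hedged): assuming cacheline() hashes a 68k PC to an even
 * index cl, each pair of entries is used as
 *
 *   cache_tags[cl].handler  -- native code to jump to for this line
 *   cache_tags[cl+1].bi     -- head of the blockinfo chain for this line
 *
 * which matches the cl / cl+1 accesses in the list functions below. */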
209 int letit=0;
210 blockinfo* hold_bi[MAX_HOLD_BI];
211 blockinfo* active;
212 blockinfo* dormant;
213
214 /* 68040 */
215 extern struct cputbl op_smalltbl_0_nf[];
216 extern struct comptbl op_smalltbl_0_comp_nf[];
217 extern struct comptbl op_smalltbl_0_comp_ff[];
218
219 /* 68020 + 68881 */
220 extern struct cputbl op_smalltbl_1_nf[];
221
222 /* 68020 */
223 extern struct cputbl op_smalltbl_2_nf[];
224
225 /* 68010 */
226 extern struct cputbl op_smalltbl_3_nf[];
227
228 /* 68000 */
229 extern struct cputbl op_smalltbl_4_nf[];
230
231 /* 68000 slow but compatible. */
232 extern struct cputbl op_smalltbl_5_nf[];
233
234 static void flush_icache_hard(int n);
235 static void flush_icache_lazy(int n);
236 static void flush_icache_none(int n);
237 void (*flush_icache)(int n) = flush_icache_none;
238
239
240
241 bigstate live;
242 smallstate empty_ss;
243 smallstate default_ss;
244 static int optlev;
245
246 static int writereg(int r, int size);
247 static void unlock2(int r);
248 static void setlock(int r);
249 static int readreg_specific(int r, int size, int spec);
250 static int writereg_specific(int r, int size, int spec);
251 static void prepare_for_call_1(void);
252 static void prepare_for_call_2(void);
253 static void align_target(uae_u32 a);
254
255 static uae_s32 nextused[VREGS];
256
257 uae_u32 m68k_pc_offset;
258
259 /* Some arithmetic operations can be optimized away if the operands
260 * are known to be constant. But that's only a good idea when the
261 * side effects they would have on the flags are not important. This
262 * variable indicates whether we need the side effects or not
263 */
264 uae_u32 needflags=0;
265
266 /* Flag handling is complicated.
267 *
268 * x86 instructions create flags, which quite often are exactly what we
269 * want. So at times, the "68k" flags are actually in the x86 flags.
270 *
271 * Then again, sometimes we do x86 instructions that clobber the x86
272 * flags, but don't represent a corresponding m68k instruction. In that
273 * case, we have to save them.
274 *
275 * We used to save them to the stack, but now store them back directly
276 * into the regflags.cznv of the traditional emulation. Thus some odd
277 * names.
278 *
279 * So flags can be in either of two places (used to be three; boy were
280 * things complicated back then!); And either place can contain either
281 * valid flags or invalid trash (and on the stack, there was also the
282 * option of "nothing at all", now gone). A couple of variables keep
283 * track of the respective states.
284 *
285 * To make things worse, we might or might not be interested in the flags.
286 * By default, we are, but a call to dont_care_flags can change that
287 * until the next call to live_flags. If we are not, pretty much whatever
288 * is in the register and/or the native flags is seen as valid.
289 */
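/* Summary (added, hedged): the two storage places are tracked by
 * live.flags_in_flags (the native x86 EFLAGS) and live.flags_on_stack
 * (the regflags.cznv memory copy), each being VALID or TRASH.
 * flags_to_stack() copies EFLAGS out to memory when needed, and
 * make_flags_live_internal() reloads EFLAGS from the memory copy when
 * only the latter is VALID. */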
290
291 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
292 {
293 return cache_tags[cl+1].bi;
294 }
295
296 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
297 {
298 blockinfo* bi=get_blockinfo(cacheline(addr));
299
300 while (bi) {
301 if (bi->pc_p==addr)
302 return bi;
303 bi=bi->next_same_cl;
304 }
305 return NULL;
306 }
307
308
309 /*******************************************************************
310 * All sorts of list related functions for all of the lists *
311 *******************************************************************/
312
313 static __inline__ void remove_from_cl_list(blockinfo* bi)
314 {
315 uae_u32 cl=cacheline(bi->pc_p);
316
317 if (bi->prev_same_cl_p)
318 *(bi->prev_same_cl_p)=bi->next_same_cl;
319 if (bi->next_same_cl)
320 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
321 if (cache_tags[cl+1].bi)
322 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
323 else
324 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
325 }
326
327 static __inline__ void remove_from_list(blockinfo* bi)
328 {
329 if (bi->prev_p)
330 *(bi->prev_p)=bi->next;
331 if (bi->next)
332 bi->next->prev_p=bi->prev_p;
333 }
334
335 static __inline__ void remove_from_lists(blockinfo* bi)
336 {
337 remove_from_list(bi);
338 remove_from_cl_list(bi);
339 }
340
341 static __inline__ void add_to_cl_list(blockinfo* bi)
342 {
343 uae_u32 cl=cacheline(bi->pc_p);
344
345 if (cache_tags[cl+1].bi)
346 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
347 bi->next_same_cl=cache_tags[cl+1].bi;
348
349 cache_tags[cl+1].bi=bi;
350 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
351
352 cache_tags[cl].handler=bi->handler_to_use;
353 }
354
355 static __inline__ void raise_in_cl_list(blockinfo* bi)
356 {
357 remove_from_cl_list(bi);
358 add_to_cl_list(bi);
359 }
360
361 static __inline__ void add_to_active(blockinfo* bi)
362 {
363 if (active)
364 active->prev_p=&(bi->next);
365 bi->next=active;
366
367 active=bi;
368 bi->prev_p=&active;
369 }
370
371 static __inline__ void add_to_dormant(blockinfo* bi)
372 {
373 if (dormant)
374 dormant->prev_p=&(bi->next);
375 bi->next=dormant;
376
377 dormant=bi;
378 bi->prev_p=&dormant;
379 }
380
381 static __inline__ void remove_dep(dependency* d)
382 {
383 if (d->prev_p)
384 *(d->prev_p)=d->next;
385 if (d->next)
386 d->next->prev_p=d->prev_p;
387 d->prev_p=NULL;
388 d->next=NULL;
389 }
390
391 /* This block's code is about to be thrown away, so it no longer
392 depends on anything else */
393 static __inline__ void remove_deps(blockinfo* bi)
394 {
395 remove_dep(&(bi->dep[0]));
396 remove_dep(&(bi->dep[1]));
397 }
398
399 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
400 {
401 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
402 }
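/* (Added note): adjust_jmpdep() patches the 32-bit displacement of an
 * already emitted jump; x86 encodes jump targets relative to the end of
 * the 4-byte offset field, hence the +4. */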
403
404 /********************************************************************
405 * Soft flush handling support functions *
406 ********************************************************************/
407
408 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
409 {
410 //write_log("bi is %p\n",bi);
411 if (dh!=bi->direct_handler_to_use) {
412 dependency* x=bi->deplist;
413 //write_log("bi->deplist=%p\n",bi->deplist);
414 while (x) {
415 //write_log("x is %p\n",x);
416 //write_log("x->next is %p\n",x->next);
417 //write_log("x->prev_p is %p\n",x->prev_p);
418
419 if (x->jmp_off) {
420 adjust_jmpdep(x,dh);
421 }
422 x=x->next;
423 }
424 bi->direct_handler_to_use=dh;
425 }
426 }
427
428 static __inline__ void invalidate_block(blockinfo* bi)
429 {
430 int i;
431
432 bi->optlevel=0;
433 bi->count=optcount[0]-1;
434 bi->handler=NULL;
435 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
436 bi->direct_handler=NULL;
437 set_dhtu(bi,bi->direct_pen);
438 bi->needed_flags=0xff;
439 bi->status=BI_INVALID;
440 for (i=0;i<2;i++) {
441 bi->dep[i].jmp_off=NULL;
442 bi->dep[i].target=NULL;
443 }
444 remove_deps(bi);
445 }
446
447 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
448 {
449 blockinfo* tbi=get_blockinfo_addr((void*)target);
450
451 Dif(!tbi) {
452 write_log("Could not create jmpdep!\n");
453 abort();
454 }
455 bi->dep[i].jmp_off=jmpaddr;
456 bi->dep[i].source=bi;
457 bi->dep[i].target=tbi;
458 bi->dep[i].next=tbi->deplist;
459 if (bi->dep[i].next)
460 bi->dep[i].next->prev_p=&(bi->dep[i].next);
461 bi->dep[i].prev_p=&(tbi->deplist);
462 tbi->deplist=&(bi->dep[i]);
463 }
464
465 static __inline__ void block_need_recompile(blockinfo * bi)
466 {
467 uae_u32 cl = cacheline(bi->pc_p);
468
469 set_dhtu(bi, bi->direct_pen);
470 bi->direct_handler = bi->direct_pen;
471
472 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
473 bi->handler = (cpuop_func *)popall_execute_normal;
474 if (bi == cache_tags[cl + 1].bi)
475 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
476 bi->status = BI_NEED_RECOMP;
477 }
478
479 static __inline__ void mark_callers_recompile(blockinfo * bi)
480 {
481 dependency *x = bi->deplist;
482
483 while (x) {
484 dependency *next = x->next; /* This disappears when we mark for
485 * recompilation and thus remove the
486 * blocks from the lists */
487 if (x->jmp_off) {
488 blockinfo *cbi = x->source;
489
490 Dif(cbi->status == BI_INVALID) {
491 // write_log("invalid block in dependency list\n"); // FIXME?
492 // abort();
493 }
494 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
495 block_need_recompile(cbi);
496 mark_callers_recompile(cbi);
497 }
498 else if (cbi->status == BI_COMPILING) {
499 redo_current_block = 1;
500 }
501 else if (cbi->status == BI_NEED_RECOMP) {
502 /* nothing */
503 }
504 else {
505 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
506 }
507 }
508 x = next;
509 }
510 }
511
512 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
513 {
514 blockinfo* bi=get_blockinfo_addr(addr);
515 int i;
516
517 if (!bi) {
518 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
519 if (hold_bi[i]) {
520 uae_u32 cl=cacheline(addr);
521
522 bi=hold_bi[i];
523 hold_bi[i]=NULL;
524 bi->pc_p=(uae_u8 *)addr;
525 invalidate_block(bi);
526 add_to_active(bi);
527 add_to_cl_list(bi);
528
529 }
530 }
531 }
532 if (!bi) {
533 write_log("Looking for blockinfo, can't find free one\n");
534 abort();
535 }
536 return bi;
537 }
538
539 static void prepare_block(blockinfo* bi);
540
541 /* Management of blockinfos.
542
543 A blockinfo struct is allocated whenever a new block has to be
544 compiled. If the list of free blockinfos is empty, we allocate a new
545 pool of blockinfos and link the newly created blockinfos together
546 into the list of free blockinfos. Otherwise, we simply pop a structure
547 off the free list.
548
549 Blockinfos are lazily deallocated, i.e. chained together in the
550 list of free blockinfos whenever a translation cache flush (hard or
551 soft) request occurs.
552 */
553
554 template< class T >
555 class LazyBlockAllocator
556 {
557 enum {
558 kPoolSize = 1 + 4096 / sizeof(T)
559 };
560 struct Pool {
561 T chunk[kPoolSize];
562 Pool * next;
563 };
564 Pool * mPools;
565 T * mChunks;
566 public:
567 LazyBlockAllocator() : mPools(0), mChunks(0) { }
568 ~LazyBlockAllocator();
569 T * acquire();
570 void release(T * const);
571 };
572
573 template< class T >
574 LazyBlockAllocator<T>::~LazyBlockAllocator()
575 {
576 Pool * currentPool = mPools;
577 while (currentPool) {
578 Pool * deadPool = currentPool;
579 currentPool = currentPool->next;
580 free(deadPool);
581 }
582 }
583
584 template< class T >
585 T * LazyBlockAllocator<T>::acquire()
586 {
587 if (!mChunks) {
588 // There is no chunk left, allocate a new pool and link the
589 // chunks into the free list
590 Pool * newPool = (Pool *)malloc(sizeof(Pool));
591 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
592 chunk->next = mChunks;
593 mChunks = chunk;
594 }
595 newPool->next = mPools;
596 mPools = newPool;
597 }
598 T * chunk = mChunks;
599 mChunks = chunk->next;
600 return chunk;
601 }
602
603 template< class T >
604 void LazyBlockAllocator<T>::release(T * const chunk)
605 {
606 chunk->next = mChunks;
607 mChunks = chunk;
608 }
609
610 template< class T >
611 class HardBlockAllocator
612 {
613 public:
614 T * acquire() {
615 T * data = (T *)current_compile_p;
616 current_compile_p += sizeof(T);
617 return data;
618 }
619
620 void release(T * const chunk) {
621 // Deallocated on invalidation
622 }
623 };
624
625 #if USE_SEPARATE_BIA
626 static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
627 static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
628 #else
629 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
630 static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
631 #endif
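/* Usage sketch (added, not original source): both allocators share the
 * same acquire/release interface; the lazy variant additionally requires
 * T to expose a T *next link, which blockinfo and checksum_info provide:
 *
 *   blockinfo *bi = BlockInfoAllocator.acquire(); // pops a free chunk,
 *                                                 // growing a pool on demand
 *   ...
 *   BlockInfoAllocator.release(bi);               // back onto the free list
 */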
632
633 static __inline__ checksum_info *alloc_checksum_info(void)
634 {
635 checksum_info *csi = ChecksumInfoAllocator.acquire();
636 csi->next = NULL;
637 return csi;
638 }
639
640 static __inline__ void free_checksum_info(checksum_info *csi)
641 {
642 csi->next = NULL;
643 ChecksumInfoAllocator.release(csi);
644 }
645
646 static __inline__ void free_checksum_info_chain(checksum_info *csi)
647 {
648 while (csi != NULL) {
649 checksum_info *csi2 = csi->next;
650 free_checksum_info(csi);
651 csi = csi2;
652 }
653 }
654
655 static __inline__ blockinfo *alloc_blockinfo(void)
656 {
657 blockinfo *bi = BlockInfoAllocator.acquire();
658 #if USE_CHECKSUM_INFO
659 bi->csi = NULL;
660 #endif
661 return bi;
662 }
663
664 static __inline__ void free_blockinfo(blockinfo *bi)
665 {
666 #if USE_CHECKSUM_INFO
667 free_checksum_info_chain(bi->csi);
668 bi->csi = NULL;
669 #endif
670 BlockInfoAllocator.release(bi);
671 }
672
673 static __inline__ void alloc_blockinfos(void)
674 {
675 int i;
676 blockinfo* bi;
677
678 for (i=0;i<MAX_HOLD_BI;i++) {
679 if (hold_bi[i])
680 return;
681 bi=hold_bi[i]=alloc_blockinfo();
682 prepare_block(bi);
683 }
684 }
685
686 /********************************************************************
687 * Functions to emit data into memory, and other general support *
688 ********************************************************************/
689
690 static uae_u8* target;
691
692 static void emit_init(void)
693 {
694 }
695
696 static __inline__ void emit_byte(uae_u8 x)
697 {
698 *target++=x;
699 }
700
701 static __inline__ void emit_word(uae_u16 x)
702 {
703 *((uae_u16*)target)=x;
704 target+=2;
705 }
706
707 static __inline__ void emit_long(uae_u32 x)
708 {
709 *((uae_u32*)target)=x;
710 target+=4;
711 }
712
713 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
714 {
715 memcpy((uae_u8 *)target,block,blocklen);
716 target+=blocklen;
717 }
718
719 static __inline__ uae_u32 reverse32(uae_u32 v)
720 {
721 #if 1
722 // gb-- We have specialized byteswapping functions, just use them
723 return do_byteswap_32(v);
724 #else
725 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
726 #endif
727 }
728
729 /********************************************************************
730 * Getting the information about the target CPU *
731 ********************************************************************/
732
733 #include "codegen_x86.cpp"
734
735 void set_target(uae_u8* t)
736 {
737 target=t;
738 }
739
740 static __inline__ uae_u8* get_target_noopt(void)
741 {
742 return target;
743 }
744
745 __inline__ uae_u8* get_target(void)
746 {
747 return get_target_noopt();
748 }
749
750
751 /********************************************************************
752 * Flags status handling. EMIT TIME! *
753 ********************************************************************/
754
755 static void bt_l_ri_noclobber(R4 r, IMM i);
756
757 static void make_flags_live_internal(void)
758 {
759 if (live.flags_in_flags==VALID)
760 return;
761 Dif (live.flags_on_stack==TRASH) {
762 write_log("Want flags, got something on stack, but it is TRASH\n");
763 abort();
764 }
765 if (live.flags_on_stack==VALID) {
766 int tmp;
767 tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
768 raw_reg_to_flags(tmp);
769 unlock2(tmp);
770
771 live.flags_in_flags=VALID;
772 return;
773 }
774 write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
775 live.flags_in_flags,live.flags_on_stack);
776 abort();
777 }
778
779 static void flags_to_stack(void)
780 {
781 if (live.flags_on_stack==VALID)
782 return;
783 if (!live.flags_are_important) {
784 live.flags_on_stack=VALID;
785 return;
786 }
787 Dif (live.flags_in_flags!=VALID)
788 abort();
789 else {
790 int tmp;
791 tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
792 raw_flags_to_reg(tmp);
793 unlock2(tmp);
794 }
795 live.flags_on_stack=VALID;
796 }
797
798 static __inline__ void clobber_flags(void)
799 {
800 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
801 flags_to_stack();
802 live.flags_in_flags=TRASH;
803 }
804
805 /* Prepare for leaving the compiled stuff */
806 static __inline__ void flush_flags(void)
807 {
808 flags_to_stack();
809 return;
810 }
811
812 int touchcnt;
813
814 /********************************************************************
815 * Partial register flushing for optimized calls *
816 ********************************************************************/
817
818 struct regusage {
819 uae_u16 rmask;
820 uae_u16 wmask;
821 };
822
823 static inline void ru_set(uae_u16 *mask, int reg)
824 {
825 #if USE_OPTIMIZED_CALLS
826 *mask |= 1 << reg;
827 #endif
828 }
829
830 static inline bool ru_get(const uae_u16 *mask, int reg)
831 {
832 #if USE_OPTIMIZED_CALLS
833 return (*mask & (1 << reg));
834 #else
835 /* Default: instruction reads & writes the register */
836 return true;
837 #endif
838 }
839
840 static inline void ru_set_read(regusage *ru, int reg)
841 {
842 ru_set(&ru->rmask, reg);
843 }
844
845 static inline void ru_set_write(regusage *ru, int reg)
846 {
847 ru_set(&ru->wmask, reg);
848 }
849
850 static inline bool ru_read_p(const regusage *ru, int reg)
851 {
852 return ru_get(&ru->rmask, reg);
853 }
854
855 static inline bool ru_write_p(const regusage *ru, int reg)
856 {
857 return ru_get(&ru->wmask, reg);
858 }
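/* Layout note (added): bit n of rmask/wmask stands for Dn when n < 8 and
 * for An-8 when n >= 8, matching the reg += 8 adjustment for address
 * registers in ru_fill_ea() below. */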
859
860 static void ru_fill_ea(regusage *ru, int reg, amodes mode,
861 wordsizes size, int write_mode)
862 {
863 switch (mode) {
864 case Areg:
865 reg += 8;
866 /* fall through */
867 case Dreg:
868 ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
869 break;
870 case Ad16:
871 /* skip displacement */
872 m68k_pc_offset += 2; /* fall through */
873 case Aind:
874 case Aipi:
875 case Apdi:
876 ru_set_read(ru, reg+8);
877 break;
878 case Ad8r:
879 ru_set_read(ru, reg+8);
880 /* fall through */
881 case PC8r: {
882 uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
883 reg = (dp >> 12) & 15;
884 ru_set_read(ru, reg);
885 if (dp & 0x100)
886 m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
887 break;
888 }
889 case PC16:
890 case absw:
891 case imm0:
892 case imm1:
893 m68k_pc_offset += 2;
894 break;
895 case absl:
896 case imm2:
897 m68k_pc_offset += 4;
898 break;
899 case immi:
900 m68k_pc_offset += (size == sz_long) ? 4 : 2;
901 break;
902 }
903 }
904
905 /* TODO: split into a static initialization part and a dynamic one
906 (instructions depending on extension words) */
907 static void ru_fill(regusage *ru, uae_u32 opcode)
908 {
909 m68k_pc_offset += 2;
910
911 /* Default: no register is used or written to */
912 ru->rmask = 0;
913 ru->wmask = 0;
914
915 uae_u32 real_opcode = cft_map(opcode);
916 struct instr *dp = &table68k[real_opcode];
917
918 bool rw_dest = true;
919 bool handled = false;
920
921 /* Handle some instructions specifically */
922 uae_u16 reg, ext;
923 switch (dp->mnemo) {
924 case i_BFCHG:
925 case i_BFCLR:
926 case i_BFEXTS:
927 case i_BFEXTU:
928 case i_BFFFO:
929 case i_BFINS:
930 case i_BFSET:
931 case i_BFTST:
932 ext = comp_get_iword((m68k_pc_offset+=2)-2);
933 if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
934 if (ext & 0x020) ru_set_read(ru, ext & 7);
935 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
936 if (dp->dmode == Dreg)
937 ru_set_read(ru, dp->dreg);
938 switch (dp->mnemo) {
939 case i_BFEXTS:
940 case i_BFEXTU:
941 case i_BFFFO:
942 ru_set_write(ru, (ext >> 12) & 7);
943 break;
944 case i_BFINS:
945 ru_set_read(ru, (ext >> 12) & 7);
946 /* fall through */
947 case i_BFCHG:
948 case i_BFCLR:
949 case i_BFSET:
950 if (dp->dmode == Dreg)
951 ru_set_write(ru, dp->dreg);
952 break;
953 }
954 handled = true;
955 rw_dest = false;
956 break;
957
958 case i_BTST:
959 rw_dest = false;
960 break;
961
962 case i_CAS:
963 {
964 ext = comp_get_iword((m68k_pc_offset+=2)-2);
965 int Du = ext & 7;
966 ru_set_read(ru, Du);
967 int Dc = (ext >> 6) & 7;
968 ru_set_read(ru, Dc);
969 ru_set_write(ru, Dc);
970 break;
971 }
972 case i_CAS2:
973 {
974 int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
975 ext = comp_get_iword((m68k_pc_offset+=2)-2);
976 Rn1 = (ext >> 12) & 15;
977 Du1 = (ext >> 6) & 7;
978 Dc1 = ext & 7;
979 ru_set_read(ru, Rn1);
980 ru_set_read(ru, Du1);
981 ru_set_read(ru, Dc1);
982 ru_set_write(ru, Dc1);
983 ext = comp_get_iword((m68k_pc_offset+=2)-2);
984 Rn2 = (ext >> 12) & 15;
985 Du2 = (ext >> 6) & 7;
986 Dc2 = ext & 7;
987 ru_set_read(ru, Rn2);
988 ru_set_read(ru, Du2);
989 ru_set_write(ru, Dc2);
990 break;
991 }
992 case i_DIVL: case i_MULL:
993 m68k_pc_offset += 2;
994 break;
995 case i_LEA:
996 case i_MOVE: case i_MOVEA: case i_MOVE16:
997 rw_dest = false;
998 break;
999 case i_PACK: case i_UNPK:
1000 rw_dest = false;
1001 m68k_pc_offset += 2;
1002 break;
1003 case i_TRAPcc:
1004 m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1005 break;
1006 case i_RTR:
1007 /* do nothing, just for coverage debugging */
1008 break;
1009 /* TODO: handle EXG instruction */
1010 }
1011
1012 /* Handle A-Traps better */
1013 if ((real_opcode & 0xf000) == 0xa000) {
1014 handled = true;
1015 }
1016
1017 /* Handle EmulOps better */
1018 if ((real_opcode & 0xff00) == 0x7100) {
1019 handled = true;
1020 ru->rmask = 0xffff;
1021 ru->wmask = 0;
1022 }
1023
1024 if (dp->suse && !handled)
1025 ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1026
1027 if (dp->duse && !handled)
1028 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1029
1030 if (rw_dest)
1031 ru->rmask |= ru->wmask;
1032
1033 handled = handled || dp->suse || dp->duse;
1034
1035 /* Mark all registers as used/written if the instruction may trap */
1036 if (may_trap(opcode)) {
1037 handled = true;
1038 ru->rmask = 0xffff;
1039 ru->wmask = 0xffff;
1040 }
1041
1042 if (!handled) {
1043 write_log("ru_fill: %04x = { %04x, %04x }\n",
1044 real_opcode, ru->rmask, ru->wmask);
1045 abort();
1046 }
1047 }
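/* Worked example (added, hedged): for MOVE.L D3,D4 (opcode 0x2803, a
 * register-to-register move), the source EA sets rmask bit 3 and the
 * destination EA sets wmask bit 4; since i_MOVE clears rw_dest, the
 * destination is not also marked as read. Assuming the opcode is not
 * flagged fl_trap (which would force both masks to 0xffff), the result
 * is { rmask, wmask } = { 0x0008, 0x0010 }. */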
1048
1049 /********************************************************************
1050 * register allocation per block logging *
1051 ********************************************************************/
1052
1053 static uae_s8 vstate[VREGS];
1054 static uae_s8 vwritten[VREGS];
1055 static uae_s8 nstate[N_REGS];
1056
1057 #define L_UNKNOWN -127
1058 #define L_UNAVAIL -1
1059 #define L_NEEDED -2
1060 #define L_UNNEEDED -3
1061
1062 static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
1063 {
1064 int i;
1065
1066 for (i = 0; i < VREGS; i++)
1067 s->virt[i] = vstate[i];
1068 for (i = 0; i < N_REGS; i++)
1069 s->nat[i] = nstate[i];
1070 }
1071
1072 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1073 {
1074 int i;
1075 int reverse = 0;
1076
1077 for (i = 0; i < VREGS; i++) {
1078 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1079 return 1;
1080 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1081 reverse++;
1082 }
1083 for (i = 0; i < N_REGS; i++) {
1084 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1085 return 1;
1086 if (nstate[i] < 0 && s->nat[i] >= 0)
1087 reverse++;
1088 }
1089 if (reverse >= 2 && USE_MATCH)
1090 return 1; /* In this case, it might be worth recompiling the
1091 * callers */
1092 return 0;
1093 }
1094
1095 static __inline__ void log_startblock(void)
1096 {
1097 int i;
1098
1099 for (i = 0; i < VREGS; i++) {
1100 vstate[i] = L_UNKNOWN;
1101 vwritten[i] = 0;
1102 }
1103 for (i = 0; i < N_REGS; i++)
1104 nstate[i] = L_UNKNOWN;
1105 }
1106
1107 /* Using an n-reg for a temp variable */
1108 static __inline__ void log_isused(int n)
1109 {
1110 if (nstate[n] == L_UNKNOWN)
1111 nstate[n] = L_UNAVAIL;
1112 }
1113
1114 static __inline__ void log_visused(int r)
1115 {
1116 if (vstate[r] == L_UNKNOWN)
1117 vstate[r] = L_NEEDED;
1118 }
1119
1120 static __inline__ void do_load_reg(int n, int r)
1121 {
1122 if (r == FLAGTMP)
1123 raw_load_flagreg(n, r);
1124 else if (r == FLAGX)
1125 raw_load_flagx(n, r);
1126 else
1127 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1128 }
1129
1130 static __inline__ void check_load_reg(int n, int r)
1131 {
1132 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1133 }
1134
1135 static __inline__ void log_vwrite(int r)
1136 {
1137 vwritten[r] = 1;
1138 }
1139
1140 /* Using an n-reg to hold a v-reg */
1141 static __inline__ void log_isreg(int n, int r)
1142 {
1143 static int count = 0;
1144
1145 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1146 nstate[n] = r;
1147 else {
1148 do_load_reg(n, r);
1149 if (nstate[n] == L_UNKNOWN)
1150 nstate[n] = L_UNAVAIL;
1151 }
1152 if (vstate[r] == L_UNKNOWN)
1153 vstate[r] = L_NEEDED;
1154 }
1155
1156 static __inline__ void log_clobberreg(int r)
1157 {
1158 if (vstate[r] == L_UNKNOWN)
1159 vstate[r] = L_UNNEEDED;
1160 }
1161
1162 /* This ends all possibility of clever register allocation */
1163
1164 static __inline__ void log_flush(void)
1165 {
1166 int i;
1167
1168 for (i = 0; i < VREGS; i++)
1169 if (vstate[i] == L_UNKNOWN)
1170 vstate[i] = L_NEEDED;
1171 for (i = 0; i < N_REGS; i++)
1172 if (nstate[i] == L_UNKNOWN)
1173 nstate[i] = L_UNAVAIL;
1174 }
1175
1176 static __inline__ void log_dump(void)
1177 {
1178 int i;
1179
1180 return; /* debug dump disabled; remove this return to enable the dump below */
1181
1182 write_log("----------------------\n");
1183 for (i = 0; i < N_REGS; i++) {
1184 switch (nstate[i]) {
1185 case L_UNKNOWN:
1186 write_log("Nat %d : UNKNOWN\n", i);
1187 break;
1188 case L_UNAVAIL:
1189 write_log("Nat %d : UNAVAIL\n", i);
1190 break;
1191 default:
1192 write_log("Nat %d : %d\n", i, nstate[i]);
1193 break;
1194 }
1195 }
1196 for (i = 0; i < VREGS; i++) {
1197 if (vstate[i] == L_UNNEEDED)
1198 write_log("Virt %d: UNNEEDED\n", i);
1199 }
1200 }
1201
1202 /********************************************************************
1203 * register status handling. EMIT TIME! *
1204 ********************************************************************/
1205
1206 static __inline__ void set_status(int r, int status)
1207 {
1208 if (status == ISCONST)
1209 log_clobberreg(r);
1210 live.state[r].status=status;
1211 }
1212
1213 static __inline__ int isinreg(int r)
1214 {
1215 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1216 }
1217
1218 static __inline__ void adjust_nreg(int r, uae_u32 val)
1219 {
1220 if (!val)
1221 return;
1222 raw_lea_l_brr(r,r,val);
1223 }
1224
1225 static void tomem(int r)
1226 {
1227 int rr=live.state[r].realreg;
1228
1229 if (isinreg(r)) {
1230 if (live.state[r].val && live.nat[rr].nholds==1
1231 && !live.nat[rr].locked) {
1232 // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
1233 // live.state[r].val,r,rr,target);
1234 adjust_nreg(rr,live.state[r].val);
1235 live.state[r].val=0;
1236 live.state[r].dirtysize=4;
1237 set_status(r,DIRTY);
1238 }
1239 }
1240
1241 if (live.state[r].status==DIRTY) {
1242 switch (live.state[r].dirtysize) {
1243 case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
1244 case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
1245 case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
1246 default: abort();
1247 }
1248 log_vwrite(r);
1249 set_status(r,CLEAN);
1250 live.state[r].dirtysize=0;
1251 }
1252 }
1253
1254 static __inline__ int isconst(int r)
1255 {
1256 return live.state[r].status==ISCONST;
1257 }
1258
1259 int is_const(int r)
1260 {
1261 return isconst(r);
1262 }
1263
1264 static __inline__ void writeback_const(int r)
1265 {
1266 if (!isconst(r))
1267 return;
1268 Dif (live.state[r].needflush==NF_HANDLER) {
1269 write_log("Trying to write back constant NF_HANDLER!\n");
1270 abort();
1271 }
1272
1273 raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
1274 log_vwrite(r);
1275 live.state[r].val=0;
1276 set_status(r,INMEM);
1277 }
1278
1279 static __inline__ void tomem_c(int r)
1280 {
1281 if (isconst(r)) {
1282 writeback_const(r);
1283 }
1284 else
1285 tomem(r);
1286 }
1287
1288 static void evict(int r)
1289 {
1290 int rr;
1291
1292 if (!isinreg(r))
1293 return;
1294 tomem(r);
1295 rr=live.state[r].realreg;
1296
1297 Dif (live.nat[rr].locked &&
1298 live.nat[rr].nholds==1) {
1299 write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
1300 abort();
1301 }
1302
1303 live.nat[rr].nholds--;
1304 if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
1305 int topreg=live.nat[rr].holds[live.nat[rr].nholds];
1306 int thisind=live.state[r].realind;
1307
1308 live.nat[rr].holds[thisind]=topreg;
1309 live.state[topreg].realind=thisind;
1310 }
1311 live.state[r].realreg=-1;
1312 set_status(r,INMEM);
1313 }
1314
1315 static __inline__ void free_nreg(int r)
1316 {
1317 int i=live.nat[r].nholds;
1318
1319 while (i) {
1320 int vr;
1321
1322 --i;
1323 vr=live.nat[r].holds[i];
1324 evict(vr);
1325 }
1326 Dif (live.nat[r].nholds!=0) {
1327 write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
1328 abort();
1329 }
1330 }
1331
1332 /* Use with care! */
1333 static __inline__ void isclean(int r)
1334 {
1335 if (!isinreg(r))
1336 return;
1337 live.state[r].validsize=4;
1338 live.state[r].dirtysize=0;
1339 live.state[r].val=0;
1340 set_status(r,CLEAN);
1341 }
1342
1343 static __inline__ void disassociate(int r)
1344 {
1345 isclean(r);
1346 evict(r);
1347 }
1348
1349 static __inline__ void set_const(int r, uae_u32 val)
1350 {
1351 disassociate(r);
1352 live.state[r].val=val;
1353 set_status(r,ISCONST);
1354 }
1355
1356 static __inline__ uae_u32 get_offset(int r)
1357 {
1358 return live.state[r].val;
1359 }
1360
1361 static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
1362 {
1363 int bestreg;
1364 uae_s32 when;
1365 int i;
1366 uae_s32 badness=0; /* to shut up gcc */
1367 bestreg=-1;
1368 when=2000000000;
1369
1370 for (i=N_REGS;i--;) {
1371 badness=live.nat[i].touched;
1372 if (live.nat[i].nholds==0)
1373 badness=0;
1374 if (i==hint)
1375 badness-=200000000;
1376 if (!live.nat[i].locked && badness<when) {
1377 if ((size==1 && live.nat[i].canbyte) ||
1378 (size==2 && live.nat[i].canword) ||
1379 (size==4)) {
1380 bestreg=i;
1381 when=badness;
1382 if (live.nat[i].nholds==0 && hint<0)
1383 break;
1384 if (i==hint)
1385 break;
1386 }
1387 }
1388 }
1389 Dif (bestreg==-1)
1390 abort();
1391
1392 if (live.nat[bestreg].nholds>0) {
1393 free_nreg(bestreg);
1394 }
1395 if (isinreg(r)) {
1396 int rr=live.state[r].realreg;
1397 /* This will happen if we read a partially dirty register at a
1398 bigger size */
1399 Dif (willclobber || live.state[r].validsize>=size)
1400 abort();
1401 Dif (live.nat[rr].nholds!=1)
1402 abort();
1403 if (size==4 && live.state[r].validsize==2) {
1404 log_isused(bestreg);
1405 log_visused(r);
1406 raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
1407 raw_bswap_32(bestreg);
1408 raw_zero_extend_16_rr(rr,rr);
1409 raw_zero_extend_16_rr(bestreg,bestreg);
1410 raw_bswap_32(bestreg);
1411 raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
1412 live.state[r].validsize=4;
1413 live.nat[rr].touched=touchcnt++;
1414 return rr;
1415 }
1416 if (live.state[r].validsize==1) {
1417 /* Nothing yet */
1418 }
1419 evict(r);
1420 }
1421
1422 if (!willclobber) {
1423 if (live.state[r].status!=UNDEF) {
1424 if (isconst(r)) {
1425 raw_mov_l_ri(bestreg,live.state[r].val);
1426 live.state[r].val=0;
1427 live.state[r].dirtysize=4;
1428 set_status(r,DIRTY);
1429 log_isused(bestreg);
1430 }
1431 else {
1432 log_isreg(bestreg, r); /* This will also load it! */
1433 live.state[r].dirtysize=0;
1434 set_status(r,CLEAN);
1435 }
1436 }
1437 else {
1438 live.state[r].val=0;
1439 live.state[r].dirtysize=0;
1440 set_status(r,CLEAN);
1441 log_isused(bestreg);
1442 }
1443 live.state[r].validsize=4;
1444 }
1445 else { /* this is the easiest way, but not optimal. FIXME! */
1446 /* Now it's trickier, but hopefully still OK */
1447 if (!isconst(r) || size==4) {
1448 live.state[r].validsize=size;
1449 live.state[r].dirtysize=size;
1450 live.state[r].val=0;
1451 set_status(r,DIRTY);
1452 if (size == 4) {
1453 log_clobberreg(r);
1454 log_isused(bestreg);
1455 }
1456 else {
1457 log_visused(r);
1458 log_isused(bestreg);
1459 }
1460 }
1461 else {
1462 if (live.state[r].status!=UNDEF)
1463 raw_mov_l_ri(bestreg,live.state[r].val);
1464 live.state[r].val=0;
1465 live.state[r].validsize=4;
1466 live.state[r].dirtysize=4;
1467 set_status(r,DIRTY);
1468 log_isused(bestreg);
1469 }
1470 }
1471 live.state[r].realreg=bestreg;
1472 live.state[r].realind=live.nat[bestreg].nholds;
1473 live.nat[bestreg].touched=touchcnt++;
1474 live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
1475 live.nat[bestreg].nholds++;
1476
1477 return bestreg;
1478 }
1479
1480 static int alloc_reg(int r, int size, int willclobber)
1481 {
1482 return alloc_reg_hinted(r,size,willclobber,-1);
1483 }
1484
1485 static void unlock2(int r)
1486 {
1487 Dif (!live.nat[r].locked)
1488 abort();
1489 live.nat[r].locked--;
1490 }
1491
1492 static void setlock(int r)
1493 {
1494 live.nat[r].locked++;
1495 }
1496
1497
1498 static void mov_nregs(int d, int s)
1499 {
1500 int ns=live.nat[s].nholds;
1501 int nd=live.nat[d].nholds;
1502 int i;
1503
1504 if (s==d)
1505 return;
1506
1507 if (nd>0)
1508 free_nreg(d);
1509
1510 log_isused(d);
1511 raw_mov_l_rr(d,s);
1512
1513 for (i=0;i<live.nat[s].nholds;i++) {
1514 int vs=live.nat[s].holds[i];
1515
1516 live.state[vs].realreg=d;
1517 live.state[vs].realind=i;
1518 live.nat[d].holds[i]=vs;
1519 }
1520 live.nat[d].nholds=live.nat[s].nholds;
1521
1522 live.nat[s].nholds=0;
1523 }
1524
1525
1526 static __inline__ void make_exclusive(int r, int size, int spec)
1527 {
1528 int clobber;
1529 reg_status oldstate;
1530 int rr=live.state[r].realreg;
1531 int nr;
1532 int nind;
1533 int ndirt=0;
1534 int i;
1535
1536 if (!isinreg(r))
1537 return;
1538 if (live.nat[rr].nholds==1)
1539 return;
1540 for (i=0;i<live.nat[rr].nholds;i++) {
1541 int vr=live.nat[rr].holds[i];
1542 if (vr!=r &&
1543 (live.state[vr].status==DIRTY || live.state[vr].val))
1544 ndirt++;
1545 }
1546 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1547 /* Everything else is clean, so let's keep this register */
1548 for (i=0;i<live.nat[rr].nholds;i++) {
1549 int vr=live.nat[rr].holds[i];
1550 if (vr!=r) {
1551 evict(vr);
1552 i--; /* Try that index again! */
1553 }
1554 }
1555 Dif (live.nat[rr].nholds!=1) {
1556 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1557 rr,live.nat[rr].nholds,r);
1558 abort();
1559 }
1560 return;
1561 }
1562
1563 /* We have to split the register */
1564 oldstate=live.state[r];
1565
1566 setlock(rr); /* Make sure this doesn't go away */
1567 /* Forget about r being in the register rr */
1568 disassociate(r);
1569 /* Get a new register, that we will clobber completely */
1570 if (oldstate.status==DIRTY) {
1571 /* If dirtysize is <4, we need a register that can handle the
1572 eventual smaller memory store! Thanks to Quake68k for exposing
1573 this detail ;-) */
1574 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1575 }
1576 else {
1577 nr=alloc_reg_hinted(r,4,1,spec);
1578 }
1579 nind=live.state[r].realind;
1580 live.state[r]=oldstate; /* Keep all the old state info */
1581 live.state[r].realreg=nr;
1582 live.state[r].realind=nind;
1583
1584 if (size<live.state[r].validsize) {
1585 if (live.state[r].val) {
1586 /* Might as well compensate for the offset now */
1587 raw_lea_l_brr(nr,rr,oldstate.val);
1588 live.state[r].val=0;
1589 live.state[r].dirtysize=4;
1590 set_status(r,DIRTY);
1591 }
1592 else
1593 raw_mov_l_rr(nr,rr); /* Make another copy */
1594 }
1595 unlock2(rr);
1596 }
1597
1598 static __inline__ void add_offset(int r, uae_u32 off)
1599 {
1600 live.state[r].val+=off;
1601 }
1602
1603 static __inline__ void remove_offset(int r, int spec)
1604 {
1605 reg_status oldstate;
1606 int rr;
1607
1608 if (isconst(r))
1609 return;
1610 if (live.state[r].val==0)
1611 return;
1612 if (isinreg(r) && live.state[r].validsize<4)
1613 evict(r);
1614
1615 if (!isinreg(r))
1616 alloc_reg_hinted(r,4,0,spec);
1617
1618 Dif (live.state[r].validsize!=4) {
1619 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1620 abort();
1621 }
1622 make_exclusive(r,0,-1);
1623 /* make_exclusive might have done the job already */
1624 if (live.state[r].val==0)
1625 return;
1626
1627 rr=live.state[r].realreg;
1628
1629 if (live.nat[rr].nholds==1) {
1630 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1631 // live.state[r].val,r,rr,target);
1632 adjust_nreg(rr,live.state[r].val);
1633 live.state[r].dirtysize=4;
1634 live.state[r].val=0;
1635 set_status(r,DIRTY);
1636 return;
1637 }
1638 write_log("Failed in remove_offset\n");
1639 abort();
1640 }
1641
1642 static __inline__ void remove_all_offsets(void)
1643 {
1644 int i;
1645
1646 for (i=0;i<VREGS;i++)
1647 remove_offset(i,-1);
1648 }
1649
1650 static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
1651 {
1652 int n;
1653 int answer=-1;
1654
1655 if (live.state[r].status==UNDEF) {
1656 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1657 }
1658 if (!can_offset)
1659 remove_offset(r,spec);
1660
1661 if (isinreg(r) && live.state[r].validsize>=size) {
1662 n=live.state[r].realreg;
1663 switch(size) {
1664 case 1:
1665 if (live.nat[n].canbyte || spec>=0) {
1666 answer=n;
1667 }
1668 break;
1669 case 2:
1670 if (live.nat[n].canword || spec>=0) {
1671 answer=n;
1672 }
1673 break;
1674 case 4:
1675 answer=n;
1676 break;
1677 default: abort();
1678 }
1679 if (answer<0)
1680 evict(r);
1681 }
1682 /* either the value was in memory to start with, or it was evicted and
1683 is in memory now */
1684 if (answer<0) {
1685 answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
1686 }
1687
1688 if (spec>=0 && spec!=answer) {
1689 /* Too bad */
1690 mov_nregs(spec,answer);
1691 answer=spec;
1692 }
1693 live.nat[answer].locked++;
1694 live.nat[answer].touched=touchcnt++;
1695 return answer;
1696 }
1697
1698
1699
1700 static int readreg(int r, int size)
1701 {
1702 return readreg_general(r,size,-1,0);
1703 }
1704
1705 static int readreg_specific(int r, int size, int spec)
1706 {
1707 return readreg_general(r,size,spec,0);
1708 }
1709
1710 static int readreg_offset(int r, int size)
1711 {
1712 return readreg_general(r,size,-1,1);
1713 }
1714
1715 /* writereg_general(r, size, spec)
1716 *
1717 * INPUT
1718 * - r : mid-layer register
1719 * - size : requested size (1/2/4)
1720 * - spec : -1 if find or make a register free, otherwise specifies
1721 * the physical register to use in any case
1722 *
1723 * OUTPUT
1724 * - hard (physical, x86 here) register allocated to virtual register r
1725 */
1726 static __inline__ int writereg_general(int r, int size, int spec)
1727 {
1728 int n;
1729 int answer=-1;
1730
1731 if (size<4) {
1732 remove_offset(r,spec);
1733 }
1734
1735 make_exclusive(r,size,spec);
1736 if (isinreg(r)) {
1737 int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
1738 int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1739 n=live.state[r].realreg;
1740
1741 Dif (live.nat[n].nholds!=1)
1742 abort();
1743 switch(size) {
1744 case 1:
1745 if (live.nat[n].canbyte || spec>=0) {
1746 live.state[r].dirtysize=ndsize;
1747 live.state[r].validsize=nvsize;
1748 answer=n;
1749 }
1750 break;
1751 case 2:
1752 if (live.nat[n].canword || spec>=0) {
1753 live.state[r].dirtysize=ndsize;
1754 live.state[r].validsize=nvsize;
1755 answer=n;
1756 }
1757 break;
1758 case 4:
1759 live.state[r].dirtysize=ndsize;
1760 live.state[r].validsize=nvsize;
1761 answer=n;
1762 break;
1763 default: abort();
1764 }
1765 if (answer<0)
1766 evict(r);
1767 }
1768 /* either the value was in memory to start with, or it was evicted and
1769 is in memory now */
1770 if (answer<0) {
1771 answer=alloc_reg_hinted(r,size,1,spec);
1772 }
1773 if (spec>=0 && spec!=answer) {
1774 mov_nregs(spec,answer);
1775 answer=spec;
1776 }
1777 if (live.state[r].status==UNDEF)
1778 live.state[r].validsize=4;
1779 live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
1780 live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;
1781
1782 live.nat[answer].locked++;
1783 live.nat[answer].touched=touchcnt++;
1784 if (size==4) {
1785 live.state[r].val=0;
1786 }
1787 else {
1788 Dif (live.state[r].val) {
1789 write_log("Problem with val\n");
1790 abort();
1791 }
1792 }
1793 set_status(r,DIRTY);
1794 return answer;
1795 }
1796
1797 static int writereg(int r, int size)
1798 {
1799 return writereg_general(r,size,-1);
1800 }
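/* Example (added, hedged): a handler that overwrites only the low word
 * of D1 would bracket its raw_* emit like
 *
 *   int n = writereg(1, 2);  // native reg backing D1, low word now dirty
 *   ... emit a 16-bit store into n ...
 *   unlock2(n);
 *
 * so that tomem() later writes back only the 2 dirty bytes. */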
1801
1802 static int writereg_specific(int r, int size, int spec)
1803 {
1804 return writereg_general(r,size,spec);
1805 }
1806
1807 static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
1808 {
1809 int n;
1810 int answer=-1;
1811
1812 if (live.state[r].status==UNDEF) {
1813 write_log("WARNING: Unexpected read of undefined register %d\n",r);
1814 }
1815 remove_offset(r,spec);
1816 make_exclusive(r,0,spec);
1817
1818 Dif (wsize<rsize) {
1819 write_log("Cannot handle wsize<rsize in rmw_general()\n");
1820 abort();
1821 }
1822 if (isinreg(r) && live.state[r].validsize>=rsize) {
1823 n=live.state[r].realreg;
1824 Dif (live.nat[n].nholds!=1)
1825 abort();
1826
1827 switch(rsize) {
1828 case 1:
1829 if (live.nat[n].canbyte || spec>=0) {
1830 answer=n;
1831 }
1832 break;
1833 case 2:
1834 if (live.nat[n].canword || spec>=0) {
1835 answer=n;
1836 }
1837 break;
1838 case 4:
1839 answer=n;
1840 break;
1841 default: abort();
1842 }
1843 if (answer<0)
1844 evict(r);
1845 }
1846 /* either the value was in memory to start with, or it was evicted and
1847 is in memory now */
1848 if (answer<0) {
1849 answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
1850 }
1851
1852 if (spec>=0 && spec!=answer) {
1853 /* Too bad */
1854 mov_nregs(spec,answer);
1855 answer=spec;
1856 }
1857 if (wsize>live.state[r].dirtysize)
1858 live.state[r].dirtysize=wsize;
1859 if (wsize>live.state[r].validsize)
1860 live.state[r].validsize=wsize;
1861 set_status(r,DIRTY);
1862
1863 live.nat[answer].locked++;
1864 live.nat[answer].touched=touchcnt++;
1865
1866 Dif (live.state[r].val) {
1867 write_log("Problem with val(rmw)\n");
1868 abort();
1869 }
1870 return answer;
1871 }
1872
1873 static int rmw(int r, int wsize, int rsize)
1874 {
1875 return rmw_general(r,wsize,rsize,-1);
1876 }
1877
1878 static int rmw_specific(int r, int wsize, int rsize, int spec)
1879 {
1880 return rmw_general(r,wsize,rsize,spec);
1881 }
1882
1883
1884 /* needed for restoring the carry flag on non-P6 cores */
1885 static void bt_l_ri_noclobber(R4 r, IMM i)
1886 {
1887 int size=4;
1888 if (i<16)
1889 size=2;
1890 r=readreg(r,size);
1891 raw_bt_l_ri(r,i);
1892 unlock2(r);
1893 }
1894
1895 /********************************************************************
1896 * FPU register status handling. EMIT TIME! *
1897 ********************************************************************/
1898
1899 static void f_tomem(int r)
1900 {
1901 if (live.fate[r].status==DIRTY) {
1902 #if USE_LONG_DOUBLE
1903 raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1904 #else
1905 raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1906 #endif
1907 live.fate[r].status=CLEAN;
1908 }
1909 }
1910
1911 static void f_tomem_drop(int r)
1912 {
1913 if (live.fate[r].status==DIRTY) {
1914 #if USE_LONG_DOUBLE
1915 raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1916 #else
1917 raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
1918 #endif
1919 live.fate[r].status=INMEM;
1920 }
1921 }
1922
1923
1924 static __inline__ int f_isinreg(int r)
1925 {
1926 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1927 }
1928
1929 static void f_evict(int r)
1930 {
1931 int rr;
1932
1933 if (!f_isinreg(r))
1934 return;
1935 rr=live.fate[r].realreg;
1936 if (live.fat[rr].nholds==1)
1937 f_tomem_drop(r);
1938 else
1939 f_tomem(r);
1940
1941 Dif (live.fat[rr].locked &&
1942 live.fat[rr].nholds==1) {
1943 write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
1944 abort();
1945 }
1946
1947 live.fat[rr].nholds--;
1948 if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
1949 int topreg=live.fat[rr].holds[live.fat[rr].nholds];
1950 int thisind=live.fate[r].realind;
1951 live.fat[rr].holds[thisind]=topreg;
1952 live.fate[topreg].realind=thisind;
1953 }
1954 live.fate[r].status=INMEM;
1955 live.fate[r].realreg=-1;
1956 }
1957
1958 static __inline__ void f_free_nreg(int r)
1959 {
1960 int i=live.fat[r].nholds;
1961
1962 while (i) {
1963 int vr;
1964
1965 --i;
1966 vr=live.fat[r].holds[i];
1967 f_evict(vr);
1968 }
1969 Dif (live.fat[r].nholds!=0) {
1970 write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
1971 abort();
1972 }
1973 }
1974
1975
1976 /* Use with care! */
1977 static __inline__ void f_isclean(int r)
1978 {
1979 if (!f_isinreg(r))
1980 return;
1981 live.fate[r].status=CLEAN;
1982 }
1983
1984 static __inline__ void f_disassociate(int r)
1985 {
1986 f_isclean(r);
1987 f_evict(r);
1988 }
1989
1990
1991
1992 static int f_alloc_reg(int r, int willclobber)
1993 {
1994 int bestreg;
1995 uae_s32 when;
1996 int i;
1997 uae_s32 badness;
1998 bestreg=-1;
1999 when=2000000000;
2000 for (i=N_FREGS;i--;) {
2001 badness=live.fat[i].touched;
2002 if (live.fat[i].nholds==0)
2003 badness=0;
2004
2005 if (!live.fat[i].locked && badness<when) {
2006 bestreg=i;
2007 when=badness;
2008 if (live.fat[i].nholds==0)
2009 break;
2010 }
2011 }
2012 Dif (bestreg==-1)
2013 abort();
2014
2015 if (live.fat[bestreg].nholds>0) {
2016 f_free_nreg(bestreg);
2017 }
2018 if (f_isinreg(r)) {
2019 f_evict(r);
2020 }
2021
2022 if (!willclobber) {
2023 if (live.fate[r].status!=UNDEF) {
2024 #if USE_LONG_DOUBLE
2025 raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
2026 #else
2027 raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
2028 #endif
2029 }
2030 live.fate[r].status=CLEAN;
2031 }
2032 else {
2033 live.fate[r].status=DIRTY;
2034 }
2035 live.fate[r].realreg=bestreg;
2036 live.fate[r].realind=live.fat[bestreg].nholds;
2037 live.fat[bestreg].touched=touchcnt++;
2038 live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
2039 live.fat[bestreg].nholds++;
2040
2041 return bestreg;
2042 }
2043
2044 static void f_unlock(int r)
2045 {
2046 Dif (!live.fat[r].locked)
2047 abort();
2048 live.fat[r].locked--;
2049 }
2050
2051 static void f_setlock(int r)
2052 {
2053 live.fat[r].locked++;
2054 }
2055
2056 static __inline__ int f_readreg(int r)
2057 {
2058 int n;
2059 int answer=-1;
2060
2061 if (f_isinreg(r)) {
2062 n=live.fate[r].realreg;
2063 answer=n;
2064 }
2065 /* either the value was in memory to start with, or it was evicted and
2066 is in memory now */
2067 if (answer<0)
2068 answer=f_alloc_reg(r,0);
2069
2070 live.fat[answer].locked++;
2071 live.fat[answer].touched=touchcnt++;
2072 return answer;
2073 }
2074
2075 static __inline__ void f_make_exclusive(int r, int clobber)
2076 {
2077 freg_status oldstate;
2078 int rr=live.fate[r].realreg;
2079 int nr;
2080 int nind;
2081 int ndirt=0;
2082 int i;
2083
2084 if (!f_isinreg(r))
2085 return;
2086 if (live.fat[rr].nholds==1)
2087 return;
2088 for (i=0;i<live.fat[rr].nholds;i++) {
2089 int vr=live.fat[rr].holds[i];
2090 if (vr!=r && live.fate[vr].status==DIRTY)
2091 ndirt++;
2092 }
2093 if (!ndirt && !live.fat[rr].locked) {
2094 /* Everything else is clean, so let's keep this register */
2095 for (i=0;i<live.fat[rr].nholds;i++) {
2096 int vr=live.fat[rr].holds[i];
2097 if (vr!=r) {
2098 f_evict(vr);
2099 i--; /* Try that index again! */
2100 }
2101 }
2102 Dif (live.fat[rr].nholds!=1) {
2103 write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
2104 for (i=0;i<live.fat[rr].nholds;i++) {
2105 write_log(" %d(%d,%d)",live.fat[rr].holds[i],
2106 live.fate[live.fat[rr].holds[i]].realreg,
2107 live.fate[live.fat[rr].holds[i]].realind);
2108 }
2109 write_log("\n");
2110 abort();
2111 }
2112 return;
2113 }
2114
2115 /* We have to split the register */
2116 oldstate=live.fate[r];
2117
2118 f_setlock(rr); /* Make sure this doesn't go away */
2119 /* Forget about r being in the register rr */
2120 f_disassociate(r);
2121 /* Get a new register, that we will clobber completely */
2122 nr=f_alloc_reg(r,1);
2123 nind=live.fate[r].realind;
2124 if (!clobber)
2125 raw_fmov_rr(nr,rr); /* Make another copy */
2126 live.fate[r]=oldstate; /* Keep all the old state info */
2127 live.fate[r].realreg=nr;
2128 live.fate[r].realind=nind;
2129 f_unlock(rr);
2130 }
2131
2132
2133 static __inline__ int f_writereg(int r)
2134 {
2135 int n;
2136 int answer=-1;
2137
2138 f_make_exclusive(r,1);
2139 if (f_isinreg(r)) {
2140 n=live.fate[r].realreg;
2141 answer=n;
2142 }
2143 if (answer<0) {
2144 answer=f_alloc_reg(r,1);
2145 }
2146 live.fate[r].status=DIRTY;
2147 live.fat[answer].locked++;
2148 live.fat[answer].touched=touchcnt++;
2149 return answer;
2150 }
2151
2152 static int f_rmw(int r)
2153 {
2154 int n;
2155
2156 f_make_exclusive(r,0);
2157 if (f_isinreg(r)) {
2158 n=live.fate[r].realreg;
2159 }
2160 else
2161 n=f_alloc_reg(r,0);
2162 live.fate[r].status=DIRTY;
2163 live.fat[n].locked++;
2164 live.fat[n].touched=touchcnt++;
2165 return n;
2166 }
2167
2168 static void fflags_into_flags_internal(uae_u32 tmp)
2169 {
2170 int r;
2171
2172 clobber_flags();
2173 r=f_readreg(FP_RESULT);
2174 if (FFLAG_NREG_CLOBBER_CONDITION) {
2175 int tmp2=tmp;
2176 tmp=writereg_specific(tmp,4,FFLAG_NREG);
2177 raw_fflags_into_flags(r);
2178 unlock2(tmp);
2179 forget_about(tmp2);
2180 }
2181 else
2182 raw_fflags_into_flags(r);
2183 f_unlock(r);
2184 }
2185
2186
2187
2188
2189 /********************************************************************
2190 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2191 ********************************************************************/
2192
2193 /*
2194 * RULES FOR HANDLING REGISTERS:
2195 *
2196 * * In the function headers, order the parameters
2197 * - 1st registers written to
2198 * - 2nd read/modify/write registers
2199 * - 3rd registers read from
2200 * * Before calling raw_*, you must call readreg, writereg or rmw for
2201 * each register
2202 * * The order for this is
2203 * - 1st call remove_offset for all registers written to with size<4
2204 * - 2nd call readreg for all registers read without offset
2205 * - 3rd call rmw for all rmw registers
2206 * - 4th call readreg_offset for all registers that can handle offsets
2207 * - 5th call get_offset for all the registers from the previous step
2208 * - 6th call writereg for all written-to registers
2209 * - 7th call raw_*
2210 * - 8th unlock2 all registers that were locked
2211 */
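/* Illustration (added, hedged -- example_add_l and raw_add_l_rr are
 * hypothetical names, and a real handler would also declare its flag
 * clobbers): a two-operand integer op following the rules above would
 * look like
 *
 *   MIDFUNC(2,example_add_l,(RW4 d, R4 s))
 *   {
 *       s = readreg(s, 4);   // 2nd: registers read without offset
 *       d = rmw(d, 4, 4);    // 3rd: read/modify/write registers
 *       raw_add_l_rr(d, s);  // 7th: emit the native instruction
 *       unlock2(d);          // 8th: unlock everything locked above
 *       unlock2(s);
 *   }
 *   MENDFUNC(2,example_add_l,(RW4 d, R4 s))
 */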
2212
2213 MIDFUNC(0,live_flags,(void))
2214 {
2215 live.flags_on_stack=TRASH;
2216 live.flags_in_flags=VALID;
2217 live.flags_are_important=1;
2218 }
2219 MENDFUNC(0,live_flags,(void))
2220
2221 MIDFUNC(0,dont_care_flags,(void))
2222 {
2223 live.flags_are_important=0;
2224 }
2225 MENDFUNC(0,dont_care_flags,(void))
2226
2227
2228 MIDFUNC(0,duplicate_carry,(void))
2229 {
2230 evict(FLAGX);
2231 make_flags_live_internal();
2232 COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
2233 log_vwrite(FLAGX);
2234 }
2235 MENDFUNC(0,duplicate_carry,(void))
2236
2237 MIDFUNC(0,restore_carry,(void))
2238 {
2239 if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
2240 bt_l_ri_noclobber(FLAGX,0);
2241 }
2242 else { /* Avoid the stall the above creates.
2243 This is slow on non-P6, though.
2244 */
2245 COMPCALL(rol_b_ri)(FLAGX,8);
2246 isclean(FLAGX);
2247 }
2248 }
2249 MENDFUNC(0,restore_carry,(void))
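
/*
 * Why restore_carry has two paths: bt_l_ri reads FLAGX at full 32-bit
 * width, which triggers a partial register stall on P6-class cores after
 * the usual byte-sized writes to the flag byte. rol_b_ri(FLAGX,8) keeps
 * the access byte-sized and leaves the value unchanged, yet still ends
 * with CF = bit 0, which is all the restored carry needs.
 */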
2250
2251 MIDFUNC(0,start_needflags,(void))
2252 {
2253 needflags=1;
2254 }
2255 MENDFUNC(0,start_needflags,(void))
2256
2257 MIDFUNC(0,end_needflags,(void))
2258 {
2259 needflags=0;
2260 }
2261 MENDFUNC(0,end_needflags,(void))
2262
2263 MIDFUNC(0,make_flags_live,(void))
2264 {
2265 make_flags_live_internal();
2266 }
2267 MENDFUNC(0,make_flags_live,(void))
2268
2269 MIDFUNC(1,fflags_into_flags,(W2 tmp))
2270 {
2271 clobber_flags();
2272 fflags_into_flags_internal(tmp);
2273 }
2274 MENDFUNC(1,fflags_into_flags,(W2 tmp))
2275
2276
2277 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2278 {
2279 int size=4;
2280 if (i<16)
2281 size=2;
2282 CLOBBER_BT;
2283 r=readreg(r,size);
2284 raw_bt_l_ri(r,i);
2285 unlock2(r);
2286 }
2287 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2288
2289 MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2290 {
2291 CLOBBER_BT;
2292 r=readreg(r,4);
2293 b=readreg(b,4);
2294 raw_bt_l_rr(r,b);
2295 unlock2(r);
2296 unlock2(b);
2297 }
2298 MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2299
2300 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2301 {
2302 int size=4;
2303 if (i<16)
2304 size=2;
2305 CLOBBER_BT;
2306 r=rmw(r,size,size);
2307 raw_btc_l_ri(r,i);
2308 unlock2(r);
2309 }
2310 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2311
2312 MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2313 {
2314 CLOBBER_BT;
2315 b=readreg(b,4);
2316 r=rmw(r,4,4);
2317 raw_btc_l_rr(r,b);
2318 unlock2(r);
2319 unlock2(b);
2320 }
2321 MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2322
2323
2324 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2325 {
2326 int size=4;
2327 if (i<16)
2328 size=2;
2329 CLOBBER_BT;
2330 r=rmw(r,size,size);
2331 raw_btr_l_ri(r,i);
2332 unlock2(r);
2333 }
2334 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2335
2336 MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2337 {
2338 CLOBBER_BT;
2339 b=readreg(b,4);
2340 r=rmw(r,4,4);
2341 raw_btr_l_rr(r,b);
2342 unlock2(r);
2343 unlock2(b);
2344 }
2345 MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2346
2347
2348 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2349 {
2350 int size=4;
2351 if (i<16)
2352 size=2;
2353 CLOBBER_BT;
2354 r=rmw(r,size,size);
2355 raw_bts_l_ri(r,i);
2356 unlock2(r);
2357 }
2358 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2359
2360 MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2361 {
2362 CLOBBER_BT;
2363 b=readreg(b,4);
2364 r=rmw(r,4,4);
2365 raw_bts_l_rr(r,b);
2366 unlock2(r);
2367 unlock2(b);
2368 }
2369 MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2370
2371 MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
2372 {
2373 CLOBBER_MOV;
2374 d=writereg(d,4);
2375 raw_mov_l_rm(d,s);
2376 unlock2(d);
2377 }
2378 MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2379
2380
2381 MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2382 {
2383 r=readreg(r,4);
2384 raw_call_r(r);
2385 unlock2(r);
2386 }
2387 MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2388
2389 MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
2390 {
2391 CLOBBER_SUB;
2392 raw_sub_l_mi(d,s) ;
2393 }
2394 MENDFUNC(2,sub_l_mi,(IMM d, IMM s))
2395
2396 MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
2397 {
2398 CLOBBER_MOV;
2399 raw_mov_l_mi(d,s) ;
2400 }
2401 MENDFUNC(2,mov_l_mi,(IMM d, IMM s))
2402
2403 MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
2404 {
2405 CLOBBER_MOV;
2406 raw_mov_w_mi(d,s) ;
2407 }
2408 MENDFUNC(2,mov_w_mi,(IMM d, IMM s))
2409
2410 MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
2411 {
2412 CLOBBER_MOV;
2413 raw_mov_b_mi(d,s) ;
2414 }
2415 MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2416
2417 MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2418 {
2419 if (!i && !needflags)
2420 return;
2421 CLOBBER_ROL;
2422 r=rmw(r,1,1);
2423 raw_rol_b_ri(r,i);
2424 unlock2(r);
2425 }
2426 MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))
2427
2428 MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2429 {
2430 if (!i && !needflags)
2431 return;
2432 CLOBBER_ROL;
2433 r=rmw(r,2,2);
2434 raw_rol_w_ri(r,i);
2435 unlock2(r);
2436 }
2437 MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))
2438
2439 MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2440 {
2441 if (!i && !needflags)
2442 return;
2443 CLOBBER_ROL;
2444 r=rmw(r,4,4);
2445 raw_rol_l_ri(r,i);
2446 unlock2(r);
2447 }
2448 MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2449
2450 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2451 {
2452 if (isconst(r)) {
2453 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2454 return;
2455 }
2456 CLOBBER_ROL;
2457 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2458 d=rmw(d,4,4);
2459 Dif (r!=1) {
2460 write_log("Illegal register %d in raw_rol_b\n",r);
2461 abort();
2462 }
2463 raw_rol_l_rr(d,r) ;
2464 unlock2(r);
2465 unlock2(d);
2466 }
2467 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2468
2469 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2470 { /* Can only do this with r==1, i.e. cl */
2471
2472 if (isconst(r)) {
2473 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2474 return;
2475 }
2476 CLOBBER_ROL;
2477 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2478 d=rmw(d,2,2);
2479 Dif (r!=1) {
2480 write_log("Illegal register %d in raw_rol_b\n",r);
2481 abort();
2482 }
2483 raw_rol_w_rr(d,r) ;
2484 unlock2(r);
2485 unlock2(d);
2486 }
2487 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2488
2489 MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2490 { /* Can only do this with r==1, i.e. cl */
2491
2492 if (isconst(r)) {
2493 COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
2494 return;
2495 }
2496
2497 CLOBBER_ROL;
2498 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2499 d=rmw(d,1,1);
2500 Dif (r!=1) {
2501 write_log("Illegal register %d in raw_rol_b\n",r);
2502 abort();
2503 }
2504 raw_rol_b_rr(d,r) ;
2505 unlock2(r);
2506 unlock2(d);
2507 }
2508 MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
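
/*
 * The Dif(r!=1) checks in the *_rr shift and rotate handlers assert that
 * readreg_specific(...,SHIFTCOUNT_NREG) really did place the count in
 * native register 1 (ECX): x86 variable-count shifts and rotates can take
 * their count only from CL.
 */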
2509
2510
2511 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2512 {
2513 if (isconst(r)) {
2514 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2515 return;
2516 }
2517 CLOBBER_SHLL;
2518 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2519 d=rmw(d,4,4);
2520 Dif (r!=1) {
2521 write_log("Illegal register %d in raw_rol_b\n",r);
2522 abort();
2523 }
2524 raw_shll_l_rr(d,r) ;
2525 unlock2(r);
2526 unlock2(d);
2527 }
2528 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2529
2530 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2531 { /* Can only do this with r==1, i.e. cl */
2532
2533 if (isconst(r)) {
2534 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2535 return;
2536 }
2537 CLOBBER_SHLL;
2538 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2539 d=rmw(d,2,2);
2540 Dif (r!=1) {
2541 write_log("Illegal register %d in raw_shll_b\n",r);
2542 abort();
2543 }
2544 raw_shll_w_rr(d,r) ;
2545 unlock2(r);
2546 unlock2(d);
2547 }
2548 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2549
2550 MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2551 { /* Can only do this with r==1, i.e. cl */
2552
2553 if (isconst(r)) {
2554 COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
2555 return;
2556 }
2557
2558 CLOBBER_SHLL;
2559 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2560 d=rmw(d,1,1);
2561 Dif (r!=1) {
2562 write_log("Illegal register %d in raw_shll_b\n",r);
2563 abort();
2564 }
2565 raw_shll_b_rr(d,r) ;
2566 unlock2(r);
2567 unlock2(d);
2568 }
2569 MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2570
2571
2572 MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
2573 {
2574 if (!i && !needflags)
2575 return;
2576 CLOBBER_ROR;
2577 r=rmw(r,1,1);
2578 raw_ror_b_ri(r,i);
2579 unlock2(r);
2580 }
2581 MENDFUNC(2,ror_b_ri,(R1 r, IMM i))
2582
2583 MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
2584 {
2585 if (!i && !needflags)
2586 return;
2587 CLOBBER_ROR;
2588 r=rmw(r,2,2);
2589 raw_ror_w_ri(r,i);
2590 unlock2(r);
2591 }
2592 MENDFUNC(2,ror_w_ri,(R2 r, IMM i))
2593
2594 MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
2595 {
2596 if (!i && !needflags)
2597 return;
2598 CLOBBER_ROR;
2599 r=rmw(r,4,4);
2600 raw_ror_l_ri(r,i);
2601 unlock2(r);
2602 }
2603 MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2604
2605 MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
2606 {
2607 if (isconst(r)) {
2608 COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
2609 return;
2610 }
2611 CLOBBER_ROR;
2612 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2613 d=rmw(d,4,4);
2614 raw_ror_l_rr(d,r) ;
2615 unlock2(r);
2616 unlock2(d);
2617 }
2618 MENDFUNC(2,ror_l_rr,(R4 d, R1 r))
2619
2620 MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
2621 {
2622 if (isconst(r)) {
2623 COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
2624 return;
2625 }
2626 CLOBBER_ROR;
2627 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2628 d=rmw(d,2,2);
2629 raw_ror_w_rr(d,r) ;
2630 unlock2(r);
2631 unlock2(d);
2632 }
2633 MENDFUNC(2,ror_w_rr,(R2 d, R1 r))
2634
2635 MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
2636 {
2637 if (isconst(r)) {
2638 COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
2639 return;
2640 }
2641
2642 CLOBBER_ROR;
2643 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2644 d=rmw(d,1,1);
2645 raw_ror_b_rr(d,r) ;
2646 unlock2(r);
2647 unlock2(d);
2648 }
2649 MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2650
2651 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2652 {
2653 if (isconst(r)) {
2654 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2655 return;
2656 }
2657 CLOBBER_SHRL;
2658 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2659 d=rmw(d,4,4);
2660 Dif (r!=1) {
2661 write_log("Illegal register %d in raw_rol_b\n",r);
2662 abort();
2663 }
2664 raw_shrl_l_rr(d,r) ;
2665 unlock2(r);
2666 unlock2(d);
2667 }
2668 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2669
2670 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2671 { /* Can only do this with r==1, i.e. cl */
2672
2673 if (isconst(r)) {
2674 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2675 return;
2676 }
2677 CLOBBER_SHRL;
2678 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2679 d=rmw(d,2,2);
2680 Dif (r!=1) {
2681 write_log("Illegal register %d in raw_shrl_b\n",r);
2682 abort();
2683 }
2684 raw_shrl_w_rr(d,r) ;
2685 unlock2(r);
2686 unlock2(d);
2687 }
2688 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2689
2690 MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2691 { /* Can only do this with r==1, i.e. cl */
2692
2693 if (isconst(r)) {
2694 COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
2695 return;
2696 }
2697
2698 CLOBBER_SHRL;
2699 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2700 d=rmw(d,1,1);
2701 Dif (r!=1) {
2702 write_log("Illegal register %d in raw_shrl_b\n",r);
2703 abort();
2704 }
2705 raw_shrl_b_rr(d,r) ;
2706 unlock2(r);
2707 unlock2(d);
2708 }
2709 MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2710
2711
2712
2713 MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2714 {
2715 if (!i && !needflags)
2716 return;
2717 if (isconst(r) && !needflags) {
2718 live.state[r].val<<=i;
2719 return;
2720 }
2721 CLOBBER_SHLL;
2722 r=rmw(r,4,4);
2723 raw_shll_l_ri(r,i);
2724 unlock2(r);
2725 }
2726 MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))
2727
2728 MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2729 {
2730 if (!i && !needflags)
2731 return;
2732 CLOBBER_SHLL;
2733 r=rmw(r,2,2);
2734 raw_shll_w_ri(r,i);
2735 unlock2(r);
2736 }
2737 MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))
2738
2739 MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2740 {
2741 if (!i && !needflags)
2742 return;
2743 CLOBBER_SHLL;
2744 r=rmw(r,1,1);
2745 raw_shll_b_ri(r,i);
2746 unlock2(r);
2747 }
2748 MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))
2749
2750 MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2751 {
2752 if (!i && !needflags)
2753 return;
2754 if (isconst(r) && !needflags) {
2755 live.state[r].val>>=i;
2756 return;
2757 }
2758 CLOBBER_SHRL;
2759 r=rmw(r,4,4);
2760 raw_shrl_l_ri(r,i);
2761 unlock2(r);
2762 }
2763 MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
2764
2765 MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2766 {
2767 if (!i && !needflags)
2768 return;
2769 CLOBBER_SHRL;
2770 r=rmw(r,2,2);
2771 raw_shrl_w_ri(r,i);
2772 unlock2(r);
2773 }
2774 MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
2775
2776 MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2777 {
2778 if (!i && !needflags)
2779 return;
2780 CLOBBER_SHRL;
2781 r=rmw(r,1,1);
2782 raw_shrl_b_ri(r,i);
2783 unlock2(r);
2784 }
2785 MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2786
2787 MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2788 {
2789 if (!i && !needflags)
2790 return;
2791 CLOBBER_SHRA;
2792 r=rmw(r,4,4);
2793 raw_shra_l_ri(r,i);
2794 unlock2(r);
2795 }
2796 MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))
2797
2798 MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2799 {
2800 if (!i && !needflags)
2801 return;
2802 CLOBBER_SHRA;
2803 r=rmw(r,2,2);
2804 raw_shra_w_ri(r,i);
2805 unlock2(r);
2806 }
2807 MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))
2808
2809 MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2810 {
2811 if (!i && !needflags)
2812 return;
2813 CLOBBER_SHRA;
2814 r=rmw(r,1,1);
2815 raw_shra_b_ri(r,i);
2816 unlock2(r);
2817 }
2818 MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2819
2820 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2821 {
2822 if (isconst(r)) {
2823 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2824 return;
2825 }
2826 CLOBBER_SHRA;
2827 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2828 d=rmw(d,4,4);
2829 Dif (r!=1) {
2830 write_log("Illegal register %d in raw_rol_b\n",r);
2831 abort();
2832 }
2833 raw_shra_l_rr(d,r) ;
2834 unlock2(r);
2835 unlock2(d);
2836 }
2837 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2838
2839 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2840 { /* Can only do this with r==1, i.e. cl */
2841
2842 if (isconst(r)) {
2843 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2844 return;
2845 }
2846 CLOBBER_SHRA;
2847 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2848 d=rmw(d,2,2);
2849 Dif (r!=1) {
2850 write_log("Illegal register %d in raw_shra_b\n",r);
2851 abort();
2852 }
2853 raw_shra_w_rr(d,r) ;
2854 unlock2(r);
2855 unlock2(d);
2856 }
2857 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2858
2859 MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2860 { /* Can only do this with r==1, i.e. cl */
2861
2862 if (isconst(r)) {
2863 COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
2864 return;
2865 }
2866
2867 CLOBBER_SHRA;
2868 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2869 d=rmw(d,1,1);
2870 Dif (r!=1) {
2871 write_log("Illegal register %d in raw_shra_b\n",r);
2872 abort();
2873 }
2874 raw_shra_b_rr(d,r) ;
2875 unlock2(r);
2876 unlock2(d);
2877 }
2878 MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2879
2880
2881 MIDFUNC(2,setcc,(W1 d, IMM cc))
2882 {
2883 CLOBBER_SETCC;
2884 d=writereg(d,1);
2885 raw_setcc(d,cc);
2886 unlock2(d);
2887 }
2888 MENDFUNC(2,setcc,(W1 d, IMM cc))
2889
2890 MIDFUNC(2,setcc_m,(IMM d, IMM cc))
2891 {
2892 CLOBBER_SETCC;
2893 raw_setcc_m(d,cc);
2894 }
2895 MENDFUNC(2,setcc_m,(IMM d, IMM cc))
2896
2897 MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2898 {
2899 if (d==s)
2900 return;
2901 CLOBBER_CMOV;
2902 s=readreg(s,4);
2903 d=rmw(d,4,4);
2904 raw_cmov_l_rr(d,s,cc);
2905 unlock2(s);
2906 unlock2(d);
2907 }
2908 MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
2909
2910 MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2911 {
2912 CLOBBER_CMOV;
2913 d=rmw(d,4,4);
2914 raw_cmov_l_rm(d,s,cc);
2915 unlock2(d);
2916 }
2917 MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2918
2919 MIDFUNC(1,setzflg_l,(RW4 r))
2920 {
2921 if (setzflg_uses_bsf) {
2922 CLOBBER_BSF;
2923 r=rmw(r,4,4);
2924 raw_bsf_l_rr(r,r);
2925 unlock2(r);
2926 }
2927 else {
2928 Dif (live.flags_in_flags!=VALID) {
2929 write_log("setzflg() wanted flags in native flags, they are %d\n",
2930 live.flags_in_flags);
2931 abort();
2932 }
2933 r=readreg(r,4);
2934 int f=writereg(S11,4);
2935 int t=writereg(S12,4);
2936 raw_flags_set_zero(f,r,t);
2937 unlock2(f);
2938 unlock2(r);
2939 unlock2(t);
2940 }
2941 }
2942 MENDFUNC(1,setzflg_l,(RW4 r))
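
/*
 * The bsf path above exploits a flag side effect: BSF r,r sets ZF exactly
 * when its source is zero, so the emulated Z flag can be recomputed from
 * the value itself without the native flags being live. The fallback uses
 * raw_flags_set_zero() with two scratch registers and requires the flags
 * to already sit in the native flags register, hence the Dif check.
 */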
2943
2944 MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
2945 {
2946 CLOBBER_MUL;
2947 s=readreg(s,4);
2948 d=rmw(d,4,4);
2949 raw_imul_32_32(d,s);
2950 unlock2(s);
2951 unlock2(d);
2952 }
2953 MENDFUNC(2,imul_32_32,(RW4 d, R4 s))
2954
2955 MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2956 {
2957 CLOBBER_MUL;
2958 s=rmw_specific(s,4,4,MUL_NREG2);
2959 d=rmw_specific(d,4,4,MUL_NREG1);
2960 raw_imul_64_32(d,s);
2961 unlock2(s);
2962 unlock2(d);
2963 }
2964 MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))
2965
2966 MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2967 {
2968 CLOBBER_MUL;
2969 s=rmw_specific(s,4,4,MUL_NREG2);
2970 d=rmw_specific(d,4,4,MUL_NREG1);
2971 raw_mul_64_32(d,s);
2972 unlock2(s);
2973 unlock2(d);
2974 }
2975 MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))
2976
2977 MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
2978 {
2979 CLOBBER_MUL;
2980 s=readreg(s,4);
2981 d=rmw(d,4,4);
2982 raw_mul_32_32(d,s);
2983 unlock2(s);
2984 unlock2(d);
2985 }
2986 MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2987
2988 MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
2989 {
2990 int isrmw;
2991
2992 if (isconst(s)) {
2993 set_const(d,(uae_s32)(uae_s16)live.state[s].val);
2994 return;
2995 }
2996
2997 CLOBBER_SE16;
2998 isrmw=(s==d);
2999 if (!isrmw) {
3000 s=readreg(s,2);
3001 d=writereg(d,4);
3002 }
3003 else { /* If we try to lock this twice, with different sizes, we
3004 are in trouble! */
3005 s=d=rmw(s,4,2);
3006 }
3007 raw_sign_extend_16_rr(d,s);
3008 if (!isrmw) {
3009 unlock2(d);
3010 unlock2(s);
3011 }
3012 else {
3013 unlock2(s);
3014 }
3015 }
3016 MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
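
/*
 * Constant folding example: with s known to hold 0x00008000, the isconst()
 * shortcut above stores 0xFFFF8000 straight into d's state and emits no
 * code at all.
 */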
3017
3018 MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3019 {
3020 int isrmw;
3021
3022 if (isconst(s)) {
3023 set_const(d,(uae_s32)(uae_s8)live.state[s].val);
3024 return;
3025 }
3026
3027 isrmw=(s==d);
3028 CLOBBER_SE8;
3029 if (!isrmw) {
3030 s=readreg(s,1);
3031 d=writereg(d,4);
3032 }
3033 else { /* If we try to lock this twice, with different sizes, we
3034 are in trouble! */
3035 s=d=rmw(s,4,1);
3036 }
3037
3038 raw_sign_extend_8_rr(d,s);
3039
3040 if (!isrmw) {
3041 unlock2(d);
3042 unlock2(s);
3043 }
3044 else {
3045 unlock2(s);
3046 }
3047 }
3048 MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3049
3050
3051 MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3052 {
3053 int isrmw;
3054
3055 if (isconst(s)) {
3056 set_const(d,(uae_u32)(uae_u16)live.state[s].val);
3057 return;
3058 }
3059
3060 isrmw=(s==d);
3061 CLOBBER_ZE16;
3062 if (!isrmw) {
3063 s=readreg(s,2);
3064 d=writereg(d,4);
3065 }
3066 else { /* If we try to lock this twice, with different sizes, we
3067 are in trouble! */
3068 s=d=rmw(s,4,2);
3069 }
3070 raw_zero_extend_16_rr(d,s);
3071 if (!isrmw) {
3072 unlock2(d);
3073 unlock2(s);
3074 }
3075 else {
3076 unlock2(s);
3077 }
3078 }
3079 MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
3080
3081 MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3082 {
3083 int isrmw;
3084 if (isconst(s)) {
3085 set_const(d,(uae_u32)(uae_u8)live.state[s].val);
3086 return;
3087 }
3088
3089 isrmw=(s==d);
3090 CLOBBER_ZE8;
3091 if (!isrmw) {
3092 s=readreg(s,1);
3093 d=writereg(d,4);
3094 }
3095 else { /* If we try to lock this twice, with different sizes, we
3096 are in trouble! */
3097 s=d=rmw(s,4,1);
3098 }
3099
3100 raw_zero_extend_8_rr(d,s);
3101
3102 if (!isrmw) {
3103 unlock2(d);
3104 unlock2(s);
3105 }
3106 else {
3107 unlock2(s);
3108 }
3109 }
3110 MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3111
3112 MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
3113 {
3114 if (d==s)
3115 return;
3116 if (isconst(s)) {
3117 COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
3118 return;
3119 }
3120
3121 CLOBBER_MOV;
3122 s=readreg(s,1);
3123 d=writereg(d,1);
3124 raw_mov_b_rr(d,s);
3125 unlock2(d);
3126 unlock2(s);
3127 }
3128 MENDFUNC(2,mov_b_rr,(W1 d, R1 s))
3129
3130 MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
3131 {
3132 if (d==s)
3133 return;
3134 if (isconst(s)) {
3135 COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
3136 return;
3137 }
3138
3139 CLOBBER_MOV;
3140 s=readreg(s,2);
3141 d=writereg(d,2);
3142 raw_mov_w_rr(d,s);
3143 unlock2(d);
3144 unlock2(s);
3145 }
3146 MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3147
3148
3149 MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3150 {
3151 CLOBBER_MOV;
3152 baser=readreg(baser,4);
3153 index=readreg(index,4);
3154 d=writereg(d,4);
3155
3156 raw_mov_l_rrm_indexed(d,baser,index,factor);
3157 unlock2(d);
3158 unlock2(baser);
3159 unlock2(index);
3160 }
3161 MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
3162
3163 MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3164 {
3165 CLOBBER_MOV;
3166 baser=readreg(baser,4);
3167 index=readreg(index,4);
3168 d=writereg(d,2);
3169
3170 raw_mov_w_rrm_indexed(d,baser,index,factor);
3171 unlock2(d);
3172 unlock2(baser);
3173 unlock2(index);
3174 }
3175 MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
3176
3177 MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3178 {
3179 CLOBBER_MOV;
3180 baser=readreg(baser,4);
3181 index=readreg(index,4);
3182 d=writereg(d,1);
3183
3184 raw_mov_b_rrm_indexed(d,baser,index,factor);
3185
3186 unlock2(d);
3187 unlock2(baser);
3188 unlock2(index);
3189 }
3190 MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
3191
3192
3193 MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3194 {
3195 CLOBBER_MOV;
3196 baser=readreg(baser,4);
3197 index=readreg(index,4);
3198 s=readreg(s,4);
3199
3200 Dif (baser==s || index==s)
3201 abort();
3202
3203
3204 raw_mov_l_mrr_indexed(baser,index,factor,s);
3205 unlock2(s);
3206 unlock2(baser);
3207 unlock2(index);
3208 }
3209 MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
3210
3211 MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3212 {
3213 CLOBBER_MOV;
3214 baser=readreg(baser,4);
3215 index=readreg(index,4);
3216 s=readreg(s,2);
3217
3218 raw_mov_w_mrr_indexed(baser,index,factor,s);
3219 unlock2(s);
3220 unlock2(baser);
3221 unlock2(index);
3222 }
3223 MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
3224
3225 MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3226 {
3227 CLOBBER_MOV;
3228 s=readreg(s,1);
3229 baser=readreg(baser,4);
3230 index=readreg(index,4);
3231
3232 raw_mov_b_mrr_indexed(baser,index,factor,s);
3233 unlock2(s);
3234 unlock2(baser);
3235 unlock2(index);
3236 }
3237 MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3238
3239
3240 MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3241 {
3242 int basereg=baser;
3243 int indexreg=index;
3244
3245 CLOBBER_MOV;
3246 s=readreg(s,4);
3247 baser=readreg_offset(baser,4);
3248 index=readreg_offset(index,4);
3249
3250 base+=get_offset(basereg);
3251 base+=factor*get_offset(indexreg);
3252
3253 raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
3254 unlock2(s);
3255 unlock2(baser);
3256 unlock2(index);
3257 }
3258 MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
3259
3260 MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3261 {
3262 int basereg=baser;
3263 int indexreg=index;
3264
3265 CLOBBER_MOV;
3266 s=readreg(s,2);
3267 baser=readreg_offset(baser,4);
3268 index=readreg_offset(index,4);
3269
3270 base+=get_offset(basereg);
3271 base+=factor*get_offset(indexreg);
3272
3273 raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
3274 unlock2(s);
3275 unlock2(baser);
3276 unlock2(index);
3277 }
3278 MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
3279
3280 MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3281 {
3282 int basereg=baser;
3283 int indexreg=index;
3284
3285 CLOBBER_MOV;
3286 s=readreg(s,1);
3287 baser=readreg_offset(baser,4);
3288 index=readreg_offset(index,4);
3289
3290 base+=get_offset(basereg);
3291 base+=factor*get_offset(indexreg);
3292
3293 raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
3294 unlock2(s);
3295 unlock2(baser);
3296 unlock2(index);
3297 }
3298 MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3299
3300
3301
3302 /* Read a long from base+baser+factor*index */
3303 MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3304 {
3305 int basereg=baser;
3306 int indexreg=index;
3307
3308 CLOBBER_MOV;
3309 baser=readreg_offset(baser,4);
3310 index=readreg_offset(index,4);
3311 base+=get_offset(basereg);
3312 base+=factor*get_offset(indexreg);
3313 d=writereg(d,4);
3314 raw_mov_l_brrm_indexed(d,base,baser,index,factor);
3315 unlock2(d);
3316 unlock2(baser);
3317 unlock2(index);
3318 }
3319 MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
3320
3321
3322 MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3323 {
3324 int basereg=baser;
3325 int indexreg=index;
3326
3327 CLOBBER_MOV;
3328 remove_offset(d,-1);
3329 baser=readreg_offset(baser,4);
3330 index=readreg_offset(index,4);
3331 base+=get_offset(basereg);
3332 base+=factor*get_offset(indexreg);
3333 d=writereg(d,2);
3334 raw_mov_w_brrm_indexed(d,base,baser,index,factor);
3335 unlock2(d);
3336 unlock2(baser);
3337 unlock2(index);
3338 }
3339 MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
3340
3341
3342 MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3343 {
3344 int basereg=baser;
3345 int indexreg=index;
3346
3347 CLOBBER_MOV;
3348 remove_offset(d,-1);
3349 baser=readreg_offset(baser,4);
3350 index=readreg_offset(index,4);
3351 base+=get_offset(basereg);
3352 base+=factor*get_offset(indexreg);
3353 d=writereg(d,1);
3354 raw_mov_b_brrm_indexed(d,base,baser,index,factor);
3355 unlock2(d);
3356 unlock2(baser);
3357 unlock2(index);
3358 }
3359 MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3360
3361 /* Read a long from base+factor*index */
3362 MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3363 {
3364 int indexreg=index;
3365
3366 if (isconst(index)) {
3367 COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
3368 return;
3369 }
3370
3371 CLOBBER_MOV;
3372 index=readreg_offset(index,4);
3373 base+=get_offset(indexreg)*factor;
3374 d=writereg(d,4);
3375
3376 raw_mov_l_rm_indexed(d,base,index,factor);
3377 unlock2(index);
3378 unlock2(d);
3379 }
3380 MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
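
/*
 * Note the isconst() shortcut: when the index is a compile-time constant,
 * the indexed load collapses into a plain absolute mov_l_rm from address
 * base+factor*index, and no index register is ever allocated.
 */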
3381
3382
3383 /* read the long at the address contained in s+offset and store in d */
3384 MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3385 {
3386 if (isconst(s)) {
3387 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3388 return;
3389 }
3390 CLOBBER_MOV;
3391 s=readreg(s,4);
3392 d=writereg(d,4);
3393
3394 raw_mov_l_rR(d,s,offset);
3395 unlock2(d);
3396 unlock2(s);
3397 }
3398 MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
3399
3400 /* read the word at the address contained in s+offset and store in d */
3401 MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3402 {
3403 if (isconst(s)) {
3404 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3405 return;
3406 }
3407 CLOBBER_MOV;
3408 s=readreg(s,4);
3409 d=writereg(d,2);
3410
3411 raw_mov_w_rR(d,s,offset);
3412 unlock2(d);
3413 unlock2(s);
3414 }
3415 MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
3416
3417 /* read the byte at the address contained in s+offset and store in d */
3418 MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3419 {
3420 if (isconst(s)) {
3421 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3422 return;
3423 }
3424 CLOBBER_MOV;
3425 s=readreg(s,4);
3426 d=writereg(d,1);
3427
3428 raw_mov_b_rR(d,s,offset);
3429 unlock2(d);
3430 unlock2(s);
3431 }
3432 MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3433
3434 /* read the long at the address contained in s+offset and store in d */
3435 MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3436 {
3437 int sreg=s;
3438 if (isconst(s)) {
3439 COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
3440 return;
3441 }
3442 CLOBBER_MOV;
3443 s=readreg_offset(s,4);
3444 offset+=get_offset(sreg);
3445 d=writereg(d,4);
3446
3447 raw_mov_l_brR(d,s,offset);
3448 unlock2(d);
3449 unlock2(s);
3450 }
3451 MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
3452
3453 /* read the word at the address contained in s+offset and store in d */
3454 MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3455 {
3456 int sreg=s;
3457 if (isconst(s)) {
3458 COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
3459 return;
3460 }
3461 CLOBBER_MOV;
3462 remove_offset(d,-1);
3463 s=readreg_offset(s,4);
3464 offset+=get_offset(sreg);
3465 d=writereg(d,2);
3466
3467 raw_mov_w_brR(d,s,offset);
3468 unlock2(d);
3469 unlock2(s);
3470 }
3471 MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
3472
3473 /* read the byte at the address contained in s+offset and store in d */
3474 MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3475 {
3476 int sreg=s;
3477 if (isconst(s)) {
3478 COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
3479 return;
3480 }
3481 CLOBBER_MOV;
3482 remove_offset(d,-1);
3483 s=readreg_offset(s,4);
3484 offset+=get_offset(sreg);
3485 d=writereg(d,1);
3486
3487 raw_mov_b_brR(d,s,offset);
3488 unlock2(d);
3489 unlock2(s);
3490 }
3491 MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3492
3493 MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3494 {
3495 int dreg=d;
3496 if (isconst(d)) {
3497 COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
3498 return;
3499 }
3500
3501 CLOBBER_MOV;
3502 d=readreg_offset(d,4);
3503 offset+=get_offset(dreg);
3504 raw_mov_l_Ri(d,i,offset);
3505 unlock2(d);
3506 }
3507 MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
3508
3509 MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3510 {
3511 int dreg=d;
3512 if (isconst(d)) {
3513 COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
3514 return;
3515 }
3516
3517 CLOBBER_MOV;
3518 d=readreg_offset(d,4);
3519 offset+=get_offset(dreg);
3520 raw_mov_w_Ri(d,i,offset);
3521 unlock2(d);
3522 }
3523 MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
3524
3525 MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3526 {
3527 int dreg=d;
3528 if (isconst(d)) {
3529 COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
3530 return;
3531 }
3532
3533 CLOBBER_MOV;
3534 d=readreg_offset(d,4);
3535 offset+=get_offset(dreg);
3536 raw_mov_b_Ri(d,i,offset);
3537 unlock2(d);
3538 }
3539 MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3540
3541 /* Warning! OFFSET is byte sized only! */
3542 MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3543 {
3544 if (isconst(d)) {
3545 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3546 return;
3547 }
3548 if (isconst(s)) {
3549 COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
3550 return;
3551 }
3552
3553 CLOBBER_MOV;
3554 s=readreg(s,4);
3555 d=readreg(d,4);
3556
3557 raw_mov_l_Rr(d,s,offset);
3558 unlock2(d);
3559 unlock2(s);
3560 }
3561 MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
3562
3563 MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3564 {
3565 if (isconst(d)) {
3566 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3567 return;
3568 }
3569 if (isconst(s)) {
3570 COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
3571 return;
3572 }
3573
3574 CLOBBER_MOV;
3575 s=readreg(s,2);
3576 d=readreg(d,4);
3577 raw_mov_w_Rr(d,s,offset);
3578 unlock2(d);
3579 unlock2(s);
3580 }
3581 MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
3582
3583 MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3584 {
3585 if (isconst(d)) {
3586 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3587 return;
3588 }
3589 if (isconst(s)) {
3590 COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
3591 return;
3592 }
3593
3594 CLOBBER_MOV;
3595 s=readreg(s,1);
3596 d=readreg(d,4);
3597 raw_mov_b_Rr(d,s,offset);
3598 unlock2(d);
3599 unlock2(s);
3600 }
3601 MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3602
3603 MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
3604 {
3605 if (isconst(s)) {
3606 COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
3607 return;
3608 }
3609 #if USE_OFFSET
3610 if (d==s) {
3611 add_offset(d,offset);
3612 return;
3613 }
3614 #endif
3615 CLOBBER_LEA;
3616 s=readreg(s,4);
3617 d=writereg(d,4);
3618 raw_lea_l_brr(d,s,offset);
3619 unlock2(d);
3620 unlock2(s);
3621 }
3622 MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
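
/*
 * The d==s shortcut above is the "lazy constant offsetting" feature: the
 * increment is merely recorded with add_offset() rather than emitted, and
 * later users either fold it into their x86 addressing modes through
 * readreg_offset()/get_offset() or materialize it on demand.
 */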
3623
3624 MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3625 {
3626 if (!offset) {
3627 COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
3628 return;
3629 }
3630 CLOBBER_LEA;
3631 s=readreg(s,4);
3632 index=readreg(index,4);
3633 d=writereg(d,4);
3634
3635 raw_lea_l_brr_indexed(d,s,index,factor,offset);
3636 unlock2(d);
3637 unlock2(index);
3638 unlock2(s);
3639 }
3640 MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
3641
3642 MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3643 {
3644 CLOBBER_LEA;
3645 s=readreg(s,4);
3646 index=readreg(index,4);
3647 d=writereg(d,4);
3648
3649 raw_lea_l_rr_indexed(d,s,index,factor);
3650 unlock2(d);
3651 unlock2(index);
3652 unlock2(s);
3653 }
3654 MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3655
3656 /* write the long s to the address contained in d+offset */
3657 MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3658 {
3659 int dreg=d;
3660 if (isconst(d)) {
3661 COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
3662 return;
3663 }
3664
3665 CLOBBER_MOV;
3666 s=readreg(s,4);
3667 d=readreg_offset(d,4);
3668 offset+=get_offset(dreg);
3669
3670 raw_mov_l_bRr(d,s,offset);
3671 unlock2(d);
3672 unlock2(s);
3673 }
3674 MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
3675
3676 /* write the word s to the address contained in d+offset */
3677 MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3678 {
3679 int dreg=d;
3680
3681 if (isconst(d)) {
3682 COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
3683 return;
3684 }
3685
3686 CLOBBER_MOV;
3687 s=readreg(s,2);
3688 d=readreg_offset(d,4);
3689 offset+=get_offset(dreg);
3690 raw_mov_w_bRr(d,s,offset);
3691 unlock2(d);
3692 unlock2(s);
3693 }
3694 MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
3695
3696 MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3697 {
3698 int dreg=d;
3699 if (isconst(d)) {
3700 COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
3701 return;
3702 }
3703
3704 CLOBBER_MOV;
3705 s=readreg(s,1);
3706 d=readreg_offset(d,4);
3707 offset+=get_offset(dreg);
3708 raw_mov_b_bRr(d,s,offset);
3709 unlock2(d);
3710 unlock2(s);
3711 }
3712 MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3713
3714 MIDFUNC(1,bswap_32,(RW4 r))
3715 {
3716 int reg=r;
3717
3718 if (isconst(r)) {
3719 uae_u32 oldv=live.state[r].val;
3720 live.state[r].val=reverse32(oldv);
3721 return;
3722 }
3723
3724 CLOBBER_SW32;
3725 r=rmw(r,4,4);
3726 raw_bswap_32(r);
3727 unlock2(r);
3728 }
3729 MENDFUNC(1,bswap_32,(RW4 r))
3730
3731 MIDFUNC(1,bswap_16,(RW2 r))
3732 {
3733 if (isconst(r)) {
3734 uae_u32 oldv=live.state[r].val;
3735 live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
3736 (oldv&0xffff0000);
3737 return;
3738 }
3739
3740 CLOBBER_SW16;
3741 r=rmw(r,2,2);
3742
3743 raw_bswap_16(r);
3744 unlock2(r);
3745 }
3746 MENDFUNC(1,bswap_16,(RW2 r))
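
/*
 * Constant folding example: a constant 0x12345678 becomes 0x12347856;
 * only the low 16 bits are byte-swapped, the high word is preserved.
 */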
3747
3748
3749
3750 MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
3751 {
3752 int olds;
3753
3754 if (d==s) { /* How pointless! */
3755 return;
3756 }
3757 if (isconst(s)) {
3758 COMPCALL(mov_l_ri)(d,live.state[s].val);
3759 return;
3760 }
3761 olds=s;
3762 disassociate(d);
3763 s=readreg_offset(s,4);
3764 live.state[d].realreg=s;
3765 live.state[d].realind=live.nat[s].nholds;
3766 live.state[d].val=live.state[olds].val;
3767 live.state[d].validsize=4;
3768 live.state[d].dirtysize=4;
3769 set_status(d,DIRTY);
3770
3771 live.nat[s].holds[live.nat[s].nholds]=d;
3772 live.nat[s].nholds++;
3773 log_clobberreg(d);
3774 /* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
3775 d,s,live.state[d].realind,live.nat[s].nholds); */
3776 unlock2(s);
3777 }
3778 MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
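
/*
 * Note that mov_l_rr emits no MOV at all: d is aliased onto s's native
 * register by appending it to nat[s].holds[] and marking it DIRTY. An
 * actual copy is only materialized later, if and when one of the two
 * virtual registers has to be spilled or made exclusive.
 */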
3779
3780 MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
3781 {
3782 if (isconst(s)) {
3783 COMPCALL(mov_l_mi)(d,live.state[s].val);
3784 return;
3785 }
3786 CLOBBER_MOV;
3787 s=readreg(s,4);
3788
3789 raw_mov_l_mr(d,s);
3790 unlock2(s);
3791 }
3792 MENDFUNC(2,mov_l_mr,(IMM d, R4 s))
3793
3794
3795 MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
3796 {
3797 if (isconst(s)) {
3798 COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
3799 return;
3800 }
3801 CLOBBER_MOV;
3802 s=readreg(s,2);
3803
3804 raw_mov_w_mr(d,s);
3805 unlock2(s);
3806 }
3807 MENDFUNC(2,mov_w_mr,(IMM d, R2 s))
3808
3809 MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
3810 {
3811 CLOBBER_MOV;
3812 d=writereg(d,2);
3813
3814 raw_mov_w_rm(d,s);
3815 unlock2(d);
3816 }
3817 MENDFUNC(2,mov_w_rm,(W2 d, IMM s))
3818
3819 MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
3820 {
3821 if (isconst(s)) {
3822 COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
3823 return;
3824 }
3825
3826 CLOBBER_MOV;
3827 s=readreg(s,1);
3828
3829 raw_mov_b_mr(d,s);
3830 unlock2(s);
3831 }
3832 MENDFUNC(2,mov_b_mr,(IMM d, R1 s))
3833
3834 MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
3835 {
3836 CLOBBER_MOV;
3837 d=writereg(d,1);
3838
3839 raw_mov_b_rm(d,s);
3840 unlock2(d);
3841 }
3842 MENDFUNC(2,mov_b_rm,(W1 d, IMM s))
3843
3844 MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
3845 {
3846 set_const(d,s);
3847 return;
3848 }
3849 MENDFUNC(2,mov_l_ri,(W4 d, IMM s))
3850
3851 MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
3852 {
3853 CLOBBER_MOV;
3854 d=writereg(d,2);
3855
3856 raw_mov_w_ri(d,s);
3857 unlock2(d);
3858 }
3859 MENDFUNC(2,mov_w_ri,(W2 d, IMM s))
3860
3861 MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
3862 {
3863 CLOBBER_MOV;
3864 d=writereg(d,1);
3865
3866 raw_mov_b_ri(d,s);
3867 unlock2(d);
3868 }
3869 MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3870
3871
3872 MIDFUNC(2,add_l_mi,(IMM d, IMM s))
3873 {
3874 CLOBBER_ADD;
3875 raw_add_l_mi(d,s) ;
3876 }
3877 MENDFUNC(2,add_l_mi,(IMM d, IMM s))
3878
3879 MIDFUNC(2,add_w_mi,(IMM d, IMM s))
3880 {
3881 CLOBBER_ADD;
3882 raw_add_w_mi(d,s) ;
3883 }
3884 MENDFUNC(2,add_w_mi,(IMM d, IMM s))
3885
3886 MIDFUNC(2,add_b_mi,(IMM d, IMM s))
3887 {
3888 CLOBBER_ADD;
3889 raw_add_b_mi(d,s) ;
3890 }
3891 MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3892
3893
3894 MIDFUNC(2,test_l_ri,(R4 d, IMM i))
3895 {
3896 CLOBBER_TEST;
3897 d=readreg(d,4);
3898
3899 raw_test_l_ri(d,i);
3900 unlock2(d);
3901 }
3902 MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3903
3904 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3905 {
3906 CLOBBER_TEST;
3907 d=readreg(d,4);
3908 s=readreg(s,4);
3909
3910 raw_test_l_rr(d,s);
3911 unlock2(d);
3912 unlock2(s);
3913 }
3914 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3915
3916 MIDFUNC(2,test_w_rr,(R2 d, R2 s))
3917 {
3918 CLOBBER_TEST;
3919 d=readreg(d,2);
3920 s=readreg(s,2);
3921
3922 raw_test_w_rr(d,s);
3923 unlock2(d);
3924 unlock2(s);
3925 }
3926 MENDFUNC(2,test_w_rr,(R2 d, R2 s))
3927
3928 MIDFUNC(2,test_b_rr,(R1 d, R1 s))
3929 {
3930 CLOBBER_TEST;
3931 d=readreg(d,1);
3932 s=readreg(s,1);
3933
3934 raw_test_b_rr(d,s);
3935 unlock2(d);
3936 unlock2(s);
3937 }
3938 MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3939
3940
3941 MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
3942 {
3943 if (isconst(d) && !needflags) {
3944 live.state[d].val &= i;
3945 return;
3946 }
3947
3948 CLOBBER_AND;
3949 d=rmw(d,4,4);
3950
3951 raw_and_l_ri(d,i);
3952 unlock2(d);
3953 }
3954 MENDFUNC(2,and_l_ri,(RW4 d, IMM i))
3955
3956 MIDFUNC(2,and_l,(RW4 d, R4 s))
3957 {
3958 CLOBBER_AND;
3959 s=readreg(s,4);
3960 d=rmw(d,4,4);
3961
3962 raw_and_l(d,s);
3963 unlock2(d);
3964 unlock2(s);
3965 }
3966 MENDFUNC(2,and_l,(RW4 d, R4 s))
3967
3968 MIDFUNC(2,and_w,(RW2 d, R2 s))
3969 {
3970 CLOBBER_AND;
3971 s=readreg(s,2);
3972 d=rmw(d,2,2);
3973
3974 raw_and_w(d,s);
3975 unlock2(d);
3976 unlock2(s);
3977 }
3978 MENDFUNC(2,and_w,(RW2 d, R2 s))
3979
3980 MIDFUNC(2,and_b,(RW1 d, R1 s))
3981 {
3982 CLOBBER_AND;
3983 s=readreg(s,1);
3984 d=rmw(d,1,1);
3985
3986 raw_and_b(d,s);
3987 unlock2(d);
3988 unlock2(s);
3989 }
3990 MENDFUNC(2,and_b,(RW1 d, R1 s))
3991
3992 // gb-- used for making an fpcr value in compemu_fpp.cpp
3993 MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
3994 {
3995 CLOBBER_OR;
3996 d=rmw(d,4,4);
3997
3998 raw_or_l_rm(d,s);
3999 unlock2(d);
4000 }
4001 MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4002
4003 MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
4004 {
4005 if (isconst(d) && !needflags) {
4006 live.state[d].val|=i;
4007 return;
4008 }
4009 CLOBBER_OR;
4010 d=rmw(d,4,4);
4011
4012 raw_or_l_ri(d,i);
4013 unlock2(d);
4014 }
4015 MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4016
4017 MIDFUNC(2,or_l,(RW4 d, R4 s))
4018 {
4019 if (isconst(d) && isconst(s) && !needflags) {
4020 live.state[d].val|=live.state[s].val;
4021 return;
4022 }
4023 CLOBBER_OR;
4024 s=readreg(s,4);
4025 d=rmw(d,4,4);
4026
4027 raw_or_l(d,s);
4028 unlock2(d);
4029 unlock2(s);
4030 }
4031 MENDFUNC(2,or_l,(RW4 d, R4 s))
4032
4033 MIDFUNC(2,or_w,(RW2 d, R2 s))
4034 {
4035 CLOBBER_OR;
4036 s=readreg(s,2);
4037 d=rmw(d,2,2);
4038
4039 raw_or_w(d,s);
4040 unlock2(d);
4041 unlock2(s);
4042 }
4043 MENDFUNC(2,or_w,(RW2 d, R2 s))
4044
4045 MIDFUNC(2,or_b,(RW1 d, R1 s))
4046 {
4047 CLOBBER_OR;
4048 s=readreg(s,1);
4049 d=rmw(d,1,1);
4050
4051 raw_or_b(d,s);
4052 unlock2(d);
4053 unlock2(s);
4054 }
4055 MENDFUNC(2,or_b,(RW1 d, R1 s))
4056
4057 MIDFUNC(2,adc_l,(RW4 d, R4 s))
4058 {
4059 CLOBBER_ADC;
4060 s=readreg(s,4);
4061 d=rmw(d,4,4);
4062
4063 raw_adc_l(d,s);
4064
4065 unlock2(d);
4066 unlock2(s);
4067 }
4068 MENDFUNC(2,adc_l,(RW4 d, R4 s))
4069
4070 MIDFUNC(2,adc_w,(RW2 d, R2 s))
4071 {
4072 CLOBBER_ADC;
4073 s=readreg(s,2);
4074 d=rmw(d,2,2);
4075
4076 raw_adc_w(d,s);
4077 unlock2(d);
4078 unlock2(s);
4079 }
4080 MENDFUNC(2,adc_w,(RW2 d, R2 s))
4081
4082 MIDFUNC(2,adc_b,(RW1 d, R1 s))
4083 {
4084 CLOBBER_ADC;
4085 s=readreg(s,1);
4086 d=rmw(d,1,1);
4087
4088 raw_adc_b(d,s);
4089 unlock2(d);
4090 unlock2(s);
4091 }
4092 MENDFUNC(2,adc_b,(RW1 d, R1 s))
4093
4094 MIDFUNC(2,add_l,(RW4 d, R4 s))
4095 {
4096 if (isconst(s)) {
4097 COMPCALL(add_l_ri)(d,live.state[s].val);
4098 return;
4099 }
4100
4101 CLOBBER_ADD;
4102 s=readreg(s,4);
4103 d=rmw(d,4,4);
4104
4105 raw_add_l(d,s);
4106
4107 unlock2(d);
4108 unlock2(s);
4109 }
4110 MENDFUNC(2,add_l,(RW4 d, R4 s))
4111
4112 MIDFUNC(2,add_w,(RW2 d, R2 s))
4113 {
4114 if (isconst(s)) {
4115 COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
4116 return;
4117 }
4118
4119 CLOBBER_ADD;
4120 s=readreg(s,2);
4121 d=rmw(d,2,2);
4122
4123 raw_add_w(d,s);
4124 unlock2(d);
4125 unlock2(s);
4126 }
4127 MENDFUNC(2,add_w,(RW2 d, R2 s))
4128
4129 MIDFUNC(2,add_b,(RW1 d, R1 s))
4130 {
4131 if (isconst(s)) {
4132 COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
4133 return;
4134 }
4135
4136 CLOBBER_ADD;
4137 s=readreg(s,1);
4138 d=rmw(d,1,1);
4139
4140 raw_add_b(d,s);
4141 unlock2(d);
4142 unlock2(s);
4143 }
4144 MENDFUNC(2,add_b,(RW1 d, R1 s))
4145
4146 MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4147 {
4148 if (!i && !needflags)
4149 return;
4150 if (isconst(d) && !needflags) {
4151 live.state[d].val-=i;
4152 return;
4153 }
4154 #if USE_OFFSET
4155 if (!needflags) {
4156 add_offset(d,-i);
4157 return;
4158 }
4159 #endif
4160
4161 CLOBBER_SUB;
4162 d=rmw(d,4,4);
4163
4164 raw_sub_l_ri(d,i);
4165 unlock2(d);
4166 }
4167 MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))
4168
4169 MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4170 {
4171 if (!i && !needflags)
4172 return;
4173
4174 CLOBBER_SUB;
4175 d=rmw(d,2,2);
4176
4177 raw_sub_w_ri(d,i);
4178 unlock2(d);
4179 }
4180 MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))
4181
4182 MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4183 {
4184 if (!i && !needflags)
4185 return;
4186
4187 CLOBBER_SUB;
4188 d=rmw(d,1,1);
4189
4190 raw_sub_b_ri(d,i);
4191
4192 unlock2(d);
4193 }
4194 MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4195
4196 MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
4197 {
4198 if (!i && !needflags)
4199 return;
4200 if (isconst(d) && !needflags) {
4201 live.state[d].val+=i;
4202 return;
4203 }
4204 #if USE_OFFSET
4205 if (!needflags) {
4206 add_offset(d,i);
4207 return;
4208 }
4209 #endif
4210 CLOBBER_ADD;
4211 d=rmw(d,4,4);
4212 raw_add_l_ri(d,i);
4213 unlock2(d);
4214 }
4215 MENDFUNC(2,add_l_ri,(RW4 d, IMM i))
4216
4217 MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
4218 {
4219 if (!i && !needflags)
4220 return;
4221
4222 CLOBBER_ADD;
4223 d=rmw(d,2,2);
4224
4225 raw_add_w_ri(d,i);
4226 unlock2(d);
4227 }
4228 MENDFUNC(2,add_w_ri,(RW2 d, IMM i))
4229
4230 MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
4231 {
4232 if (!i && !needflags)
4233 return;
4234
4235 CLOBBER_ADD;
4236 d=rmw(d,1,1);
4237
4238 raw_add_b_ri(d,i);
4239
4240 unlock2(d);
4241 }
4242 MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4243
4244 MIDFUNC(2,sbb_l,(RW4 d, R4 s))
4245 {
4246 CLOBBER_SBB;
4247 s=readreg(s,4);
4248 d=rmw(d,4,4);
4249
4250 raw_sbb_l(d,s);
4251 unlock2(d);
4252 unlock2(s);
4253 }
4254 MENDFUNC(2,sbb_l,(RW4 d, R4 s))
4255
4256 MIDFUNC(2,sbb_w,(RW2 d, R2 s))
4257 {
4258 CLOBBER_SBB;
4259 s=readreg(s,2);
4260 d=rmw(d,2,2);
4261
4262 raw_sbb_w(d,s);
4263 unlock2(d);
4264 unlock2(s);
4265 }
4266 MENDFUNC(2,sbb_w,(RW2 d, R2 s))
4267
4268 MIDFUNC(2,sbb_b,(RW1 d, R1 s))
4269 {
4270 CLOBBER_SBB;
4271 s=readreg(s,1);
4272 d=rmw(d,1,1);
4273
4274 raw_sbb_b(d,s);
4275 unlock2(d);
4276 unlock2(s);
4277 }
4278 MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4279
4280 MIDFUNC(2,sub_l,(RW4 d, R4 s))
4281 {
4282 if (isconst(s)) {
4283 COMPCALL(sub_l_ri)(d,live.state[s].val);
4284 return;
4285 }
4286
4287 CLOBBER_SUB;
4288 s=readreg(s,4);
4289 d=rmw(d,4,4);
4290
4291 raw_sub_l(d,s);
4292 unlock2(d);
4293 unlock2(s);
4294 }
4295 MENDFUNC(2,sub_l,(RW4 d, R4 s))
4296
4297 MIDFUNC(2,sub_w,(RW2 d, R2 s))
4298 {
4299 if (isconst(s)) {
4300 COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
4301 return;
4302 }
4303
4304 CLOBBER_SUB;
4305 s=readreg(s,2);
4306 d=rmw(d,2,2);
4307
4308 raw_sub_w(d,s);
4309 unlock2(d);
4310 unlock2(s);
4311 }
4312 MENDFUNC(2,sub_w,(RW2 d, R2 s))
4313
4314 MIDFUNC(2,sub_b,(RW1 d, R1 s))
4315 {
4316 if (isconst(s)) {
4317 COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
4318 return;
4319 }
4320
4321 CLOBBER_SUB;
4322 s=readreg(s,1);
4323 d=rmw(d,1,1);
4324
4325 raw_sub_b(d,s);
4326 unlock2(d);
4327 unlock2(s);
4328 }
4329 MENDFUNC(2,sub_b,(RW1 d, R1 s))
4330
4331 MIDFUNC(2,cmp_l,(R4 d, R4 s))
4332 {
4333 CLOBBER_CMP;
4334 s=readreg(s,4);
4335 d=readreg(d,4);
4336
4337 raw_cmp_l(d,s);
4338 unlock2(d);
4339 unlock2(s);
4340 }
4341 MENDFUNC(2,cmp_l,(R4 d, R4 s))
4342
4343 MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4344 {
4345 CLOBBER_CMP;
4346 r=readreg(r,4);
4347
4348 raw_cmp_l_ri(r,i);
4349 unlock2(r);
4350 }
4351 MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))
4352
4353 MIDFUNC(2,cmp_w,(R2 d, R2 s))
4354 {
4355 CLOBBER_CMP;
4356 s=readreg(s,2);
4357 d=readreg(d,2);
4358
4359 raw_cmp_w(d,s);
4360 unlock2(d);
4361 unlock2(s);
4362 }
4363 MENDFUNC(2,cmp_w,(R2 d, R2 s))
4364
4365 MIDFUNC(2,cmp_b,(R1 d, R1 s))
4366 {
4367 CLOBBER_CMP;
4368 s=readreg(s,1);
4369 d=readreg(d,1);
4370
4371 raw_cmp_b(d,s);
4372 unlock2(d);
4373 unlock2(s);
4374 }
4375 MENDFUNC(2,cmp_b,(R1 d, R1 s))
4376
4377
4378 MIDFUNC(2,xor_l,(RW4 d, R4 s))
4379 {
4380 CLOBBER_XOR;
4381 s=readreg(s,4);
4382 d=rmw(d,4,4);
4383
4384 raw_xor_l(d,s);
4385 unlock2(d);
4386 unlock2(s);
4387 }
4388 MENDFUNC(2,xor_l,(RW4 d, R4 s))
4389
4390 MIDFUNC(2,xor_w,(RW2 d, R2 s))
4391 {
4392 CLOBBER_XOR;
4393 s=readreg(s,2);
4394 d=rmw(d,2,2);
4395
4396 raw_xor_w(d,s);
4397 unlock2(d);
4398 unlock2(s);
4399 }
4400 MENDFUNC(2,xor_w,(RW2 d, R2 s))
4401
4402 MIDFUNC(2,xor_b,(RW1 d, R1 s))
4403 {
4404 CLOBBER_XOR;
4405 s=readreg(s,1);
4406 d=rmw(d,1,1);
4407
4408 raw_xor_b(d,s);
4409 unlock2(d);
4410 unlock2(s);
4411 }
4412 MENDFUNC(2,xor_b,(RW1 d, R1 s))
4413
4414 MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4415 {
4416 clobber_flags();
4417 remove_all_offsets();
4418 if (osize==4) {
4419 if (out1!=in1 && out1!=r) {
4420 COMPCALL(forget_about)(out1);
4421 }
4422 }
4423 else {
4424 tomem_c(out1);
4425 }
4426
4427 in1=readreg_specific(in1,isize,REG_PAR1);
4428 r=readreg(r,4);
4429 prepare_for_call_1(); /* This should ensure that there won't be
4430 any need for swapping nregs in prepare_for_call_2
4431 */
4432 #if USE_NORMAL_CALLING_CONVENTION
4433 raw_push_l_r(in1);
4434 #endif
4435 unlock2(in1);
4436 unlock2(r);
4437
4438 prepare_for_call_2();
4439 raw_call_r(r);
4440
4441 #if USE_NORMAL_CALLING_CONVENTION
4442 raw_inc_sp(4);
4443 #endif
4444
4445
4446 live.nat[REG_RESULT].holds[0]=out1;
4447 live.nat[REG_RESULT].nholds=1;
4448 live.nat[REG_RESULT].touched=touchcnt++;
4449
4450 live.state[out1].realreg=REG_RESULT;
4451 live.state[out1].realind=0;
4452 live.state[out1].val=0;
4453 live.state[out1].validsize=osize;
4454 live.state[out1].dirtysize=osize;
4455 set_status(out1,DIRTY);
4456 }
4457 MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
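
/*
 * The bookkeeping after raw_call_r() rebinds out1 to REG_RESULT by hand:
 * it records that the native result register now holds the dirty value of
 * out1, which is essentially what writereg_specific() would conclude,
 * minus a pointless save of out1's previous contents.
 */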
4458
4459 MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4460 {
4461 clobber_flags();
4462 remove_all_offsets();
4463 in1=readreg_specific(in1,isize1,REG_PAR1);
4464 in2=readreg_specific(in2,isize2,REG_PAR2);
4465 r=readreg(r,4);
4466 prepare_for_call_1(); /* This should ensure that there won't be
4467 any need for swapping nregs in prepare_for_call_2
4468 */
4469 #if USE_NORMAL_CALLING_CONVENTION
4470 raw_push_l_r(in2);
4471 raw_push_l_r(in1);
4472 #endif
4473 unlock2(r);
4474 unlock2(in1);
4475 unlock2(in2);
4476 prepare_for_call_2();
4477 raw_call_r(r);
4478 #if USE_NORMAL_CALLING_CONVENTION
4479 raw_inc_sp(8);
4480 #endif
4481 }
4482 MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4483
4484 /* forget_about() takes a mid-layer register */
4485 MIDFUNC(1,forget_about,(W4 r))
4486 {
4487 if (isinreg(r))
4488 disassociate(r);
4489 live.state[r].val=0;
4490 set_status(r,UNDEF);
4491 }
4492 MENDFUNC(1,forget_about,(W4 r))
4493
4494 MIDFUNC(0,nop,(void))
4495 {
4496 raw_nop();
4497 }
4498 MENDFUNC(0,nop,(void))
4499
4500
4501 MIDFUNC(1,f_forget_about,(FW r))
4502 {
4503 if (f_isinreg(r))
4504 f_disassociate(r);
4505 live.fate[r].status=UNDEF;
4506 }
4507 MENDFUNC(1,f_forget_about,(FW r))
4508
4509 MIDFUNC(1,fmov_pi,(FW r))
4510 {
4511 r=f_writereg(r);
4512 raw_fmov_pi(r);
4513 f_unlock(r);
4514 }
4515 MENDFUNC(1,fmov_pi,(FW r))
4516
4517 MIDFUNC(1,fmov_log10_2,(FW r))
4518 {
4519 r=f_writereg(r);
4520 raw_fmov_log10_2(r);
4521 f_unlock(r);
4522 }
4523 MENDFUNC(1,fmov_log10_2,(FW r))
4524
4525 MIDFUNC(1,fmov_log2_e,(FW r))
4526 {
4527 r=f_writereg(r);
4528 raw_fmov_log2_e(r);
4529 f_unlock(r);
4530 }
4531 MENDFUNC(1,fmov_log2_e,(FW r))
4532
4533 MIDFUNC(1,fmov_loge_2,(FW r))
4534 {
4535 r=f_writereg(r);
4536 raw_fmov_loge_2(r);
4537 f_unlock(r);
4538 }
4539 MENDFUNC(1,fmov_loge_2,(FW r))
4540
4541 MIDFUNC(1,fmov_1,(FW r))
4542 {
4543 r=f_writereg(r);
4544 raw_fmov_1(r);
4545 f_unlock(r);
4546 }
4547 MENDFUNC(1,fmov_1,(FW r))
4548
4549 MIDFUNC(1,fmov_0,(FW r))
4550 {
4551 r=f_writereg(r);
4552 raw_fmov_0(r);
4553 f_unlock(r);
4554 }
4555 MENDFUNC(1,fmov_0,(FW r))
4556
4557 MIDFUNC(2,fmov_rm,(FW r, MEMR m))
4558 {
4559 r=f_writereg(r);
4560 raw_fmov_rm(r,m);
4561 f_unlock(r);
4562 }
4563 MENDFUNC(2,fmov_rm,(FW r, MEMR m))
4564
4565 MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
4566 {
4567 r=f_writereg(r);
4568 raw_fmovi_rm(r,m);
4569 f_unlock(r);
4570 }
4571 MENDFUNC(2,fmovi_rm,(FW r, MEMR m))
4572
4573 MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
4574 {
4575 r=f_readreg(r);
4576 raw_fmovi_mr(m,r);
4577 f_unlock(r);
4578 }
4579 MENDFUNC(2,fmovi_mr,(MEMW m, FR r))
4580
4581 MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
4582 {
4583 r=f_writereg(r);
4584 raw_fmovs_rm(r,m);
4585 f_unlock(r);
4586 }
4587 MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
4588
4589 MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
4590 {
4591 r=f_readreg(r);
4592 raw_fmovs_mr(m,r);
4593 f_unlock(r);
4594 }
4595 MENDFUNC(2,fmovs_mr,(MEMW m, FR r))
4596
4597 MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4598 {
4599 r=f_readreg(r);
4600 raw_fmov_ext_mr(m,r);
4601 f_unlock(r);
4602 }
4603 MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
4604
4605 MIDFUNC(2,fmov_mr,(MEMW m, FR r))
4606 {
4607 r=f_readreg(r);
4608 raw_fmov_mr(m,r);
4609 f_unlock(r);
4610 }
4611 MENDFUNC(2,fmov_mr,(MEMW m, FR r))
4612
4613 MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4614 {
4615 r=f_writereg(r);
4616 raw_fmov_ext_rm(r,m);
4617 f_unlock(r);
4618 }
4619 MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4620
4621 MIDFUNC(2,fmov_rr,(FW d, FR s))
4622 {
4623 if (d==s) { /* How pointless! */
4624 return;
4625 }
4626 #if USE_F_ALIAS
4627 f_disassociate(d);
4628 s=f_readreg(s);
4629 live.fate[d].realreg=s;
4630 live.fate[d].realind=live.fat[s].nholds;
4631 live.fate[d].status=DIRTY;
4632 live.fat[s].holds[live.fat[s].nholds]=d;
4633 live.fat[s].nholds++;
4634 f_unlock(s);
4635 #else
4636 s=f_readreg(s);
4637 d=f_writereg(d);
4638 raw_fmov_rr(d,s);
4639 f_unlock(s);
4640 f_unlock(d);
4641 #endif
4642 }
4643 MENDFUNC(2,fmov_rr,(FW d, FR s))
4644
4645 MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4646 {
4647 index=readreg(index,4);
4648
4649 raw_fldcw_m_indexed(index,base);
4650 unlock2(index);
4651 }
4652 MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
4653
4654 MIDFUNC(1,ftst_r,(FR r))
4655 {
4656 r=f_readreg(r);
4657 raw_ftst_r(r);
4658 f_unlock(r);
4659 }
4660 MENDFUNC(1,ftst_r,(FR r))
4661
4662 MIDFUNC(0,dont_care_fflags,(void))
4663 {
4664 f_disassociate(FP_RESULT);
4665 }
4666 MENDFUNC(0,dont_care_fflags,(void))
4667
4668 MIDFUNC(2,fsqrt_rr,(FW d, FR s))
4669 {
4670 s=f_readreg(s);
4671 d=f_writereg(d);
4672 raw_fsqrt_rr(d,s);
4673 f_unlock(s);
4674 f_unlock(d);
4675 }
4676 MENDFUNC(2,fsqrt_rr,(FW d, FR s))
4677
4678 MIDFUNC(2,fabs_rr,(FW d, FR s))
4679 {
4680 s=f_readreg(s);
4681 d=f_writereg(d);
4682 raw_fabs_rr(d,s);
4683 f_unlock(s);
4684 f_unlock(d);
4685 }
4686 MENDFUNC(2,fabs_rr,(FW d, FR s))
4687
4688 MIDFUNC(2,fsin_rr,(FW d, FR s))
4689 {
4690 s=f_readreg(s);
4691 d=f_writereg(d);
4692 raw_fsin_rr(d,s);
4693 f_unlock(s);
4694 f_unlock(d);
4695 }
4696 MENDFUNC(2,fsin_rr,(FW d, FR s))
4697
4698 MIDFUNC(2,fcos_rr,(FW d, FR s))
4699 {
4700 s=f_readreg(s);
4701 d=f_writereg(d);
4702 raw_fcos_rr(d,s);
4703 f_unlock(s);
4704 f_unlock(d);
4705 }
4706 MENDFUNC(2,fcos_rr,(FW d, FR s))
4707
4708 MIDFUNC(2,ftwotox_rr,(FW d, FR s))
4709 {
4710 s=f_readreg(s);
4711 d=f_writereg(d);
4712 raw_ftwotox_rr(d,s);
4713 f_unlock(s);
4714 f_unlock(d);
4715 }
4716 MENDFUNC(2,ftwotox_rr,(FW d, FR s))
4717
4718 MIDFUNC(2,fetox_rr,(FW d, FR s))
4719 {
4720 s=f_readreg(s);
4721 d=f_writereg(d);
4722 raw_fetox_rr(d,s);
4723 f_unlock(s);
4724 f_unlock(d);
4725 }
4726 MENDFUNC(2,fetox_rr,(FW d, FR s))
4727
4728 MIDFUNC(2,frndint_rr,(FW d, FR s))
4729 {
4730 s=f_readreg(s);
4731 d=f_writereg(d);
4732 raw_frndint_rr(d,s);
4733 f_unlock(s);
4734 f_unlock(d);
4735 }
4736 MENDFUNC(2,frndint_rr,(FW d, FR s))
4737
4738 MIDFUNC(2,flog2_rr,(FW d, FR s))
4739 {
4740 s=f_readreg(s);
4741 d=f_writereg(d);
4742 raw_flog2_rr(d,s);
4743 f_unlock(s);
4744 f_unlock(d);
4745 }
4746 MENDFUNC(2,flog2_rr,(FW d, FR s))
4747
4748 MIDFUNC(2,fneg_rr,(FW d, FR s))
4749 {
4750 s=f_readreg(s);
4751 d=f_writereg(d);
4752 raw_fneg_rr(d,s);
4753 f_unlock(s);
4754 f_unlock(d);
4755 }
4756 MENDFUNC(2,fneg_rr,(FW d, FR s))
4757
4758 MIDFUNC(2,fadd_rr,(FRW d, FR s))
4759 {
4760 s=f_readreg(s);
4761 d=f_rmw(d);
4762 raw_fadd_rr(d,s);
4763 f_unlock(s);
4764 f_unlock(d);
4765 }
4766 MENDFUNC(2,fadd_rr,(FRW d, FR s))
4767
4768 MIDFUNC(2,fsub_rr,(FRW d, FR s))
4769 {
4770 s=f_readreg(s);
4771 d=f_rmw(d);
4772 raw_fsub_rr(d,s);
4773 f_unlock(s);
4774 f_unlock(d);
4775 }
4776 MENDFUNC(2,fsub_rr,(FRW d, FR s))
4777
4778 MIDFUNC(2,fcmp_rr,(FR d, FR s))
4779 {
4780 d=f_readreg(d);
4781 s=f_readreg(s);
4782 raw_fcmp_rr(d,s);
4783 f_unlock(s);
4784 f_unlock(d);
4785 }
4786 MENDFUNC(2,fcmp_rr,(FR d, FR s))
4787
4788 MIDFUNC(2,fdiv_rr,(FRW d, FR s))
4789 {
4790 s=f_readreg(s);
4791 d=f_rmw(d);
4792 raw_fdiv_rr(d,s);
4793 f_unlock(s);
4794 f_unlock(d);
4795 }
4796 MENDFUNC(2,fdiv_rr,(FRW d, FR s))
4797
4798 MIDFUNC(2,frem_rr,(FRW d, FR s))
4799 {
4800 s=f_readreg(s);
4801 d=f_rmw(d);
4802 raw_frem_rr(d,s);
4803 f_unlock(s);
4804 f_unlock(d);
4805 }
4806 MENDFUNC(2,frem_rr,(FRW d, FR s))
4807
4808 MIDFUNC(2,frem1_rr,(FRW d, FR s))
4809 {
4810 s=f_readreg(s);
4811 d=f_rmw(d);
4812 raw_frem1_rr(d,s);
4813 f_unlock(s);
4814 f_unlock(d);
4815 }
4816 MENDFUNC(2,frem1_rr,(FRW d, FR s))
4817
4818 MIDFUNC(2,fmul_rr,(FRW d, FR s))
4819 {
4820 s=f_readreg(s);
4821 d=f_rmw(d);
4822 raw_fmul_rr(d,s);
4823 f_unlock(s);
4824 f_unlock(d);
4825 }
4826 MENDFUNC(2,fmul_rr,(FRW d, FR s))
4827
4828 /********************************************************************
4829 * Support functions exposed to gencomp. CREATE time *
4830 ********************************************************************/
4831
4832 int kill_rodent(int r)
4833 {
4834 return KILLTHERAT &&
4835 have_rat_stall &&
4836 (live.state[r].status==INMEM ||
4837 live.state[r].status==CLEAN ||
4838 live.state[r].status==ISCONST ||
4839 live.state[r].dirtysize==4);
4840 }
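
/*
 * kill_rodent() ("kill the RAT") answers whether a narrow access to r may
 * be widened to a full 32-bit one to sidestep partial register stalls:
 * that is safe whenever the complete register value is reconstructible,
 * i.e. the state is INMEM, CLEAN or ISCONST, or all four bytes are dirty
 * anyway.
 */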
4841
4842 uae_u32 get_const(int r)
4843 {
4844 Dif (!isconst(r)) {
4845 write_log("Register %d should be constant, but isn't\n",r);
4846 abort();
4847 }
4848 return live.state[r].val;
4849 }
4850
4851 void sync_m68k_pc(void)
4852 {
4853 if (m68k_pc_offset) {
4854 add_l_ri(PC_P,m68k_pc_offset);
4855 comp_pc_p+=m68k_pc_offset;
4856 m68k_pc_offset=0;
4857 }
4858 }
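
/*
 * m68k_pc_offset accumulates the byte lengths of the instructions
 * translated so far; sync_m68k_pc() flushes the pending delta into the
 * emulated PC (PC_P) with a single add, so the PC is only brought up to
 * date at points that actually need it.
 */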
4859
4860 /********************************************************************
4861 * Scratch registers management *
4862 ********************************************************************/
4863
4864 struct scratch_t {
4865 uae_u32 regs[VREGS];
4866 fpu_register fregs[VFREGS];
4867 };
4868
4869 static scratch_t scratch;
4870
4871 /********************************************************************
4872 * Support functions exposed to newcpu *
4873 ********************************************************************/
4874
4875 static inline const char *str_on_off(bool b)
4876 {
4877 return b ? "on" : "off";
4878 }
4879
4880 void compiler_init(void)
4881 {
4882 static bool initialized = false;
4883 if (initialized)
4884 return;
4885
4886 #ifndef WIN32
4887 // Open /dev/zero
4888 zero_fd = open("/dev/zero", O_RDWR);
4889 if (zero_fd < 0) {
4890 char str[200];
4891 sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
4892 ErrorAlert(str);
4893 QuitEmulator();
4894 }
4895 #endif
4896
4897 #if JIT_DEBUG
4898 // JIT debug mode ?
4899 JITDebug = PrefsFindBool("jitdebug");
4900 #endif
4901 write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");
4902
4903 #ifdef USE_JIT_FPU
4904 // Use JIT compiler for FPU instructions ?
4905 avoid_fpu = !PrefsFindBool("jitfpu");
4906 #else
4907 // JIT FPU is always disabled
4908 avoid_fpu = true;
4909 #endif
4910 write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");
4911
4912 // Get size of the translation cache (in KB)
4913 cache_size = PrefsFindInt32("jitcachesize");
4914 write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);
4915
4916 // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4917 raw_init_cpu();
4918 setzflg_uses_bsf = target_check_bsf();
4919 write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4920 write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4921 write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4922
4923 // Translation cache flush mechanism
4924 lazy_flush = PrefsFindBool("jitlazyflush");
4925 write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
4926 flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;
4927
4928 // Compiler features
4929 write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4930 write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4931 write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4932 write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
4933 write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4934
4935 // Build compiler tables
4936 build_comp();
4937
4938 initialized = true;
4939
4940 #if PROFILE_UNTRANSLATED_INSNS
4941 write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
4942 #endif
4943
4944 #if PROFILE_COMPILE_TIME
4945 write_log("<JIT compiler> : gather statistics on translation time\n");
4946 emul_start_time = clock();
4947 #endif
4948 }
4949
4950 void compiler_exit(void)
4951 {
4952 #if PROFILE_COMPILE_TIME
4953 emul_end_time = clock();
4954 #endif
4955
4956 // Deallocate translation cache
4957 if (compiled_code) {
4958 vm_release(compiled_code, cache_size * 1024);
4959 compiled_code = 0;
4960 }
4961
4962 #ifndef WIN32
4963 // Close /dev/zero
4964 if (zero_fd > 0)
4965 close(zero_fd);
4966 #endif
4967
4968 #if PROFILE_COMPILE_TIME
4969 write_log("### Compile Block statistics\n");
4970 write_log("Number of calls to compile_block : %d\n", compile_count);
4971 uae_u32 emul_time = emul_end_time - emul_start_time;
4972 write_log("Total emulation time : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
4973 write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
4974 100.0*double(compile_time)/double(emul_time));
4975 write_log("\n");
4976 #endif
4977
4978 #if PROFILE_UNTRANSLATED_INSNS
4979 uae_u64 untranslated_count = 0;
4980 for (int i = 0; i < 65536; i++) {
4981 opcode_nums[i] = i;
4982 untranslated_count += raw_cputbl_count[i];
4983 }
4984 write_log("Sorting out untranslated instructions count...\n");
4985 qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
4986 write_log("\nRank Opc Count Name\n");
4987 for (int i = 0; i < untranslated_top_ten; i++) {
4988 uae_u32 count = raw_cputbl_count[opcode_nums[i]];
4989 struct instr *dp;
4990 struct mnemolookup *lookup;
4991 if (!count)
4992 break;
4993 dp = table68k + opcode_nums[i];
4994 for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
4995 ;
4996 write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
4997 }
4998 #endif
4999 }
5000
5001 bool compiler_use_jit(void)
5002 {
5003 // Check for the "jit" prefs item
5004 if (!PrefsFindBool("jit"))
5005 return false;
5006
5007 // Don't use JIT if translation cache size is less than MIN_CACHE_SIZE KB
5008 if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
5009 write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
5010 return false;
5011 }
5012
5013 // FIXME: there are currently problems with JIT compilation and anything below a 68040
5014 if (CPUType < 4) {
5015 write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
5016 return false;
5017 }
5018
5019 return true;
5020 }
5021
5022 void init_comp(void)
5023 {
5024 int i;
5025 uae_s8* cb=can_byte;
5026 uae_s8* cw=can_word;
5027 uae_s8* au=always_used;
5028
5029 for (i=0;i<VREGS;i++) {
5030 live.state[i].realreg=-1;
5031 live.state[i].needflush=NF_SCRATCH;
5032 live.state[i].val=0;
5033 set_status(i,UNDEF);
5034 }
5035
5036 for (i=0;i<VFREGS;i++) {
5037 live.fate[i].status=UNDEF;
5038 live.fate[i].realreg=-1;
5039 live.fate[i].needflush=NF_SCRATCH;
5040 }
5041
5042 for (i=0;i<VREGS;i++) {
5043 if (i<16) { /* First 16 registers map to 68k registers */
5044 live.state[i].mem=((uae_u32*)&regs)+i;
5045 live.state[i].needflush=NF_TOMEM;
5046 set_status(i,INMEM);
5047 }
5048 else
5049 live.state[i].mem=scratch.regs+i;
5050 }
5051 live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
5052 live.state[PC_P].needflush=NF_TOMEM;
5053 set_const(PC_P,(uae_u32)comp_pc_p);
5054
5055 live.state[FLAGX].mem=&(regflags.x);
5056 live.state[FLAGX].needflush=NF_TOMEM;
5057 set_status(FLAGX,INMEM);
5058
5059 live.state[FLAGTMP].mem=&(regflags.cznv);
5060 live.state[FLAGTMP].needflush=NF_TOMEM;
5061 set_status(FLAGTMP,INMEM);
5062
5063 live.state[NEXT_HANDLER].needflush=NF_HANDLER;
5064 set_status(NEXT_HANDLER,UNDEF);
5065
5066 for (i=0;i<VFREGS;i++) {
5067 if (i<8) { /* First 8 registers map to 68k FPU registers */
5068 live.fate[i].mem=(uae_u32*)fpu_register_address(i);
5069 live.fate[i].needflush=NF_TOMEM;
5070 live.fate[i].status=INMEM;
5071 }
5072 else if (i==FP_RESULT) {
5073 live.fate[i].mem=(uae_u32*)(&fpu.result);
5074 live.fate[i].needflush=NF_TOMEM;
5075 live.fate[i].status=INMEM;
5076 }
5077 else
5078 live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
5079 }
5080
5081
5082 for (i=0;i<N_REGS;i++) {
5083 live.nat[i].touched=0;
5084 live.nat[i].nholds=0;
5085 live.nat[i].locked=0;
5086 if (*cb==i) {
5087 live.nat[i].canbyte=1; cb++;
5088 } else live.nat[i].canbyte=0;
5089 if (*cw==i) {
5090 live.nat[i].canword=1; cw++;
5091 } else live.nat[i].canword=0;
5092 if (*au==i) {
5093 live.nat[i].locked=1; au++;
5094 }
5095 }
5096
5097 for (i=0;i<N_FREGS;i++) {
5098 live.fat[i].touched=0;
5099 live.fat[i].nholds=0;
5100 live.fat[i].locked=0;
5101 }
5102
5103 touchcnt=1;
5104 m68k_pc_offset=0;
5105 live.flags_in_flags=TRASH;
5106 live.flags_on_stack=VALID;
5107 live.flags_are_important=1;
5108
5109 raw_fp_init();
5110 }
5111
5112 /* Only do this if you really mean it! The next call should be to init! */
5113 void flush(int save_regs)
5114 {
5115 int i;
5116
5117 log_flush();
5118 flush_flags(); /* low level */
5119 sync_m68k_pc(); /* mid level */
5120
5121 if (save_regs) {
5122 for (i=0;i<VFREGS;i++) {
5123 if (live.fate[i].needflush==NF_SCRATCH ||
5124 live.fate[i].status==CLEAN) {
5125 f_disassociate(i);
5126 }
5127 }
5128 for (i=0;i<VREGS;i++) {
5129 if (live.state[i].needflush==NF_TOMEM) {
5130 switch(live.state[i].status) {
5131 case INMEM:
5132 if (live.state[i].val) {
5133 raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
5134 log_vwrite(i);
5135 live.state[i].val=0;
5136 }
5137 break;
5138 case CLEAN:
5139 case DIRTY:
5140 remove_offset(i,-1); tomem(i); break;
5141 case ISCONST:
5142 if (i!=PC_P)
5143 writeback_const(i);
5144 break;
5145 default: break;
5146 }
5147 Dif (live.state[i].val && i!=PC_P) {
5148 write_log("Register %d still has val %x\n",
5149 i,live.state[i].val);
5150 }
5151 }
5152 }
5153 for (i=0;i<VFREGS;i++) {
5154 if (live.fate[i].needflush==NF_TOMEM &&
5155 live.fate[i].status==DIRTY) {
5156 f_evict(i);
5157 }
5158 }
5159 raw_fp_cleanup_drop();
5160 }
5161 if (needflags) {
5162 write_log("Warning! flush with needflags=1!\n");
5163 }
5164 }
5165
5166 static void flush_keepflags(void)
5167 {
5168 int i;
5169
5170 for (i=0;i<VFREGS;i++) {
5171 if (live.fate[i].needflush==NF_SCRATCH ||
5172 live.fate[i].status==CLEAN) {
5173 f_disassociate(i);
5174 }
5175 }
5176 for (i=0;i<VREGS;i++) {
5177 if (live.state[i].needflush==NF_TOMEM) {
5178 switch(live.state[i].status) {
5179 case INMEM:
5180 /* Can't adjust the offset here --- that needs "add" */
5181 break;
5182 case CLEAN:
5183 case DIRTY:
5184 remove_offset(i,-1); tomem(i); break;
5185 case ISCONST:
5186 if (i!=PC_P)
5187 writeback_const(i);
5188 break;
5189 default: break;
5190 }
5191 }
5192 }
5193 for (i=0;i<VFREGS;i++) {
5194 if (live.fate[i].needflush==NF_TOMEM &&
5195 live.fate[i].status==DIRTY) {
5196 f_evict(i);
5197 }
5198 }
5199 raw_fp_cleanup_drop();
5200 }
5201
5202 void freescratch(void)
5203 {
5204 int i;
5205 for (i=0;i<N_REGS;i++)
5206 if (live.nat[i].locked && i!=4) /* 4 == ESP, which stays locked */
5207 write_log("Warning! %d is locked\n",i);
5208
5209 for (i=0;i<VREGS;i++)
5210 if (live.state[i].needflush==NF_SCRATCH) {
5211 forget_about(i);
5212 }
5213
5214 for (i=0;i<VFREGS;i++)
5215 if (live.fate[i].needflush==NF_SCRATCH) {
5216 f_forget_about(i);
5217 }
5218 }
5219
5220 /********************************************************************
5221 * Support functions, internal *
5222 ********************************************************************/
5223
5224
5225 static void align_target(uae_u32 a)
5226 {
5227 if (!a)
5228 return;
5229
5230 if (tune_nop_fillers)
5231 raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
5232 else {
5233 /* Fill with NOPs --- makes debugging with gdb easier */
5234 while ((uae_u32)target&(a-1))
5235 *target++=0x90;
5236 }
5237 }
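
/* Worked example (added): with a == 16 and target == 0x1000c, the
   filler size is 16 - (0x1000c & 15) = 4 bytes, after which target is
   16-byte aligned. If target is already aligned, the tuned path emits
   a full a-byte filler rather than none; both paths preserve the
   alignment invariant. */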
5238
5239 static __inline__ int isinrom(uintptr addr)
5240 {
5241 return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
5242 }
5243
5244 static void flush_all(void)
5245 {
5246 int i;
5247
5248 log_flush();
5249 for (i=0;i<VREGS;i++)
5250 if (live.state[i].status==DIRTY) {
5251 if (!call_saved[live.state[i].realreg]) {
5252 tomem(i);
5253 }
5254 }
5255 for (i=0;i<VFREGS;i++)
5256 if (f_isinreg(i))
5257 f_evict(i);
5258 raw_fp_cleanup_drop();
5259 }
5260
5261 /* Make sure all registers that will get clobbered by a call are
5262 safe and sound in memory */
5263 static void prepare_for_call_1(void)
5264 {
5265 flush_all(); /* If there are registers that don't get clobbered,
5266 * we should be a bit more selective here */
5267 }
5268
5269 /* We will call a C routine in a moment. That will clobber all registers,
5270 so we need to disassociate everything */
5271 static void prepare_for_call_2(void)
5272 {
5273 int i;
5274 for (i=0;i<N_REGS;i++)
5275 if (!call_saved[i] && live.nat[i].nholds>0)
5276 free_nreg(i);
5277
5278 for (i=0;i<N_FREGS;i++)
5279 if (live.fat[i].nholds>0)
5280 f_free_nreg(i);
5281
5282 live.flags_in_flags=TRASH; /* Note: We assume we already rescued the
5283 flags at the very start of the call_r
5284 functions! */
5285 }
5286
5287 /********************************************************************
5288 * Memory access and related functions, CREATE time *
5289 ********************************************************************/
5290
5291 void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
5292 {
5293 next_pc_p=not_taken;
5294 taken_pc_p=taken;
5295 branch_cc=cond;
5296 }
5297
5298
5299 static uae_u32 get_handler_address(uae_u32 addr)
5300 {
5302 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5303 return (uae_u32)&(bi->direct_handler_to_use);
5304 }
5305
5306 static uae_u32 get_handler(uae_u32 addr)
5307 {
5309 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5310 return (uae_u32)bi->direct_handler_to_use;
5311 }
5312
5313 static void load_handler(int reg, uae_u32 addr)
5314 {
5315 mov_l_rm(reg,get_handler_address(addr));
5316 }
5317
5318 /* This version assumes that it is writing *real* memory, and *will* fail
5319 * if that assumption is wrong! No branches, no second chances, just
5320 * straight go-for-it attitude */
5321
5322 static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
5323 {
5324 int f=tmp;
5325
5326 if (clobber)
5327 f=source;
5328 switch(size) {
5329 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
5330 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
5331 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
5332 }
5333 forget_about(tmp);
5334 forget_about(f);
5335 }
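
/* Illustrative sketch (added, not part of the build): what the code
   generated by writemem_real for a long store amounts to in plain C.
   Assumes DIRECT_ADDRESSING, where host address = 68k address +
   MEMBaseDiff; the helper name is hypothetical. */
#if 0
static inline void example_store_long(uae_u32 addr, uae_u32 value)
{
	uae_u8 *host = (uae_u8 *)(addr + MEMBaseDiff);
	host[0] = (uae_u8)(value >> 24);	/* 68k memory is big-endian,  */
	host[1] = (uae_u8)(value >> 16);	/* hence the bswap_32 before  */
	host[2] = (uae_u8)(value >> 8);		/* the 32-bit store on a      */
	host[3] = (uae_u8)value;		/* little-endian host         */
}
#endif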
5336
5337 void writebyte(int address, int source, int tmp)
5338 {
5339 writemem_real(address,source,20,1,tmp,0);
5340 }
5341
5342 static __inline__ void writeword_general(int address, int source, int tmp,
5343 int clobber)
5344 {
5345 writemem_real(address,source,16,2,tmp,clobber);
5346 }
5347
5348 void writeword_clobber(int address, int source, int tmp)
5349 {
5350 writeword_general(address,source,tmp,1);
5351 }
5352
5353 void writeword(int address, int source, int tmp)
5354 {
5355 writeword_general(address,source,tmp,0);
5356 }
5357
5358 static __inline__ void writelong_general(int address, int source, int tmp,
5359 int clobber)
5360 {
5361 writemem_real(address,source,12,4,tmp,clobber);
5362 }
5363
5364 void writelong_clobber(int address, int source, int tmp)
5365 {
5366 writelong_general(address,source,tmp,1);
5367 }
5368
5369 void writelong(int address, int source, int tmp)
5370 {
5371 writelong_general(address,source,tmp,0);
5372 }
5373
5374
5375
5376 /* This version assumes that it is reading *real* memory, and *will* fail
5377 * if that assumption is wrong! No branches, no second chances, just
5378 * straight go-for-it attitude */
5379
5380 static void readmem_real(int address, int dest, int offset, int size, int tmp)
5381 {
5382 int f=tmp;
5383
5384 if (size==4 && address!=dest)
5385 f=dest;
5386
5387 switch(size) {
5388 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5389 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5390 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5391 }
5392 forget_about(tmp);
5393 }
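
/* Added note: the read path mirrors the write path above --- load from
   the host address (68k address + MEMBaseDiff), then byte-swap the 2-
   or 4-byte result in place, since 68k memory is big-endian. */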
5394
5395 void readbyte(int address, int dest, int tmp)
5396 {
5397 readmem_real(address,dest,8,1,tmp);
5398 }
5399
5400 void readword(int address, int dest, int tmp)
5401 {
5402 readmem_real(address,dest,4,2,tmp);
5403 }
5404
5405 void readlong(int address, int dest, int tmp)
5406 {
5407 readmem_real(address,dest,0,4,tmp);
5408 }
5409
5410 void get_n_addr(int address, int dest, int tmp)
5411 {
5412 // a is the register containing the virtual address
5413 // after the offset had been fetched
5414 int a=tmp;
5415
5416 // f is the register that will contain the offset
5417 int f=tmp;
5418
5419 // a == f == tmp if (address == dest)
5420 if (address!=dest) {
5421 a=address;
5422 f=dest;
5423 }
5424
5425 #if REAL_ADDRESSING
5426 mov_l_rr(dest, address);
5427 #elif DIRECT_ADDRESSING
5428 lea_l_brr(dest,address,MEMBaseDiff);
5429 #endif
5430 forget_about(tmp);
5431 }
5432
5433 void get_n_addr_jmp(int address, int dest, int tmp)
5434 {
5435 /* For this, we need to get the same address as the rest of UAE
5436 would --- otherwise we end up translating everything twice */
5437 get_n_addr(address,dest,tmp);
5438 }
5439
5440
5441 /* base is a register, but dp is an actual value.
5442 target is a register, as is tmp */
5443 void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
5444 {
5445 int reg = (dp >> 12) & 15;
5446 int regd_shift=(dp >> 9) & 3;
5447
5448 if (dp & 0x100) {
5449 int ignorebase=(dp&0x80);
5450 int ignorereg=(dp&0x40);
5451 int addbase=0;
5452 int outer=0;
5453
5454 if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5455 if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);
5456
5457 if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
5458 if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);
5459
5460 if ((dp & 0x4) == 0) { /* add regd *before* the get_long */
5461 if (!ignorereg) {
5462 if ((dp & 0x800) == 0)
5463 sign_extend_16_rr(target,reg);
5464 else
5465 mov_l_rr(target,reg);
5466 shll_l_ri(target,regd_shift);
5467 }
5468 else
5469 mov_l_ri(target,0);
5470
5471 /* target is now regd */
5472 if (!ignorebase)
5473 add_l(target,base);
5474 add_l_ri(target,addbase);
5475 if (dp&0x03) readlong(target,target,tmp);
5476 } else { /* do the getlong first, then add regd */
5477 if (!ignorebase) {
5478 mov_l_rr(target,base);
5479 add_l_ri(target,addbase);
5480 }
5481 else
5482 mov_l_ri(target,addbase);
5483 if (dp&0x03) readlong(target,target,tmp);
5484
5485 if (!ignorereg) {
5486 if ((dp & 0x800) == 0)
5487 sign_extend_16_rr(tmp,reg);
5488 else
5489 mov_l_rr(tmp,reg);
5490 shll_l_ri(tmp,regd_shift);
5491 /* tmp is now regd */
5492 add_l(target,tmp);
5493 }
5494 }
5495 add_l_ri(target,outer);
5496 }
5497 else { /* 68000 version */
5498 if ((dp & 0x800) == 0) { /* Sign extend */
5499 sign_extend_16_rr(target,reg);
5500 lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
5501 }
5502 else {
5503 lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
5504 }
5505 }
5506 forget_about(tmp);
5507 }
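
/* Added reference (68020 extension word format, as decoded above): the
   brief extension word has bit 8 clear; the full format word has bit 8
   set (dp & 0x100) and is decoded as
	bits 15-12  index register number	(reg)
	bit  11     index size, 0 = sign-extended word, 1 = long
	bits 10-9   index scale 1/2/4/8		(regd_shift)
	bit  7      base suppress		(ignorebase, dp & 0x80)
	bit  6      index suppress		(ignorereg, dp & 0x40)
	bits 5-4    base displacement size	(dp & 0x30: null/word/long)
	bit  2      0 = pre-indexed, 1 = post-indexed	(dp & 0x4)
	bits 1-0    indirection / outer displacement	(dp & 0x3)
   which is exactly the sequence of tests performed in this routine. */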
5508
5509
5510
5511
5512
5513 void set_cache_state(int enabled)
5514 {
5515 if (enabled!=letit)
5516 flush_icache_hard(77);
5517 letit=enabled;
5518 }
5519
5520 int get_cache_state(void)
5521 {
5522 return letit;
5523 }
5524
5525 uae_u32 get_jitted_size(void)
5526 {
5527 if (compiled_code)
5528 return current_compile_p-compiled_code;
5529 return 0;
5530 }
5531
5532 void alloc_cache(void)
5533 {
5534 if (compiled_code) {
5535 flush_icache_hard(6);
5536 vm_release(compiled_code, cache_size * 1024);
5537 compiled_code = 0;
5538 }
5539
5540 if (cache_size == 0)
5541 return;
5542
5543 while (!compiled_code && cache_size) {
5544 if ((compiled_code = (uae_u8 *)vm_acquire(cache_size * 1024)) == VM_MAP_FAILED) {
5545 compiled_code = 0;
5546 cache_size /= 2;
5547 }
5548 }
5549 
5550 if (compiled_code) {
5551 vm_protect(compiled_code, cache_size * 1024, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5552 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5553 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5554 current_compile_p = compiled_code;
5555 current_cache_size = 0;
5556 }
5557 }
5558
5559
5560
5561 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5562
5563 static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5564 {
5565 uae_u32 k1 = 0;
5566 uae_u32 k2 = 0;
5567
5568 #if USE_CHECKSUM_INFO
5569 checksum_info *csi = bi->csi;
5570 Dif(!csi) abort();
5571 while (csi) {
5572 uae_s32 len = csi->length;
5573 uae_u32 tmp = (uae_u32)csi->start_p;
5574 #else
5575 uae_s32 len = bi->len;
5576 uae_u32 tmp = (uae_u32)bi->min_pcp;
5577 #endif
5578 uae_u32*pos;
5579
5580 len += (tmp & 3);
5581 tmp &= ~3;
5582 pos = (uae_u32 *)tmp;
5583
5584 if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5585 while (len > 0) {
5586 k1 += *pos;
5587 k2 ^= *pos;
5588 pos++;
5589 len -= 4;
5590 }
5591 }
5592
5593 #if USE_CHECKSUM_INFO
5594 csi = csi->next;
5595 }
5596 #endif
5597
5598 *c1 = k1;
5599 *c2 = k2;
5600 }
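
/* Equivalent plain-C model of the block checksum above (added sketch,
   not used by the build): a running 32-bit sum and a running XOR over
   the aligned words covering the translated 68k code. */
#if 0
static void example_checksum(const uae_u32 *words, int nwords,
			     uae_u32 *c1, uae_u32 *c2)
{
	uae_u32 k1 = 0, k2 = 0;
	for (int i = 0; i < nwords; i++) {
		k1 += words[i];
		k2 ^= words[i];
	}
	*c1 = k1;
	*c2 = k2;
}
#endif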
5601
5602 #if 0
5603 static void show_checksum(CSI_TYPE* csi)
5604 {
5605 uae_u32 k1=0;
5606 uae_u32 k2=0;
5607 uae_s32 len=CSI_LENGTH(csi);
5608 uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5609 uae_u32* pos;
5610
5611 len+=(tmp&3);
5612 tmp&=(~3);
5613 pos=(uae_u32*)tmp;
5614
5615 if (len<0 || len>MAX_CHECKSUM_LEN) {
5616 return;
5617 }
5618 else {
5619 while (len>0) {
5620 write_log("%08x ",*pos);
5621 pos++;
5622 len-=4;
5623 }
5624 write_log(" bla\n");
5625 }
5626 }
5627 #endif
5628
5629
5630 int check_for_cache_miss(void)
5631 {
5632 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5633
5634 if (bi) {
5635 int cl=cacheline(regs.pc_p);
5636 if (bi!=cache_tags[cl+1].bi) {
5637 raise_in_cl_list(bi);
5638 return 1;
5639 }
5640 }
5641 return 0;
5642 }
5643
5644
5645 static void recompile_block(void)
5646 {
5647 /* An existing block's countdown code has expired. We need to make
5648 sure that execute_normal doesn't refuse to recompile due to a
5649 perceived cache miss... */
5650 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5651
5652 Dif (!bi)
5653 abort();
5654 raise_in_cl_list(bi);
5655 execute_normal();
5656 return;
5657 }
5658 static void cache_miss(void)
5659 {
5660 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5661 uae_u32 cl=cacheline(regs.pc_p);
5662 blockinfo* bi2=get_blockinfo(cl);
5663
5664 if (!bi) {
5665 execute_normal(); /* Compile this block now */
5666 return;
5667 }
5668 Dif (!bi2 || bi==bi2) {
5669 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5670 abort();
5671 }
5672 raise_in_cl_list(bi);
5673 return;
5674 }
5675
5676 static int called_check_checksum(blockinfo* bi);
5677
5678 static inline int block_check_checksum(blockinfo* bi)
5679 {
5680 uae_u32 c1,c2;
5681 bool isgood;
5682
5683 if (bi->status!=BI_NEED_CHECK)
5684 return 1; /* This block is in a checked state */
5685
5686 checksum_count++;
5687
5688 if (bi->c1 || bi->c2)
5689 calc_checksum(bi,&c1,&c2);
5690 else {
5691 c1=c2=1; /* Make sure it doesn't match */
5692 }
5693
5694 isgood=(c1==bi->c1 && c2==bi->c2);
5695
5696 if (isgood) {
5697 /* This block is still OK. So we reactivate. Of course, that
5698 means we have to move it into the needs-to-be-flushed list */
5699 bi->handler_to_use=bi->handler;
5700 set_dhtu(bi,bi->direct_handler);
5701 bi->status=BI_CHECKING;
5702 isgood=called_check_checksum(bi);
5703 }
5704 if (isgood) {
5705 /* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5706 c1,c2,bi->c1,bi->c2);*/
5707 remove_from_list(bi);
5708 add_to_active(bi);
5709 raise_in_cl_list(bi);
5710 bi->status=BI_ACTIVE;
5711 }
5712 else {
5713 /* This block actually changed. We need to invalidate it,
5714 and set it up to be recompiled */
5715 /* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
5716 c1,c2,bi->c1,bi->c2); */
5717 invalidate_block(bi);
5718 raise_in_cl_list(bi);
5719 }
5720 return isgood;
5721 }
5722
5723 static int called_check_checksum(blockinfo* bi)
5724 {
5726 int isgood=1;
5727 int i;
5728
5729 for (i=0;i<2 && isgood;i++) {
5730 if (bi->dep[i].jmp_off) {
5731 isgood=block_check_checksum(bi->dep[i].target);
5732 }
5733 }
5734 return isgood;
5735 }
5736
5737 static void check_checksum(void)
5738 {
5739 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5740 uae_u32 cl=cacheline(regs.pc_p);
5741 blockinfo* bi2=get_blockinfo(cl);
5742
5743 /* These are not the droids you are looking for... */
5744 if (!bi) {
5745 /* Whoever is the primary target is in a dormant state, but
5746 calling it was accidental, and we should just compile this
5747 new block */
5748 execute_normal();
5749 return;
5750 }
5751 if (bi!=bi2) {
5752 /* The block was hit accidentally, but it does exist. Cache miss */
5753 cache_miss();
5754 return;
5755 }
5756
5757 if (!block_check_checksum(bi))
5758 execute_normal();
5759 }
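
/* Added summary of the blockinfo states used above, as observed in this
   file: freshly prepared blocks are BI_INVALID; compile_block moves
   them through BI_COMPILING and BI_FINALIZING to BI_ACTIVE. A lazy
   cache flush demotes active blocks to BI_NEED_CHECK; a matching
   checksum promotes them back to BI_ACTIVE (via BI_CHECKING while
   dependent blocks are verified), while a mismatch invalidates them so
   they get recompiled on the next execution. */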
5760
5761 static __inline__ void match_states(blockinfo* bi)
5762 {
5763 int i;
5764 smallstate* s=&(bi->env);
5765
5766 if (bi->status==BI_NEED_CHECK) {
5767 block_check_checksum(bi);
5768 }
5769 if (bi->status==BI_ACTIVE ||
5770 bi->status==BI_FINALIZING) { /* Deal with the *promises* the
5771 block makes (about not using
5772 certain vregs) */
5773 for (i=0;i<16;i++) {
5774 if (s->virt[i]==L_UNNEEDED) {
5775 // write_log("unneeded reg %d at %p\n",i,target);
5776 COMPCALL(forget_about)(i); // FIXME
5777 }
5778 }
5779 }
5780 flush(1);
5781
5782 /* And now deal with the *demands* the block makes */
5783 for (i=0;i<N_REGS;i++) {
5784 int v=s->nat[i];
5785 if (v>=0) {
5786 // printf("Loading reg %d into %d at %p\n",v,i,target);
5787 readreg_specific(v,4,i);
5788 // do_load_reg(i,v);
5789 // setlock(i);
5790 }
5791 }
5792 for (i=0;i<N_REGS;i++) {
5793 int v=s->nat[i];
5794 if (v>=0) {
5795 unlock2(i);
5796 }
5797 }
5798 }
5799
5800 static uae_u8 popallspace[1024]; /* That should be enough space */
5801
5802 static __inline__ void create_popalls(void)
5803 {
5804 int i,r;
5805
5806 current_compile_p=popallspace;
5807 set_target(current_compile_p);
5808 #if USE_PUSH_POP
5809 /* If we can't use gcc inline assembly, we need to pop some
5810 registers before jumping back to the various get-out routines.
5811 This generates the code for it.
5812 */
5813 align_target(align_jumps);
5814 popall_do_nothing=get_target();
5815 for (i=0;i<N_REGS;i++) {
5816 if (need_to_preserve[i])
5817 raw_pop_l_r(i);
5818 }
5819 raw_jmp((uae_u32)do_nothing);
5820
5821 align_target(align_jumps);
5822 popall_execute_normal=get_target();
5823 for (i=0;i<N_REGS;i++) {
5824 if (need_to_preserve[i])
5825 raw_pop_l_r(i);
5826 }
5827 raw_jmp((uae_u32)execute_normal);
5828
5829 align_target(align_jumps);
5830 popall_cache_miss=get_target();
5831 for (i=0;i<N_REGS;i++) {
5832 if (need_to_preserve[i])
5833 raw_pop_l_r(i);
5834 }
5835 raw_jmp((uae_u32)cache_miss);
5836
5837 align_target(align_jumps);
5838 popall_recompile_block=get_target();
5839 for (i=0;i<N_REGS;i++) {
5840 if (need_to_preserve[i])
5841 raw_pop_l_r(i);
5842 }
5843 raw_jmp((uae_u32)recompile_block);
5844
5845 align_target(align_jumps);
5846 popall_exec_nostats=get_target();
5847 for (i=0;i<N_REGS;i++) {
5848 if (need_to_preserve[i])
5849 raw_pop_l_r(i);
5850 }
5851 raw_jmp((uae_u32)exec_nostats);
5852
5853 align_target(align_jumps);
5854 popall_check_checksum=get_target();
5855 for (i=0;i<N_REGS;i++) {
5856 if (need_to_preserve[i])
5857 raw_pop_l_r(i);
5858 }
5859 raw_jmp((uae_u32)check_checksum);
5860
5861 align_target(align_jumps);
5862 current_compile_p=get_target();
5863 #else
5864 popall_exec_nostats=(void *)exec_nostats;
5865 popall_execute_normal=(void *)execute_normal;
5866 popall_cache_miss=(void *)cache_miss;
5867 popall_recompile_block=(void *)recompile_block;
5868 popall_do_nothing=(void *)do_nothing;
5869 popall_check_checksum=(void *)check_checksum;
5870 #endif
5871
5872 /* And now, the code to do the matching pushes and then jump
5873 into a handler routine */
5874 pushall_call_handler=get_target();
5875 #if USE_PUSH_POP
5876 for (i=N_REGS;i--;) {
5877 if (need_to_preserve[i])
5878 raw_push_l_r(i);
5879 }
5880 #endif
5881 r=REG_PC_TMP;
5882 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5883 raw_and_l_ri(r,TAGMASK);
5884 raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5885
5886 #ifdef X86_ASSEMBLY
5887 align_target(align_jumps);
5888 m68k_compile_execute = (void (*)(void))get_target();
5889 for (i=N_REGS;i--;) {
5890 if (need_to_preserve[i])
5891 raw_push_l_r(i);
5892 }
5893 align_target(align_loops);
5894 uae_u32 dispatch_loop = (uae_u32)get_target();
5895 r=REG_PC_TMP;
5896 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5897 raw_and_l_ri(r,TAGMASK);
5898 raw_call_m_indexed((uae_u32)cache_tags,r,4);
5899 raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5900 raw_jcc_b_oponly(NATIVE_CC_EQ);
5901 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5902 raw_call((uae_u32)m68k_do_specialties);
5903 raw_test_l_rr(REG_RESULT,REG_RESULT);
5904 raw_jcc_b_oponly(NATIVE_CC_EQ);
5905 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5906 raw_cmp_b_mi((uae_u32)&quit_program,0);
5907 raw_jcc_b_oponly(NATIVE_CC_EQ);
5908 emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5909 for (i=0;i<N_REGS;i++) {
5910 if (need_to_preserve[i])
5911 raw_pop_l_r(i);
5912 }
5913 raw_ret();
5914 #endif
5915 }
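
/* Added note: each popall_* stub generated above has the shape

	popall_execute_normal:
		pop	<callee-saved regs pushed by pushall_call_handler>
		jmp	execute_normal

   so compiled code can jump straight to a stub from anywhere inside the
   translation cache and still restore the C calling environment before
   reentering the interpreter-side helpers. */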
5916
5917 static __inline__ void reset_lists(void)
5918 {
5919 int i;
5920
5921 for (i=0;i<MAX_HOLD_BI;i++)
5922 hold_bi[i]=NULL;
5923 active=NULL;
5924 dormant=NULL;
5925 }
5926
5927 static void prepare_block(blockinfo* bi)
5928 {
5929 int i;
5930
5931 set_target(current_compile_p);
5932 align_target(align_jumps);
5933 bi->direct_pen=(cpuop_func *)get_target();
5934 raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5935 raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5936 raw_jmp((uae_u32)popall_execute_normal);
5937
5938 align_target(align_jumps);
5939 bi->direct_pcc=(cpuop_func *)get_target();
5940 raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5941 raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5942 raw_jmp((uae_u32)popall_check_checksum);
5943 current_compile_p=get_target();
5944
5945 bi->deplist=NULL;
5946 for (i=0;i<2;i++) {
5947 bi->dep[i].prev_p=NULL;
5948 bi->dep[i].next=NULL;
5949 }
5950 bi->env=default_ss;
5951 bi->status=BI_INVALID;
5952 bi->havestate=0;
5953 //bi->env=empty_ss;
5954 }
5955
5956 static bool avoid_opcode(uae_u32 opcode)
5957 {
5958 #if JIT_DEBUG
5959 struct instr *dp = &table68k[opcode];
5960 // filter opcodes per type, integral value, or whatever
5961 #endif
5962 return false;
5963 }
5964
5965 void build_comp(void)
5966 {
5967 int i;
5968 int jumpcount=0;
5969 unsigned long opcode;
5970 struct comptbl* tbl=op_smalltbl_0_comp_ff;
5971 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
5972 int count;
5973 int cpu_level = 0; // 68000 (default)
5974 if (CPUType == 4)
5975 cpu_level = 4; // 68040 with FPU
5976 else {
5977 if (FPUType)
5978 cpu_level = 3; // 68020 with FPU
5979 else if (CPUType >= 2)
5980 cpu_level = 2; // 68020
5981 else if (CPUType == 1)
5982 cpu_level = 1;
5983 }
5984 struct cputbl *nfctbl = (
5985 cpu_level == 4 ? op_smalltbl_0_nf
5986 : cpu_level == 3 ? op_smalltbl_1_nf
5987 : cpu_level == 2 ? op_smalltbl_2_nf
5988 : cpu_level == 1 ? op_smalltbl_3_nf
5989 : op_smalltbl_4_nf);
5990
5991 write_log ("<JIT compiler> : building compiler function tables\n");
5992
5993 for (opcode = 0; opcode < 65536; opcode++) {
5994 nfcpufunctbl[opcode] = op_illg_1;
5995 compfunctbl[opcode] = NULL;
5996 nfcompfunctbl[opcode] = NULL;
5997 prop[opcode].use_flags = 0x1f;
5998 prop[opcode].set_flags = 0x1f;
5999 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6000 }
6001
6002 for (i = 0; tbl[i].opcode < 65536; i++) {
6003 int cflow = table68k[tbl[i].opcode].cflow;
6004 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6005 cflow = fl_const_jump;
6006 else
6007 cflow &= ~fl_const_jump;
6008 prop[cft_map(tbl[i].opcode)].cflow = cflow;
6009
6010 int uses_fpu = tbl[i].specific & 32;
6011 if ((uses_fpu && avoid_fpu) || avoid_opcode(tbl[i].opcode))
6012 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6013 else
6014 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6015 }
6016
6017 for (i = 0; nftbl[i].opcode < 65536; i++) {
6018 int uses_fpu = nftbl[i].specific & 32;
6019 if ((uses_fpu && avoid_fpu) || avoid_opcode(nftbl[i].opcode))
6020 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6021 else
6022 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6023
6024 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6025 }
6026
6027 for (i = 0; nfctbl[i].handler; i++) {
6028 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6029 }
6030
6031 for (opcode = 0; opcode < 65536; opcode++) {
6032 compop_func *f;
6033 compop_func *nff;
6034 cpuop_func *nfcf;
6035 int isaddx,cflow;
6036
6037 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6038 continue;
6039
6040 if (table68k[opcode].handler != -1) {
6041 f = compfunctbl[cft_map(table68k[opcode].handler)];
6042 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6043 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6044 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6045 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6046 prop[cft_map(opcode)].cflow = cflow;
6047 prop[cft_map(opcode)].is_addx = isaddx;
6048 compfunctbl[cft_map(opcode)] = f;
6049 nfcompfunctbl[cft_map(opcode)] = nff;
6050 Dif (nfcf == op_illg_1)
6051 abort();
6052 nfcpufunctbl[cft_map(opcode)] = nfcf;
6053 }
6054 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6055 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6056 }
6057 for (i = 0; nfctbl[i].handler != NULL; i++) {
6058 if (nfctbl[i].specific)
6059 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6060 }
6061
6062 count=0;
6063 for (opcode = 0; opcode < 65536; opcode++) {
6064 if (compfunctbl[cft_map(opcode)])
6065 count++;
6066 }
6067 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6068
6069 /* Initialise state */
6070 create_popalls();
6071 alloc_cache();
6072 reset_lists();
6073
6074 for (i=0;i<TAGSIZE;i+=2) {
6075 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6076 cache_tags[i+1].bi=NULL;
6077 }
6078
6079 #if 0
6080 for (i=0;i<N_REGS;i++) {
6081 empty_ss.nat[i].holds=-1;
6082 empty_ss.nat[i].validsize=0;
6083 empty_ss.nat[i].dirtysize=0;
6084 }
6085 #endif
6086 for (i=0;i<VREGS;i++) {
6087 empty_ss.virt[i]=L_NEEDED;
6088 }
6089 for (i=0;i<N_REGS;i++) {
6090 empty_ss.nat[i]=L_UNKNOWN;
6091 }
6092 default_ss=empty_ss;
6093 }
6094
6095
6096 static void flush_icache_none(int n)
6097 {
6098 /* Nothing to do. */
6099 }
6100
6101 static void flush_icache_hard(int n)
6102 {
6104 blockinfo* bi, *dbi;
6105
6106 hard_flush_count++;
6107 #if 0
6108 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6109 n,regs.pc,regs.pc_p,current_cache_size/1024);
6110 current_cache_size = 0;
6111 #endif
6112 bi=active;
6113 while(bi) {
6114 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6115 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6116 dbi=bi; bi=bi->next;
6117 free_blockinfo(dbi);
6118 }
6119 bi=dormant;
6120 while(bi) {
6121 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6122 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6123 dbi=bi; bi=bi->next;
6124 free_blockinfo(dbi);
6125 }
6126
6127 reset_lists();
6128 if (!compiled_code)
6129 return;
6130 current_compile_p=compiled_code;
6131 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6132 }
6133
6134
6135 /* "Soft flushing" --- instead of actually throwing everything away,
6136 we simply mark everything as "needs to be checked".
6137 */
6138
6139 static inline void flush_icache_lazy(int n)
6140 {
6142 blockinfo* bi;
6143 blockinfo* bi2;
6144
6145 soft_flush_count++;
6146 if (!active)
6147 return;
6148
6149 bi=active;
6150 while (bi) {
6151 uae_u32 cl=cacheline(bi->pc_p);
6152 if (bi->status==BI_INVALID ||
6153 bi->status==BI_NEED_RECOMP) {
6154 if (bi==cache_tags[cl+1].bi)
6155 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6156 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6157 set_dhtu(bi,bi->direct_pen);
6158 bi->status=BI_INVALID;
6159 }
6160 else {
6161 if (bi==cache_tags[cl+1].bi)
6162 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6163 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6164 set_dhtu(bi,bi->direct_pcc);
6165 bi->status=BI_NEED_CHECK;
6166 }
6167 bi2=bi;
6168 bi=bi->next;
6169 }
6170 /* bi2 is now the last entry in the active list */
6171 bi2->next=dormant;
6172 if (dormant)
6173 dormant->prev_p=&(bi2->next);
6174
6175 dormant=active;
6176 active->prev_p=&dormant;
6177 active=NULL;
6178 }
6179
6180 static void catastrophe(void)
6181 {
6182 abort();
6183 }
6184
6185 int failure;
6186
6187 #define TARGET_M68K 0
6188 #define TARGET_POWERPC 1
6189 #define TARGET_X86 2
6190 #if defined(i386) || defined(__i386__)
6191 #define TARGET_NATIVE TARGET_X86
6192 #endif
6193 #if defined(powerpc) || defined(__powerpc__)
6194 #define TARGET_NATIVE TARGET_POWERPC
6195 #endif
6196
6197 #ifdef ENABLE_MON
6198 static uae_u32 mon_read_byte_jit(uae_u32 addr)
6199 {
6200 uae_u8 *m = (uae_u8 *)addr;
6201 return (uae_u32)(*m);
6202 }
6203
6204 static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
6205 {
6206 uae_u8 *m = (uae_u8 *)addr;
6207 *m = b;
6208 }
6209 #endif
6210
6211 void disasm_block(int target, uint8 * start, size_t length)
6212 {
6213 if (!JITDebug)
6214 return;
6215
6216 #if defined(JIT_DEBUG) && defined(ENABLE_MON)
6217 char disasm_str[200];
6218 sprintf(disasm_str, "%s $%x $%x",
6219 target == TARGET_M68K ? "d68" :
6220 target == TARGET_X86 ? "d86" :
6221 target == TARGET_POWERPC ? "d" : "x",
6222 (uae_u32)start, (uae_u32)start + length - 1);
6223
6224 uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
6225 void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;
6226
6227 mon_read_byte = mon_read_byte_jit;
6228 mon_write_byte = mon_write_byte_jit;
6229
6230 char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
6231 mon(4, arg);
6232
6233 mon_read_byte = old_mon_read_byte;
6234 mon_write_byte = old_mon_write_byte;
6235 #endif
6236 }
6237
6238 static inline void disasm_native_block(uint8 *start, size_t length)
6239 {
6240 disasm_block(TARGET_NATIVE, start, length);
6241 }
6242
6243 static inline void disasm_m68k_block(uint8 *start, size_t length)
6244 {
6245 disasm_block(TARGET_M68K, start, length);
6246 }
6247
6248 #ifdef HAVE_GET_WORD_UNSWAPPED
6249 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6250 #else
6251 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6252 #endif
6253
6254 #if JIT_DEBUG
6255 static uae_u8 *last_regs_pc_p = 0;
6256 static uae_u8 *last_compiled_block_addr = 0;
6257
6258 void compiler_dumpstate(void)
6259 {
6260 if (!JITDebug)
6261 return;
6262
6263 write_log("### Host addresses\n");
6264 write_log("MEM_BASE : %x\n", MEMBaseDiff);
6265 write_log("PC_P : %p\n", &regs.pc_p);
6266 write_log("SPCFLAGS : %p\n", &regs.spcflags);
6267 write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
6268 write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
6269 write_log("\n");
6270
6271 write_log("### M68k processor state\n");
6272 m68k_dumpstate(0);
6273 write_log("\n");
6274
6275 write_log("### Block in Mac address space\n");
6276 write_log("M68K block : %p\n",
6277 (void *)get_virtual_address(last_regs_pc_p));
6278 write_log("Native block : %p (%d bytes)\n",
6279 (void *)get_virtual_address(last_compiled_block_addr),
6280 get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
6281 write_log("\n");
6282 }
6283 #endif
6284
6285 static void compile_block(cpu_history* pc_hist, int blocklen)
6286 {
6287 if (letit && compiled_code) {
6288 #if PROFILE_COMPILE_TIME
6289 compile_count++;
6290 clock_t start_time = clock();
6291 #endif
6292 #if JIT_DEBUG
6293 bool disasm_block = false;
6294 #endif
6295
6296 /* OK, here we need to 'compile' a block */
6297 int i;
6298 int r;
6299 int was_comp=0;
6300 uae_u8 liveflags[MAXRUN+1];
6301 #if USE_CHECKSUM_INFO
6302 bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6303 uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
6304 uae_u32 min_pcp=max_pcp;
6305 #else
6306 uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
6307 uae_u32 min_pcp=max_pcp;
6308 #endif
6309 uae_u32 cl=cacheline(pc_hist[0].location);
6310 void* specflags=(void*)&regs.spcflags;
6311 blockinfo* bi=NULL;
6312 blockinfo* bi2;
6313 int extra_len=0;
6314
6315 redo_current_block=0;
6316 if (current_compile_p>=max_compile_start)
6317 flush_icache_hard(7);
6318
6319 alloc_blockinfos();
6320
6321 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6322 bi2=get_blockinfo(cl);
6323
6324 optlev=bi->optlevel;
6325 if (bi->status!=BI_INVALID) {
6326 Dif (bi!=bi2) {
6327 /* I don't think it can happen anymore. Shouldn't, in
6328 any case. So let's make sure... */
6329 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6330 bi->count,bi->optlevel,bi->handler_to_use,
6331 cache_tags[cl].handler);
6332 abort();
6333 }
6334
6335 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6336 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6337 /* What the heck? We are not supposed to be here! */
6338 abort();
6339 }
6340 }
6341 if (bi->count==-1) {
6342 optlev++;
6343 while (!optcount[optlev])
6344 optlev++;
6345 bi->count=optcount[optlev]-1;
6346 }
6347 current_block_pc_p=(uae_u32)pc_hist[0].location;
6348
6349 remove_deps(bi); /* We are about to create new code */
6350 bi->optlevel=optlev;
6351 bi->pc_p=(uae_u8*)pc_hist[0].location;
6352 #if USE_CHECKSUM_INFO
6353 free_checksum_info_chain(bi->csi);
6354 bi->csi = NULL;
6355 #endif
6356
6357 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6358 i=blocklen;
6359 while (i--) {
6360 uae_u16* currpcp=pc_hist[i].location;
6361 uae_u32 op=DO_GET_OPCODE(currpcp);
6362
6363 #if USE_CHECKSUM_INFO
6364 trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6365 #if USE_INLINING
6366 if (is_const_jump(op)) {
6367 checksum_info *csi = alloc_checksum_info();
6368 csi->start_p = (uae_u8 *)min_pcp;
6369 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6370 csi->next = bi->csi;
6371 bi->csi = csi;
6372 max_pcp = (uae_u32)currpcp;
6373 }
6374 #endif
6375 min_pcp = (uae_u32)currpcp;
6376 #else
6377 if ((uae_u32)currpcp<min_pcp)
6378 min_pcp=(uae_u32)currpcp;
6379 if ((uae_u32)currpcp>max_pcp)
6380 max_pcp=(uae_u32)currpcp;
6381 #endif
6382
6383 liveflags[i]=((liveflags[i+1]&
6384 (~prop[op].set_flags))|
6385 prop[op].use_flags);
6386 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6387 liveflags[i]&= ~FLAG_Z;
6388 }
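
/* Added note on the backward pass above: liveflags[i] is the set of
   CZNVX flags still needed *before* instruction i, computed as
   (needed-after & ~set_flags) | use_flags. Example: if instruction i
   sets all of CZNV and the code after it reads none of them,
   liveflags[i+1] has those bits clear and instruction i can be
   compiled with its nf (no-flags) handler. The ADDX special case:
   ADDX nominally reads Z (it only ever clears Z, never sets it), so
   when no later instruction needs Z, the incoming Z requirement is
   stripped again. */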
6389
6390 #if USE_CHECKSUM_INFO
6391 checksum_info *csi = alloc_checksum_info();
6392 csi->start_p = (uae_u8 *)min_pcp;
6393 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6394 csi->next = bi->csi;
6395 bi->csi = csi;
6396 #endif
6397
6398 bi->needed_flags=liveflags[0];
6399
6400 align_target(align_loops);
6401 was_comp=0;
6402
6403 bi->direct_handler=(cpuop_func *)get_target();
6404 set_dhtu(bi,bi->direct_handler);
6405 bi->status=BI_COMPILING;
6406 current_block_start_target=(uae_u32)get_target();
6407
6408 log_startblock();
6409
6410 if (bi->count>=0) { /* Need to generate countdown code */
6411 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6412 raw_sub_l_mi((uae_u32)&(bi->count),1);
6413 raw_jl((uae_u32)popall_recompile_block);
6414 }
6415 if (optlev==0) { /* No need to actually translate */
6416 /* Execute normally without keeping stats */
6417 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6418 raw_jmp((uae_u32)popall_exec_nostats);
6419 }
6420 else {
6421 reg_alloc_run=0;
6422 next_pc_p=0;
6423 taken_pc_p=0;
6424 branch_cc=0;
6425
6426 comp_pc_p=(uae_u8*)pc_hist[0].location;
6427 init_comp();
6428 was_comp=1;
6429
6430 #if JIT_DEBUG
6431 if (JITDebug) {
6432 raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
6433 raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
6434 }
6435 #endif
6436
6437 for (i=0;i<blocklen &&
6438 get_target_noopt()<max_compile_start;i++) {
6439 cpuop_func **cputbl;
6440 compop_func **comptbl;
6441 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6442 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6443 if (!needed_flags) {
6444 cputbl=nfcpufunctbl;
6445 comptbl=nfcompfunctbl;
6446 }
6447 else {
6448 cputbl=cpufunctbl;
6449 comptbl=compfunctbl;
6450 }
6451
6452 failure = 1; // gb-- defaults to failure state
6453 if (comptbl[opcode] && optlev>1) {
6454 failure=0;
6455 if (!was_comp) {
6456 comp_pc_p=(uae_u8*)pc_hist[i].location;
6457 init_comp();
6458 }
6459 was_comp=1;
6460
6461 comptbl[opcode](opcode);
6462 freescratch();
6463 if (!(liveflags[i+1] & FLAG_CZNV)) {
6464 /* We can forget about flags */
6465 dont_care_flags();
6466 }
6467 #if INDIVIDUAL_INST
6468 flush(1);
6469 nop();
6470 flush(1);
6471 was_comp=0;
6472 #endif
6473 }
6474
6475 if (failure) {
6476 if (was_comp) {
6477 flush(1);
6478 was_comp=0;
6479 }
6480 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6481 #if USE_NORMAL_CALLING_CONVENTION
6482 raw_push_l_r(REG_PAR1);
6483 #endif
6484 raw_mov_l_mi((uae_u32)&regs.pc_p,
6485 (uae_u32)pc_hist[i].location);
6486 raw_call((uae_u32)cputbl[opcode]);
6487 #if PROFILE_UNTRANSLATED_INSNS
6488 // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6489 raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
6490 #endif
6491 #if USE_NORMAL_CALLING_CONVENTION
6492 raw_inc_sp(4);
6493 #endif
6494
6495 if (i < blocklen - 1) {
6496 uae_s8* branchadd;
6497
6498 raw_mov_l_rm(0,(uae_u32)specflags);
6499 raw_test_l_rr(0,0);
6500 raw_jz_b_oponly();
6501 branchadd=(uae_s8 *)get_target();
6502 emit_byte(0);
6503 raw_jmp((uae_u32)popall_do_nothing);
6504 *branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
6505 }
6506 }
6507 }
6508 #if 1 /* This isn't completely kosher yet; it really needs to
6509 be integrated into a general inter-block-dependency scheme */
6510 if (next_pc_p && taken_pc_p &&
6511 was_comp && taken_pc_p==current_block_pc_p) {
6512 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6513 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6514 uae_u8 x=bi1->needed_flags;
6515
6516 if (x==0xff || 1) { /* To be on the safe side */
6517 uae_u16* next=(uae_u16*)next_pc_p;
6518 uae_u32 op=DO_GET_OPCODE(next);
6519
6520 x=0x1f;
6521 x&=(~prop[op].set_flags);
6522 x|=prop[op].use_flags;
6523 }
6524
6525 x|=bi2->needed_flags;
6526 if (!(x & FLAG_CZNV)) {
6527 /* We can forget about flags */
6528 dont_care_flags();
6529 extra_len+=2; /* The next instruction now is part of this
6530 block */
6531 }
6532
6533 }
6534 #endif
6535 log_flush();
6536
6537 if (next_pc_p) { /* A branch was registered */
6538 uae_u32 t1=next_pc_p;
6539 uae_u32 t2=taken_pc_p;
6540 int cc=branch_cc;
6541
6542 uae_u32* branchadd;
6543 uae_u32* tba;
6544 bigstate tmp;
6545 blockinfo* tbi;
6546
6547 if (taken_pc_p<next_pc_p) {
6548 /* backward branch. Optimize for the "taken" case ---
6549 which means the raw_jcc should fall through when
6550 the 68k branch is taken. */
6551 t1=taken_pc_p;
6552 t2=next_pc_p;
6553 cc=branch_cc^1;
6554 }
6555
6556 tmp=live; /* ouch! This is big... */
6557 raw_jcc_l_oponly(cc);
6558 branchadd=(uae_u32*)get_target();
6559 emit_long(0);
6560
6561 /* predicted outcome */
6562 tbi=get_blockinfo_addr_new((void*)t1,1);
6563 match_states(tbi);
6564 raw_cmp_l_mi((uae_u32)specflags,0);
6565 raw_jcc_l_oponly(4);
6566 tba=(uae_u32*)get_target();
6567 emit_long(get_handler(t1)-((uae_u32)tba+4));
6568 raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
6569 raw_jmp((uae_u32)popall_do_nothing);
6570 create_jmpdep(bi,0,tba,t1);
6571
6572 align_target(align_jumps);
6573 /* not-predicted outcome */
6574 *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6575 live=tmp; /* Ouch again */
6576 tbi=get_blockinfo_addr_new((void*)t2,1);
6577 match_states(tbi);
6578
6579 //flush(1); /* Can only get here if was_comp==1 */
6580 raw_cmp_l_mi((uae_u32)specflags,0);
6581 raw_jcc_l_oponly(4);
6582 tba=(uae_u32*)get_target();
6583 emit_long(get_handler(t2)-((uae_u32)tba+4));
6584 raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
6585 raw_jmp((uae_u32)popall_do_nothing);
6586 create_jmpdep(bi,1,tba,t2);
6587 }
6588 else
6589 {
6590 if (was_comp) {
6591 flush(1);
6592 }
6593
6594 /* Let's find out where next_handler is... */
6595 if (was_comp && isinreg(PC_P)) {
6596 r=live.state[PC_P].realreg;
6597 raw_and_l_ri(r,TAGMASK);
6598 int r2 = (r==0) ? 1 : 0;
6599 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6600 raw_cmp_l_mi((uae_u32)specflags,0);
6601 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6602 raw_jmp_r(r2);
6603 }
6604 else if (was_comp && isconst(PC_P)) {
6605 uae_u32 v=live.state[PC_P].val;
6606 uae_u32* tba;
6607 blockinfo* tbi;
6608
6609 tbi=get_blockinfo_addr_new((void*)v,1);
6610 match_states(tbi);
6611
6612 raw_cmp_l_mi((uae_u32)specflags,0);
6613 raw_jcc_l_oponly(4);
6614 tba=(uae_u32*)get_target();
6615 emit_long(get_handler(v)-((uae_u32)tba+4));
6616 raw_mov_l_mi((uae_u32)&regs.pc_p,v);
6617 raw_jmp((uae_u32)popall_do_nothing);
6618 create_jmpdep(bi,0,tba,v);
6619 }
6620 else {
6621 r=REG_PC_TMP;
6622 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
6623 raw_and_l_ri(r,TAGMASK);
6624 int r2 = (r==0) ? 1 : 0;
6625 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6626 raw_cmp_l_mi((uae_u32)specflags,0);
6627 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6628 raw_jmp_r(r2);
6629 }
6630 }
6631 }
6632
6633 #if USE_MATCH
6634 if (callers_need_recompile(&live,&(bi->env))) {
6635 mark_callers_recompile(bi);
6636 }
6637
6638 big_to_small_state(&live,&(bi->env));
6639 #endif
6640
6641 #if USE_CHECKSUM_INFO
6642 remove_from_list(bi);
6643 if (trace_in_rom) {
6644 // No need to checksum that block trace on cache invalidation
6645 free_checksum_info_chain(bi->csi);
6646 bi->csi = NULL;
6647 add_to_dormant(bi);
6648 }
6649 else {
6650 calc_checksum(bi,&(bi->c1),&(bi->c2));
6651 add_to_active(bi);
6652 }
6653 #else
6654 if (next_pc_p+extra_len>=max_pcp &&
6655 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6656 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6657 else
6658 max_pcp+=LONGEST_68K_INST;
6659
6660 bi->len=max_pcp-min_pcp;
6661 bi->min_pcp=min_pcp;
6662
6663 remove_from_list(bi);
6664 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6665 add_to_dormant(bi); /* No need to checksum it on cache flush.
6666 Please don't start changing ROMs in
6667 flight! */
6668 }
6669 else {
6670 calc_checksum(bi,&(bi->c1),&(bi->c2));
6671 add_to_active(bi);
6672 }
6673 #endif
6674
6675 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6676
6677 #if JIT_DEBUG
6678 if (JITDebug)
6679 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6680
6681 if (JITDebug && disasm_block) {
6682 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6683 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6684 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6685 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6686 D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6687 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6688 getchar();
6689 }
6690 #endif
6691
6692 log_dump();
6693 align_target(align_jumps);
6694
6695 /* This is the non-direct handler */
6696 bi->handler=
6697 bi->handler_to_use=(cpuop_func *)get_target();
6698 raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6699 raw_jnz((uae_u32)popall_cache_miss);
6700 comp_pc_p=(uae_u8*)pc_hist[0].location;
6701
6702 bi->status=BI_FINALIZING;
6703 init_comp();
6704 match_states(bi);
6705 flush(1);
6706
6707 raw_jmp((uae_u32)bi->direct_handler);
6708
6709 current_compile_p=get_target();
6710 raise_in_cl_list(bi);
6711
6712 /* We will flush soon, anyway, so let's do it now */
6713 if (current_compile_p>=max_compile_start)
6714 flush_icache_hard(7);
6715
6716 bi->status=BI_ACTIVE;
6717 if (redo_current_block)
6718 block_need_recompile(bi);
6719
6720 #if PROFILE_COMPILE_TIME
6721 compile_time += (clock() - start_time);
6722 #endif
6723 }
6724 }
6725
6726 void do_nothing(void)
6727 {
6728 /* What did you expect this to do? */
6729 }
6730
6731 void exec_nostats(void)
6732 {
6733 for (;;) {
6734 uae_u32 opcode = GET_OPCODE;
6735 (*cpufunctbl[opcode])(opcode);
6736 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6737 return; /* We will deal with the spcflags in the caller */
6738 }
6739 }
6740 }
6741
6742 void execute_normal(void)
6743 {
6744 if (!check_for_cache_miss()) {
6745 cpu_history pc_hist[MAXRUN];
6746 int blocklen = 0;
6747 #if REAL_ADDRESSING || DIRECT_ADDRESSING
6748 start_pc_p = regs.pc_p;
6749 start_pc = get_virtual_address(regs.pc_p);
6750 #else
6751 start_pc_p = regs.pc_oldp;
6752 start_pc = regs.pc;
6753 #endif
6754 for (;;) { /* Take note: This is the do-it-normal loop */
6755 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6756 uae_u32 opcode = GET_OPCODE;
6757 #if FLIGHT_RECORDER
6758 m68k_record_step(m68k_getpc());
6759 #endif
6760 (*cpufunctbl[opcode])(opcode);
6761 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6762 compile_block(pc_hist, blocklen);
6763 return; /* We will deal with the spcflags in the caller */
6764 }
6765 /* No need to check regs.spcflags, because if they were set,
6766 we'd have ended up inside that "if" */
6767 }
6768 }
6769 }
6770
6771 typedef void (*compiled_handler)(void);
6772
6773 #ifdef X86_ASSEMBLY
6774 void (*m68k_compile_execute)(void) = NULL;
6775 #else
6776 void m68k_do_compile_execute(void)
6777 {
6778 for (;;) {
6779 ((compiled_handler)(pushall_call_handler))();
6780 /* Whenever we return from that, we should check spcflags */
6781 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6782 if (m68k_do_specialties ())
6783 return;
6784 }
6785 }
6786 }
6787 #endif