ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp
Revision: 1.23
Committed: 2004-01-12T15:29:30Z (20 years, 6 months ago) by cebix
Branch: MAIN
CVS Tags: nigel-build-16, nigel-build-15
Changes since 1.22: +2 -2 lines
Log Message:
Happy New Year! :)

File Contents

# Content
1 /*
2 * compiler/compemu_support.cpp - Core dynamic translation engine
3 *
4 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 *
6 * Adaptation for Basilisk II and improvements, copyright 2000-2004
7 * Gwenole Beauchesne
8 *
9 * Basilisk II (C) 1997-2004 Christian Bauer
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26 #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27 #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28 #endif
29
30 #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 #endif
33
34 #define USE_MATCH 0
35
36 /* kludge for Brian, so he can compile under MSVC++ */
37 #define USE_NORMAL_CALLING_CONVENTION 0
38
39 #ifndef WIN32
40 #include <unistd.h>
41 #include <sys/types.h>
42 #include <sys/mman.h>
43 #endif
44
45 #include <stdlib.h>
46 #include <fcntl.h>
47 #include <errno.h>
48
49 #include "sysdeps.h"
50 #include "cpu_emulation.h"
51 #include "main.h"
52 #include "prefs.h"
53 #include "user_strings.h"
54 #include "vm_alloc.h"
55
56 #include "m68k.h"
57 #include "memory.h"
58 #include "readcpu.h"
59 #include "newcpu.h"
60 #include "comptbl.h"
61 #include "compiler/compemu.h"
62 #include "fpu/fpu.h"
63 #include "fpu/flags.h"
64
65 #define DEBUG 1
66 #include "debug.h"
67
68 #ifdef ENABLE_MON
69 #include "mon.h"
70 #endif
71
72 #ifndef WIN32
73 #define PROFILE_COMPILE_TIME 1
74 #define PROFILE_UNTRANSLATED_INSNS 1
75 #endif
76
77 #ifdef WIN32
78 #undef write_log
79 #define write_log dummy_write_log
80 static void dummy_write_log(const char *, ...) { }
81 #endif
82
83 #if JIT_DEBUG
84 #undef abort
85 #define abort() do { \
86 fprintf(stderr, "Abort in file %s at line %d\n", __FILE__, __LINE__); \
87 exit(EXIT_FAILURE); \
88 } while (0)
89 #endif
90
91 #if PROFILE_COMPILE_TIME
92 #include <time.h>
93 static uae_u32 compile_count = 0;
94 static clock_t compile_time = 0;
95 static clock_t emul_start_time = 0;
96 static clock_t emul_end_time = 0;
97 #endif
98
99 #if PROFILE_UNTRANSLATED_INSNS
100 const int untranslated_top_ten = 20;
101 static uae_u32 raw_cputbl_count[65536] = { 0, };
102 static uae_u16 opcode_nums[65536];
103
104 static int untranslated_compfn(const void *e1, const void *e2)
105 {
106 return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
107 }
108 #endif
109
110 compop_func *compfunctbl[65536];
111 compop_func *nfcompfunctbl[65536];
112 cpuop_func *nfcpufunctbl[65536];
113 uae_u8* comp_pc_p;
114
115 // From newcpu.cpp
116 extern bool quit_program;
117
118 // gb-- Extra data for Basilisk II/JIT
119 #if JIT_DEBUG
120 static bool JITDebug = false; // Enable runtime disassemblers through mon?
121 #else
122 const bool JITDebug = false; // Don't use JIT debug mode at all
123 #endif
124
125 const uae_u32 MIN_CACHE_SIZE = 1024; // Minimal translation cache size (1 MB)
126 static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
127 static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
128 static bool lazy_flush = true; // Flag: lazy translation cache invalidation
129 static bool avoid_fpu = true; // Flag: compile FPU instructions ?
130 static bool have_cmov = false; // target has CMOV instructions ?
131 static bool have_rat_stall = true; // target has partial register stalls ?
132 const bool tune_alignment = true; // Tune code alignments for running CPU ?
133 const bool tune_nop_fillers = true; // Tune no-op fillers for architecture
134 static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly?
135 static int align_loops = 32; // Align the start of loops
136 static int align_jumps = 32; // Align the start of jumps
137 static int zero_fd = -1;
138 static int optcount[10] = {
139 10, // How often a block has to be executed before it is translated
140 0, // How often to use naive translation
141 0, 0, 0, 0,
142 -1, -1, -1, -1
143 };
144
/* Static per-opcode properties, indexed by (mapped) 68k opcode. */
struct op_properties {
	uae_u8 use_flags;	/* presumably: CC bits the insn reads -- set at table init, verify there */
	uae_u8 set_flags;	/* presumably: CC bits the insn writes -- verify at table init */
	uae_u8 is_addx;		/* nonzero for ADDX-style instructions -- TODO confirm at init site */
	uae_u8 cflow;		/* control-flow class: fl_* bits (see end_block/is_const_jump/may_trap) */
};
static op_properties prop[65536];
152
153 static inline int end_block(uae_u32 opcode)
154 {
155 return (prop[opcode].cflow & fl_end_block);
156 }
157
158 static inline bool is_const_jump(uae_u32 opcode)
159 {
160 return (prop[opcode].cflow == fl_const_jump);
161 }
162
163 static inline bool may_trap(uae_u32 opcode)
164 {
165 return (prop[opcode].cflow & fl_trap);
166 }
167
/* Map an opcode to the layout used by the opcode tables. */
static inline unsigned int cft_map (unsigned int f)
{
#ifndef HAVE_GET_WORD_UNSWAPPED
	/* Words are fetched in native order: identity mapping. */
	return f;
#else
	/* Word-swapped memory layout: exchange the two opcode bytes. */
	return ((f & 255) << 8) | ((f >> 8) & 255);
#endif
}
176
177 uae_u8* start_pc_p;
178 uae_u32 start_pc;
179 uae_u32 current_block_pc_p;
180 uae_u32 current_block_start_target;
181 uae_u32 needed_flags;
182 static uae_u32 next_pc_p;
183 static uae_u32 taken_pc_p;
184 static int branch_cc;
185 static int redo_current_block;
186
187 int segvcount=0;
188 int soft_flush_count=0;
189 int hard_flush_count=0;
190 int checksum_count=0;
191 static uae_u8* current_compile_p=NULL;
192 static uae_u8* max_compile_start;
193 static uae_u8* compiled_code=NULL;
194 static uae_s32 reg_alloc_run;
195
196 void* pushall_call_handler=NULL;
197 static void* popall_do_nothing=NULL;
198 static void* popall_exec_nostats=NULL;
199 static void* popall_execute_normal=NULL;
200 static void* popall_cache_miss=NULL;
201 static void* popall_recompile_block=NULL;
202 static void* popall_check_checksum=NULL;
203
204 /* The 68k only ever executes from even addresses. So right now, we
205 * waste half the entries in this array
206 * UPDATE: We now use those entries to store the start of the linked
207 * lists that we maintain for each hash result.
208 */
209 cacheline cache_tags[TAGSIZE];
210 int letit=0;
211 blockinfo* hold_bi[MAX_HOLD_BI];
212 blockinfo* active;
213 blockinfo* dormant;
214
215 /* 68040 */
216 extern struct cputbl op_smalltbl_0_nf[];
217 extern struct comptbl op_smalltbl_0_comp_nf[];
218 extern struct comptbl op_smalltbl_0_comp_ff[];
219
220 /* 68020 + 68881 */
221 extern struct cputbl op_smalltbl_1_nf[];
222
223 /* 68020 */
224 extern struct cputbl op_smalltbl_2_nf[];
225
226 /* 68010 */
227 extern struct cputbl op_smalltbl_3_nf[];
228
229 /* 68000 */
230 extern struct cputbl op_smalltbl_4_nf[];
231
232 /* 68000 slow but compatible. */
233 extern struct cputbl op_smalltbl_5_nf[];
234
235 static void flush_icache_hard(int n);
236 static void flush_icache_lazy(int n);
237 static void flush_icache_none(int n);
238 void (*flush_icache)(int n) = flush_icache_none;
239
240
241
242 bigstate live;
243 smallstate empty_ss;
244 smallstate default_ss;
245 static int optlev;
246
247 static int writereg(int r, int size);
248 static void unlock2(int r);
249 static void setlock(int r);
250 static int readreg_specific(int r, int size, int spec);
251 static int writereg_specific(int r, int size, int spec);
252 static void prepare_for_call_1(void);
253 static void prepare_for_call_2(void);
254 static void align_target(uae_u32 a);
255
256 static uae_s32 nextused[VREGS];
257
258 uae_u32 m68k_pc_offset;
259
260 /* Some arithmetic operations can be optimized away if the operands
261 * are known to be constant. But that's only a good idea when the
262 * side effects they would have on the flags are not important. This
263 * variable indicates whether we need the side effects or not
264 */
265 uae_u32 needflags=0;
266
267 /* Flag handling is complicated.
268 *
269 * x86 instructions create flags, which quite often are exactly what we
270 * want. So at times, the "68k" flags are actually in the x86 flags.
271 *
272 * Then again, sometimes we do x86 instructions that clobber the x86
273 * flags, but don't represent a corresponding m68k instruction. In that
274 * case, we have to save them.
275 *
276 * We used to save them to the stack, but now store them back directly
277 * into the regflags.cznv of the traditional emulation. Thus some odd
278 * names.
279 *
280 * So flags can be in either of two places (used to be three; boy were
281 * things complicated back then!); And either place can contain either
282 * valid flags or invalid trash (and on the stack, there was also the
283 * option of "nothing at all", now gone). A couple of variables keep
284 * track of the respective states.
285 *
286 * To make things worse, we might or might not be interested in the flags.
287 * by default, we are, but a call to dont_care_flags can change that
288 * until the next call to live_flags. If we are not, pretty much whatever
289 * is in the register and/or the native flags is seen as valid.
290 */
291
292 static __inline__ blockinfo* get_blockinfo(uae_u32 cl)
293 {
294 return cache_tags[cl+1].bi;
295 }
296
297 static __inline__ blockinfo* get_blockinfo_addr(void* addr)
298 {
299 blockinfo* bi=get_blockinfo(cacheline(addr));
300
301 while (bi) {
302 if (bi->pc_p==addr)
303 return bi;
304 bi=bi->next_same_cl;
305 }
306 return NULL;
307 }
308
309
310 /*******************************************************************
311 * All sorts of list related functions for all of the lists *
312 *******************************************************************/
313
314 static __inline__ void remove_from_cl_list(blockinfo* bi)
315 {
316 uae_u32 cl=cacheline(bi->pc_p);
317
318 if (bi->prev_same_cl_p)
319 *(bi->prev_same_cl_p)=bi->next_same_cl;
320 if (bi->next_same_cl)
321 bi->next_same_cl->prev_same_cl_p=bi->prev_same_cl_p;
322 if (cache_tags[cl+1].bi)
323 cache_tags[cl].handler=cache_tags[cl+1].bi->handler_to_use;
324 else
325 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
326 }
327
328 static __inline__ void remove_from_list(blockinfo* bi)
329 {
330 if (bi->prev_p)
331 *(bi->prev_p)=bi->next;
332 if (bi->next)
333 bi->next->prev_p=bi->prev_p;
334 }
335
336 static __inline__ void remove_from_lists(blockinfo* bi)
337 {
338 remove_from_list(bi);
339 remove_from_cl_list(bi);
340 }
341
342 static __inline__ void add_to_cl_list(blockinfo* bi)
343 {
344 uae_u32 cl=cacheline(bi->pc_p);
345
346 if (cache_tags[cl+1].bi)
347 cache_tags[cl+1].bi->prev_same_cl_p=&(bi->next_same_cl);
348 bi->next_same_cl=cache_tags[cl+1].bi;
349
350 cache_tags[cl+1].bi=bi;
351 bi->prev_same_cl_p=&(cache_tags[cl+1].bi);
352
353 cache_tags[cl].handler=bi->handler_to_use;
354 }
355
356 static __inline__ void raise_in_cl_list(blockinfo* bi)
357 {
358 remove_from_cl_list(bi);
359 add_to_cl_list(bi);
360 }
361
362 static __inline__ void add_to_active(blockinfo* bi)
363 {
364 if (active)
365 active->prev_p=&(bi->next);
366 bi->next=active;
367
368 active=bi;
369 bi->prev_p=&active;
370 }
371
372 static __inline__ void add_to_dormant(blockinfo* bi)
373 {
374 if (dormant)
375 dormant->prev_p=&(bi->next);
376 bi->next=dormant;
377
378 dormant=bi;
379 bi->prev_p=&dormant;
380 }
381
382 static __inline__ void remove_dep(dependency* d)
383 {
384 if (d->prev_p)
385 *(d->prev_p)=d->next;
386 if (d->next)
387 d->next->prev_p=d->prev_p;
388 d->prev_p=NULL;
389 d->next=NULL;
390 }
391
392 /* This block's code is about to be thrown away, so it no longer
393 depends on anything else */
394 static __inline__ void remove_deps(blockinfo* bi)
395 {
396 remove_dep(&(bi->dep[0]));
397 remove_dep(&(bi->dep[1]));
398 }
399
400 static __inline__ void adjust_jmpdep(dependency* d, cpuop_func* a)
401 {
402 *(d->jmp_off)=(uintptr)a-((uintptr)d->jmp_off+4);
403 }
404
405 /********************************************************************
406 * Soft flush handling support functions *
407 ********************************************************************/
408
409 static __inline__ void set_dhtu(blockinfo* bi, cpuop_func* dh)
410 {
411 //write_log("bi is %p\n",bi);
412 if (dh!=bi->direct_handler_to_use) {
413 dependency* x=bi->deplist;
414 //write_log("bi->deplist=%p\n",bi->deplist);
415 while (x) {
416 //write_log("x is %p\n",x);
417 //write_log("x->next is %p\n",x->next);
418 //write_log("x->prev_p is %p\n",x->prev_p);
419
420 if (x->jmp_off) {
421 adjust_jmpdep(x,dh);
422 }
423 x=x->next;
424 }
425 bi->direct_handler_to_use=dh;
426 }
427 }
428
429 static __inline__ void invalidate_block(blockinfo* bi)
430 {
431 int i;
432
433 bi->optlevel=0;
434 bi->count=optcount[0]-1;
435 bi->handler=NULL;
436 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
437 bi->direct_handler=NULL;
438 set_dhtu(bi,bi->direct_pen);
439 bi->needed_flags=0xff;
440 bi->status=BI_INVALID;
441 for (i=0;i<2;i++) {
442 bi->dep[i].jmp_off=NULL;
443 bi->dep[i].target=NULL;
444 }
445 remove_deps(bi);
446 }
447
448 static __inline__ void create_jmpdep(blockinfo* bi, int i, uae_u32* jmpaddr, uae_u32 target)
449 {
450 blockinfo* tbi=get_blockinfo_addr((void*)target);
451
452 Dif(!tbi) {
453 write_log("Could not create jmpdep!\n");
454 abort();
455 }
456 bi->dep[i].jmp_off=jmpaddr;
457 bi->dep[i].source=bi;
458 bi->dep[i].target=tbi;
459 bi->dep[i].next=tbi->deplist;
460 if (bi->dep[i].next)
461 bi->dep[i].next->prev_p=&(bi->dep[i].next);
462 bi->dep[i].prev_p=&(tbi->deplist);
463 tbi->deplist=&(bi->dep[i]);
464 }
465
466 static __inline__ void block_need_recompile(blockinfo * bi)
467 {
468 uae_u32 cl = cacheline(bi->pc_p);
469
470 set_dhtu(bi, bi->direct_pen);
471 bi->direct_handler = bi->direct_pen;
472
473 bi->handler_to_use = (cpuop_func *)popall_execute_normal;
474 bi->handler = (cpuop_func *)popall_execute_normal;
475 if (bi == cache_tags[cl + 1].bi)
476 cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
477 bi->status = BI_NEED_RECOMP;
478 }
479
480 static __inline__ void mark_callers_recompile(blockinfo * bi)
481 {
482 dependency *x = bi->deplist;
483
484 while (x) {
485 dependency *next = x->next; /* This disappears when we mark for
486 * recompilation and thus remove the
487 * blocks from the lists */
488 if (x->jmp_off) {
489 blockinfo *cbi = x->source;
490
491 Dif(cbi->status == BI_INVALID) {
492 // write_log("invalid block in dependency list\n"); // FIXME?
493 // abort();
494 }
495 if (cbi->status == BI_ACTIVE || cbi->status == BI_NEED_CHECK) {
496 block_need_recompile(cbi);
497 mark_callers_recompile(cbi);
498 }
499 else if (cbi->status == BI_COMPILING) {
500 redo_current_block = 1;
501 }
502 else if (cbi->status == BI_NEED_RECOMP) {
503 /* nothing */
504 }
505 else {
506 //write_log("Status %d in mark_callers\n",cbi->status); // FIXME?
507 }
508 }
509 x = next;
510 }
511 }
512
513 static __inline__ blockinfo* get_blockinfo_addr_new(void* addr, int setstate)
514 {
515 blockinfo* bi=get_blockinfo_addr(addr);
516 int i;
517
518 if (!bi) {
519 for (i=0;i<MAX_HOLD_BI && !bi;i++) {
520 if (hold_bi[i]) {
521 uae_u32 cl=cacheline(addr);
522
523 bi=hold_bi[i];
524 hold_bi[i]=NULL;
525 bi->pc_p=(uae_u8 *)addr;
526 invalidate_block(bi);
527 add_to_active(bi);
528 add_to_cl_list(bi);
529
530 }
531 }
532 }
533 if (!bi) {
534 write_log("Looking for blockinfo, can't find free one\n");
535 abort();
536 }
537 return bi;
538 }
539
540 static void prepare_block(blockinfo* bi);
541
542 /* Management of blockinfos.
543
544 A blockinfo struct is allocated whenever a new block has to be
545 compiled. If the list of free blockinfos is empty, we allocate a new
546 pool of blockinfos and link the newly created blockinfos altogether
547 into the list of free blockinfos. Otherwise, we simply pop a structure
548 off the free list.
549
550 Blockinfo are lazily deallocated, i.e. chained altogether in the
551 list of free blockinfos whenever a translation cache flush (hard or
552 soft) request occurs.
553 */
554
/* Pool allocator for JIT bookkeeping structures.

   Chunks are carved out of malloc'ed pools of roughly one page each and
   kept on a free list threaded through the chunks' `next' fields (so T
   must have a `next' member). Released chunks return to the free list;
   the pools themselves are only freed by the destructor. */
template< class T >
class LazyBlockAllocator
{
	enum {
		kPoolSize = 1 + 4096 / sizeof(T)	// ~one page worth of chunks
	};
	struct Pool {
		T chunk[kPoolSize];
		Pool * next;
	};
	Pool * mPools;		// every pool ever allocated, for teardown
	T * mChunks;		// free list of available chunks
public:
	LazyBlockAllocator() : mPools(0), mChunks(0) { }
	~LazyBlockAllocator();
	T * acquire();
	void release(T * const);
};
573
574 template< class T >
575 LazyBlockAllocator<T>::~LazyBlockAllocator()
576 {
577 Pool * currentPool = mPools;
578 while (currentPool) {
579 Pool * deadPool = currentPool;
580 currentPool = currentPool->next;
581 free(deadPool);
582 }
583 }
584
585 template< class T >
586 T * LazyBlockAllocator<T>::acquire()
587 {
588 if (!mChunks) {
589 // There is no chunk left, allocate a new pool and link the
590 // chunks into the free list
591 Pool * newPool = (Pool *)malloc(sizeof(Pool));
592 for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
593 chunk->next = mChunks;
594 mChunks = chunk;
595 }
596 newPool->next = mPools;
597 mPools = newPool;
598 }
599 T * chunk = mChunks;
600 mChunks = chunk->next;
601 return chunk;
602 }
603
604 template< class T >
605 void LazyBlockAllocator<T>::release(T * const chunk)
606 {
607 chunk->next = mChunks;
608 mChunks = chunk;
609 }
610
/* Bump allocator used when bookkeeping structures live inside the
   translation cache itself: structures are carved directly off the
   current compile pointer. Nothing is freed individually -- storage is
   reclaimed wholesale when the cache is invalidated. */
template< class T >
class HardBlockAllocator
{
public:
	T * acquire() {
		T * data = (T *)current_compile_p;
		current_compile_p += sizeof(T);
		return data;
	}

	void release(T * const chunk) {
		// Deallocated on invalidation
	}
};
625
626 #if USE_SEPARATE_BIA
627 static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
628 static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
629 #else
630 static HardBlockAllocator<blockinfo> BlockInfoAllocator;
631 static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
632 #endif
633
634 static __inline__ checksum_info *alloc_checksum_info(void)
635 {
636 checksum_info *csi = ChecksumInfoAllocator.acquire();
637 csi->next = NULL;
638 return csi;
639 }
640
641 static __inline__ void free_checksum_info(checksum_info *csi)
642 {
643 csi->next = NULL;
644 ChecksumInfoAllocator.release(csi);
645 }
646
647 static __inline__ void free_checksum_info_chain(checksum_info *csi)
648 {
649 while (csi != NULL) {
650 checksum_info *csi2 = csi->next;
651 free_checksum_info(csi);
652 csi = csi2;
653 }
654 }
655
/* Hand out a blockinfo with an empty checksum chain. */
static __inline__ blockinfo *alloc_blockinfo(void)
{
	blockinfo *bi = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
	bi->csi = NULL;
#endif
	return bi;
}
664
/* Return a blockinfo to its allocator, releasing any checksum chain
   still attached to it first. */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
	free_checksum_info_chain(bi->csi);
	bi->csi = NULL;
#endif
	BlockInfoAllocator.release(bi);
}
673
/* Top up the stash of preallocated blockinfos (hold_bi).
   Consumers (get_blockinfo_addr_new) empty lower-indexed slots first,
   so a non-NULL slot implies all later slots are still full -- hence
   the early return. */
static __inline__ void alloc_blockinfos(void)
{
	int i;
	blockinfo* bi;

	for (i=0;i<MAX_HOLD_BI;i++) {
		if (hold_bi[i])
			return;
		bi=hold_bi[i]=alloc_blockinfo();
		prepare_block(bi);
	}
}
686
687 /********************************************************************
688 * Functions to emit data into memory, and other general support *
689 ********************************************************************/
690
691 static uae_u8* target;
692
/* One-time initialization hook for the code emitter.
   Currently a no-op; kept as an extension point. */
static void emit_init(void)
{
}
696
697 static __inline__ void emit_byte(uae_u8 x)
698 {
699 *target++=x;
700 }
701
702 static __inline__ void emit_word(uae_u16 x)
703 {
704 *((uae_u16*)target)=x;
705 target+=2;
706 }
707
708 static __inline__ void emit_long(uae_u32 x)
709 {
710 *((uae_u32*)target)=x;
711 target+=4;
712 }
713
714 static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
715 {
716 memcpy((uae_u8 *)target,block,blocklen);
717 target+=blocklen;
718 }
719
720 static __inline__ uae_u32 reverse32(uae_u32 v)
721 {
722 #if 1
723 // gb-- We have specialized byteswapping functions, just use them
724 return do_byteswap_32(v);
725 #else
726 return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000);
727 #endif
728 }
729
730 /********************************************************************
731 * Getting the information about the target CPU *
732 ********************************************************************/
733
734 #include "codegen_x86.cpp"
735
736 void set_target(uae_u8* t)
737 {
738 target=t;
739 }
740
741 static __inline__ uae_u8* get_target_noopt(void)
742 {
743 return target;
744 }
745
746 __inline__ uae_u8* get_target(void)
747 {
748 return get_target_noopt();
749 }
750
751
752 /********************************************************************
753 * Flags status handling. EMIT TIME! *
754 ********************************************************************/
755
756 static void bt_l_ri_noclobber(R4 r, IMM i);
757
/* Ensure the emulated 68k flags are live in the native x86 flags
 * register. If they are currently only valid in the in-memory copy
 * (see the big comment block above), emit code to reload them through
 * FLAG_NREG2. Aborts if neither location holds valid flags. */
static void make_flags_live_internal(void)
{
    if (live.flags_in_flags==VALID)
	return;
    Dif (live.flags_on_stack==TRASH) {
	write_log("Want flags, got something on stack, but it is TRASH\n");
	abort();
    }
    if (live.flags_on_stack==VALID) {
	int tmp;
	tmp=readreg_specific(FLAGTMP,4,FLAG_NREG2);
	raw_reg_to_flags(tmp);	/* emit: load native flags from the register copy */
	unlock2(tmp);

	live.flags_in_flags=VALID;
	return;
    }
    write_log("Huh? live.flags_in_flags=%d, live.flags_on_stack=%d, but need to make live\n",
	      live.flags_in_flags,live.flags_on_stack);
    abort();
}
779
/* Spill the native x86 flags to the in-memory copy ("stack" for
 * historical reasons -- see the comment block above). A no-op when the
 * memory copy is already valid, or when nobody cares about the flags.
 * NOTE(review): the `Dif (...) abort(); else { ... }' shape is kept
 * as-is -- how the else binds depends on how Dif expands in non-debug
 * builds, so do not restructure without checking the macro. */
static void flags_to_stack(void)
{
    if (live.flags_on_stack==VALID)
	return;
    if (!live.flags_are_important) {
	live.flags_on_stack=VALID;
	return;
    }
    Dif (live.flags_in_flags!=VALID)
	abort();
    else {
	int tmp;
	tmp=writereg_specific(FLAGTMP,4,FLAG_NREG1);
	raw_flags_to_reg(tmp);	/* emit: store native flags into the memory copy */
	unlock2(tmp);
    }
    live.flags_on_stack=VALID;
}
798
799 static __inline__ void clobber_flags(void)
800 {
801 if (live.flags_in_flags==VALID && live.flags_on_stack!=VALID)
802 flags_to_stack();
803 live.flags_in_flags=TRASH;
804 }
805
806 /* Prepare for leaving the compiled stuff */
807 static __inline__ void flush_flags(void)
808 {
809 flags_to_stack();
810 return;
811 }
812
813 int touchcnt;
814
815 /********************************************************************
816 * Partial register flushing for optimized calls *
817 ********************************************************************/
818
/* Per-instruction 68k register usage: bit N set means register N
   (D0-D7 = bits 0-7, A0-A7 = bits 8-15) is read/written. */
struct regusage {
	uae_u16 rmask;		// registers read
	uae_u16 wmask;		// registers written
};
823
824 static inline void ru_set(uae_u16 *mask, int reg)
825 {
826 #if USE_OPTIMIZED_CALLS
827 *mask |= 1 << reg;
828 #endif
829 }
830
831 static inline bool ru_get(const uae_u16 *mask, int reg)
832 {
833 #if USE_OPTIMIZED_CALLS
834 return (*mask & (1 << reg));
835 #else
836 /* Default: instruction reads & write to register */
837 return true;
838 #endif
839 }
840
841 static inline void ru_set_read(regusage *ru, int reg)
842 {
843 ru_set(&ru->rmask, reg);
844 }
845
846 static inline void ru_set_write(regusage *ru, int reg)
847 {
848 ru_set(&ru->wmask, reg);
849 }
850
851 static inline bool ru_read_p(const regusage *ru, int reg)
852 {
853 return ru_get(&ru->rmask, reg);
854 }
855
856 static inline bool ru_write_p(const regusage *ru, int reg)
857 {
858 return ru_get(&ru->wmask, reg);
859 }
860
/* Account for the registers referenced by one effective address and
   advance m68k_pc_offset past its extension words. write_mode selects
   whether a Dreg/Areg operand lands in wmask or rmask; address
   registers used for indirection are always reads. */
static void ru_fill_ea(regusage *ru, int reg, amodes mode,
					   wordsizes size, int write_mode)
{
	switch (mode) {
	case Areg:
		reg += 8;
		/* fall through */
	case Dreg:
		ru_set(write_mode ? &ru->wmask : &ru->rmask, reg);
		break;
	case Ad16:
		/* skip displacement */
		m68k_pc_offset += 2;
		/* fall through -- the base address register is still read */
	case Aind:
	case Aipi:
	case Apdi:
		ru_set_read(ru, reg+8);
		break;
	case Ad8r:
		ru_set_read(ru, reg+8);
		/* fall through -- both modes read the index register */
	case PC8r: {
		/* Brief/full extension word: encodes the index register and,
		   in full format (bit 8), extra displacement words. */
		uae_u16 dp = comp_get_iword((m68k_pc_offset+=2)-2);
		reg = (dp >> 12) & 15;
		ru_set_read(ru, reg);
		if (dp & 0x100)
			m68k_pc_offset += (((dp & 0x30) >> 3) & 7) + ((dp & 3) * 2);
		break;
	}
	case PC16:
	case absw:
	case imm0:
	case imm1:
		m68k_pc_offset += 2;
		break;
	case absl:
	case imm2:
		m68k_pc_offset += 4;
		break;
	case immi:
		m68k_pc_offset += (size == sz_long) ? 4 : 2;
		break;
	}
}
905
906 /* TODO: split into a static initialization part and a dynamic one
907 (instructions depending on extension words) */
908 static void ru_fill(regusage *ru, uae_u32 opcode)
909 {
910 m68k_pc_offset += 2;
911
912 /* Default: no register is used or written to */
913 ru->rmask = 0;
914 ru->wmask = 0;
915
916 uae_u32 real_opcode = cft_map(opcode);
917 struct instr *dp = &table68k[real_opcode];
918
919 bool rw_dest = true;
920 bool handled = false;
921
922 /* Handle some instructions specifically */
923 uae_u16 reg, ext;
924 switch (dp->mnemo) {
925 case i_BFCHG:
926 case i_BFCLR:
927 case i_BFEXTS:
928 case i_BFEXTU:
929 case i_BFFFO:
930 case i_BFINS:
931 case i_BFSET:
932 case i_BFTST:
933 ext = comp_get_iword((m68k_pc_offset+=2)-2);
934 if (ext & 0x800) ru_set_read(ru, (ext >> 6) & 7);
935 if (ext & 0x020) ru_set_read(ru, ext & 7);
936 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
937 if (dp->dmode == Dreg)
938 ru_set_read(ru, dp->dreg);
939 switch (dp->mnemo) {
940 case i_BFEXTS:
941 case i_BFEXTU:
942 case i_BFFFO:
943 ru_set_write(ru, (ext >> 12) & 7);
944 break;
945 case i_BFINS:
946 ru_set_read(ru, (ext >> 12) & 7);
947 /* fall through */
948 case i_BFCHG:
949 case i_BFCLR:
950 case i_BSET:
951 if (dp->dmode == Dreg)
952 ru_set_write(ru, dp->dreg);
953 break;
954 }
955 handled = true;
956 rw_dest = false;
957 break;
958
959 case i_BTST:
960 rw_dest = false;
961 break;
962
963 case i_CAS:
964 {
965 ext = comp_get_iword((m68k_pc_offset+=2)-2);
966 int Du = ext & 7;
967 ru_set_read(ru, Du);
968 int Dc = (ext >> 6) & 7;
969 ru_set_read(ru, Dc);
970 ru_set_write(ru, Dc);
971 break;
972 }
973 case i_CAS2:
974 {
975 int Dc1, Dc2, Du1, Du2, Rn1, Rn2;
976 ext = comp_get_iword((m68k_pc_offset+=2)-2);
977 Rn1 = (ext >> 12) & 15;
978 Du1 = (ext >> 6) & 7;
979 Dc1 = ext & 7;
980 ru_set_read(ru, Rn1);
981 ru_set_read(ru, Du1);
982 ru_set_read(ru, Dc1);
983 ru_set_write(ru, Dc1);
984 ext = comp_get_iword((m68k_pc_offset+=2)-2);
985 Rn2 = (ext >> 12) & 15;
986 Du2 = (ext >> 6) & 7;
987 Dc2 = ext & 7;
988 ru_set_read(ru, Rn2);
989 ru_set_read(ru, Du2);
990 ru_set_write(ru, Dc2);
991 break;
992 }
993 case i_DIVL: case i_MULL:
994 m68k_pc_offset += 2;
995 break;
996 case i_LEA:
997 case i_MOVE: case i_MOVEA: case i_MOVE16:
998 rw_dest = false;
999 break;
1000 case i_PACK: case i_UNPK:
1001 rw_dest = false;
1002 m68k_pc_offset += 2;
1003 break;
1004 case i_TRAPcc:
1005 m68k_pc_offset += (dp->size == sz_long) ? 4 : 2;
1006 break;
1007 case i_RTR:
1008 /* do nothing, just for coverage debugging */
1009 break;
1010 /* TODO: handle EXG instruction */
1011 }
1012
1013 /* Handle A-Traps better */
1014 if ((real_opcode & 0xf000) == 0xa000) {
1015 handled = true;
1016 }
1017
1018 /* Handle EmulOps better */
1019 if ((real_opcode & 0xff00) == 0x7100) {
1020 handled = true;
1021 ru->rmask = 0xffff;
1022 ru->wmask = 0;
1023 }
1024
1025 if (dp->suse && !handled)
1026 ru_fill_ea(ru, dp->sreg, (amodes)dp->smode, (wordsizes)dp->size, 0);
1027
1028 if (dp->duse && !handled)
1029 ru_fill_ea(ru, dp->dreg, (amodes)dp->dmode, (wordsizes)dp->size, 1);
1030
1031 if (rw_dest)
1032 ru->rmask |= ru->wmask;
1033
1034 handled = handled || dp->suse || dp->duse;
1035
1036 /* Mark all registers as used/written if the instruction may trap */
1037 if (may_trap(opcode)) {
1038 handled = true;
1039 ru->rmask = 0xffff;
1040 ru->wmask = 0xffff;
1041 }
1042
1043 if (!handled) {
1044 write_log("ru_fill: %04x = { %04x, %04x }\n",
1045 real_opcode, ru->rmask, ru->wmask);
1046 abort();
1047 }
1048 }
1049
1050 /********************************************************************
1051 * register allocation per block logging *
1052 ********************************************************************/
1053
/* Per-block register allocation log (maintained by the log_* helpers). */
static uae_s8 vstate[VREGS];	/* disposition of each virtual register */
static uae_s8 vwritten[VREGS];	/* 1 once the v-reg has been written in this block */
static uae_s8 nstate[N_REGS];	/* v-reg number held by each native reg, or an L_* code */

#define L_UNKNOWN -127	/* not referenced yet in this block */
#define L_UNAVAIL -1	/* native reg used as scratch; original contents lost */
#define L_NEEDED -2	/* v-reg's incoming value was consumed */
#define L_UNNEEDED -3	/* v-reg overwritten before any read */
1062
/* Snapshot the current register-allocation log (vstate/nstate) into a
   smallstate. NOTE(review): parameter b is unused -- data is taken
   from the file-scope logging arrays, not from the bigstate. */
static __inline__ void big_to_small_state(bigstate * b, smallstate * s)
{
	int i;

	for (i = 0; i < VREGS; i++)
		s->virt[i] = vstate[i];
	for (i = 0; i < N_REGS; i++)
		s->nat[i] = nstate[i];
}
1072
1073 static __inline__ int callers_need_recompile(bigstate * b, smallstate * s)
1074 {
1075 int i;
1076 int reverse = 0;
1077
1078 for (i = 0; i < VREGS; i++) {
1079 if (vstate[i] != L_UNNEEDED && s->virt[i] == L_UNNEEDED)
1080 return 1;
1081 if (vstate[i] == L_UNNEEDED && s->virt[i] != L_UNNEEDED)
1082 reverse++;
1083 }
1084 for (i = 0; i < N_REGS; i++) {
1085 if (nstate[i] >= 0 && nstate[i] != s->nat[i])
1086 return 1;
1087 if (nstate[i] < 0 && s->nat[i] >= 0)
1088 reverse++;
1089 }
1090 if (reverse >= 2 && USE_MATCH)
1091 return 1; /* In this case, it might be worth recompiling the
1092 * callers */
1093 return 0;
1094 }
1095
1096 static __inline__ void log_startblock(void)
1097 {
1098 int i;
1099
1100 for (i = 0; i < VREGS; i++) {
1101 vstate[i] = L_UNKNOWN;
1102 vwritten[i] = 0;
1103 }
1104 for (i = 0; i < N_REGS; i++)
1105 nstate[i] = L_UNKNOWN;
1106 }
1107
1108 /* Using an n-reg for a temp variable */
1109 static __inline__ void log_isused(int n)
1110 {
1111 if (nstate[n] == L_UNKNOWN)
1112 nstate[n] = L_UNAVAIL;
1113 }
1114
1115 static __inline__ void log_visused(int r)
1116 {
1117 if (vstate[r] == L_UNKNOWN)
1118 vstate[r] = L_NEEDED;
1119 }
1120
1121 static __inline__ void do_load_reg(int n, int r)
1122 {
1123 if (r == FLAGTMP)
1124 raw_load_flagreg(n, r);
1125 else if (r == FLAGX)
1126 raw_load_flagx(n, r);
1127 else
1128 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1129 }
1130
1131 static __inline__ void check_load_reg(int n, int r)
1132 {
1133 raw_mov_l_rm(n, (uae_u32) live.state[r].mem);
1134 }
1135
1136 static __inline__ void log_vwrite(int r)
1137 {
1138 vwritten[r] = 1;
1139 }
1140
1141 /* Using an n-reg to hold a v-reg */
1142 static __inline__ void log_isreg(int n, int r)
1143 {
1144 static int count = 0;
1145
1146 if (nstate[n] == L_UNKNOWN && r < 16 && !vwritten[r] && USE_MATCH)
1147 nstate[n] = r;
1148 else {
1149 do_load_reg(n, r);
1150 if (nstate[n] == L_UNKNOWN)
1151 nstate[n] = L_UNAVAIL;
1152 }
1153 if (vstate[r] == L_UNKNOWN)
1154 vstate[r] = L_NEEDED;
1155 }
1156
1157 static __inline__ void log_clobberreg(int r)
1158 {
1159 if (vstate[r] == L_UNKNOWN)
1160 vstate[r] = L_UNNEEDED;
1161 }
1162
1163 /* This ends all possibility of clever register allocation */
1164
1165 static __inline__ void log_flush(void)
1166 {
1167 int i;
1168
1169 for (i = 0; i < VREGS; i++)
1170 if (vstate[i] == L_UNKNOWN)
1171 vstate[i] = L_NEEDED;
1172 for (i = 0; i < N_REGS; i++)
1173 if (nstate[i] == L_UNKNOWN)
1174 nstate[i] = L_UNAVAIL;
1175 }
1176
/* Debug dump of the register allocation log.
   NOTE: intentionally disabled -- the bare `return' below skips the
   whole dump; delete it to re-enable the output. */
static __inline__ void log_dump(void)
{
	int i;

	return;

	write_log("----------------------\n");
	for (i = 0; i < N_REGS; i++) {
		switch (nstate[i]) {
		case L_UNKNOWN:
			write_log("Nat %d : UNKNOWN\n", i);
			break;
		case L_UNAVAIL:
			write_log("Nat %d : UNAVAIL\n", i);
			break;
		default:
			write_log("Nat %d : %d\n", i, nstate[i]);
			break;
		}
	}
	for (i = 0; i < VREGS; i++) {
		if (vstate[i] == L_UNNEEDED)
			write_log("Virt %d: UNNEEDED\n", i);
	}
}
1202
1203 /********************************************************************
1204 * register status handling. EMIT TIME! *
1205 ********************************************************************/
1206
/* Set the cache status of virtual register r.  Becoming ISCONST means
   the old value is dead, so the liveness log is told it's unneeded. */
static __inline__ void set_status(int r, int status)
{
    if (status == ISCONST)
	log_clobberreg(r);
    live.state[r].status=status;
}
1213
1214 static __inline__ int isinreg(int r)
1215 {
1216 return live.state[r].status==CLEAN || live.state[r].status==DIRTY;
1217 }
1218
1219 static __inline__ void adjust_nreg(int r, uae_u32 val)
1220 {
1221 if (!val)
1222 return;
1223 raw_lea_l_brr(r,r,val);
1224 }
1225
/* Write virtual register r back to its memory slot if it is dirty.
   Any pending constant offset is first materialized into the native
   register (which makes it dirty at full size), then the dirty bytes
   are stored at the recorded dirtysize granularity. */
static void tomem(int r)
{
    int rr=live.state[r].realreg;

    if (isinreg(r)) {
	/* Only fold the offset when rr holds just this vreg and is not
	   locked, since the LEA changes rr's value for all holders. */
	if (live.state[r].val && live.nat[rr].nholds==1
	    && !live.nat[rr].locked) {
	    // write_log("RemovingA offset %x from reg %d (%d) at %p\n",
	    //	live.state[r].val,r,rr,target);
	    adjust_nreg(rr,live.state[r].val);
	    live.state[r].val=0;
	    live.state[r].dirtysize=4;
	    set_status(r,DIRTY);
	}
    }

    if (live.state[r].status==DIRTY) {
	/* Store only as many bytes as are actually dirty. */
	switch (live.state[r].dirtysize) {
	case 1: raw_mov_b_mr((uae_u32)live.state[r].mem,rr); break;
	case 2: raw_mov_w_mr((uae_u32)live.state[r].mem,rr); break;
	case 4: raw_mov_l_mr((uae_u32)live.state[r].mem,rr); break;
	default: abort();
	}
	log_vwrite(r);
	set_status(r,CLEAN);
	live.state[r].dirtysize=0;
    }
}
1254
/* True when virtual register r is currently represented as a known
   compile-time constant (not held in a native register). */
static __inline__ int isconst(int r)
{
    return live.state[r].status==ISCONST;
}

/* Non-inline, externally visible wrapper around isconst(). */
int is_const(int r)
{
    return isconst(r);
}
1264
/* If virtual register r is a constant, store that constant directly to
   its memory slot and mark r as in-memory.  No native register is
   consumed.  Registers handled by an NF_HANDLER must never reach this
   path. */
static __inline__ void writeback_const(int r)
{
    if (!isconst(r))
	return;
    Dif (live.state[r].needflush==NF_HANDLER) {
	write_log("Trying to write back constant NF_HANDLER!\n");
	abort();
    }

    raw_mov_l_mi((uae_u32)live.state[r].mem,live.state[r].val);
    log_vwrite(r);
    live.state[r].val=0;
    set_status(r,INMEM);
}
1279
1280 static __inline__ void tomem_c(int r)
1281 {
1282 if (isconst(r)) {
1283 writeback_const(r);
1284 }
1285 else
1286 tomem(r);
1287 }
1288
/* Remove virtual register r from its native register: spill it to
   memory if dirty, then unlink it from the native register's holder
   list (swap-with-last) and mark it INMEM. */
static void evict(int r)
{
    int rr;

    if (!isinreg(r))
	return;
    tomem(r);
    rr=live.state[r].realreg;

    /* A locked native register holding only r must not lose it. */
    Dif (live.nat[rr].locked &&
	live.nat[rr].nholds==1) {
	write_log("register %d in nreg %d is locked!\n",r,live.state[r].realreg);
	abort();
    }

    live.nat[rr].nholds--;
    if (live.nat[rr].nholds!=live.state[r].realind) { /* Was not last */
	/* Move the last holder into r's slot to keep the list dense. */
	int topreg=live.nat[rr].holds[live.nat[rr].nholds];
	int thisind=live.state[r].realind;

	live.nat[rr].holds[thisind]=topreg;
	live.state[topreg].realind=thisind;
    }
    live.state[r].realreg=-1;
    set_status(r,INMEM);
}
1315
/* Evict every virtual register currently held by native register r,
   leaving r completely free.  Iterates downward because evict()
   compacts the holds[] list with swap-with-last. */
static __inline__ void free_nreg(int r)
{
    int i=live.nat[r].nholds;

    while (i) {
	int vr;

	--i;
	vr=live.nat[r].holds[i];
	evict(vr);
    }
    Dif (live.nat[r].nholds!=0) {
	write_log("Failed to free nreg %d, nholds is %d\n",r,live.nat[r].nholds);
	abort();
    }
}
1332
/* Use with care! */
/* Forcibly mark an in-register virtual register r as CLEAN, discarding
   any dirty bytes and pending offset WITHOUT writing them back.  Only
   valid when the memory copy is known to be (or become) correct. */
static __inline__ void isclean(int r)
{
    if (!isinreg(r))
	return;
    live.state[r].validsize=4;
    live.state[r].dirtysize=0;
    live.state[r].val=0;
    set_status(r,CLEAN);
}

/* Detach r from its native register, discarding (not spilling) any
   dirty state first via isclean(). */
static __inline__ void disassociate(int r)
{
    isclean(r);
    evict(r);
}
1349
/* Make virtual register r a compile-time constant with value val,
   discarding whatever it held before. */
static __inline__ void set_const(int r, uae_u32 val)
{
    disassociate(r);
    live.state[r].val=val;
    set_status(r,ISCONST);
}

/* Return r's pending constant offset (or constant value when r is
   ISCONST). */
static __inline__ uae_u32 get_offset(int r)
{
    return live.state[r].val;
}
1361
/* Allocate a native register for virtual register r.
 *
 * size        - access size needed (1/2/4); byte/word allocations only
 *               consider natregs with canbyte/canword capability
 * willclobber - nonzero when the caller will overwrite the value, so
 *               no load from memory is required
 * hint        - preferred native register, or -1 for no preference
 *
 * Victim selection is LRU-ish: least recently touched wins, empty
 * registers win outright, and the hint gets a large bonus.  Returns
 * the chosen native register (usually bestreg; see the partial-merge
 * special case below which returns the existing rr instead). */
static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness=0; /* to shut up gcc */
    bestreg=-1;
    when=2000000000;

    for (i=N_REGS;i--;) {
	badness=live.nat[i].touched;
	if (live.nat[i].nholds==0)
	    badness=0;
	if (i==hint)
	    badness-=200000000;
	if (!live.nat[i].locked && badness<when) {
	    if ((size==1 && live.nat[i].canbyte) ||
		(size==2 && live.nat[i].canword) ||
		(size==4)) {
		bestreg=i;
		when=badness;
		if (live.nat[i].nholds==0 && hint<0)
		    break;
		if (i==hint)
		    break;
	    }
	}
    }
    Dif (bestreg==-1)
	abort();

    if (live.nat[bestreg].nholds>0) {
	free_nreg(bestreg);
    }
    if (isinreg(r)) {
	int rr=live.state[r].realreg;
	/* This will happen if we read a partially dirty register at a
	   bigger size */
	Dif (willclobber || live.state[r].validsize>=size)
	    abort();
	Dif (live.nat[rr].nholds!=1)
	    abort();
	if (size==4 && live.state[r].validsize==2) {
	    /* Merge: low word is valid in rr, fetch the full value from
	       memory into bestreg, zero its low word, and OR (via LEA
	       add) the two halves together in rr. */
	    log_isused(bestreg);
	    log_visused(r);
	    raw_mov_l_rm(bestreg,(uae_u32)live.state[r].mem);
	    raw_bswap_32(bestreg);
	    raw_zero_extend_16_rr(rr,rr);
	    raw_zero_extend_16_rr(bestreg,bestreg);
	    raw_bswap_32(bestreg);
	    raw_lea_l_brr_indexed(rr,rr,bestreg,1,0);
	    live.state[r].validsize=4;
	    live.nat[rr].touched=touchcnt++;
	    return rr;
	}
	if (live.state[r].validsize==1) {
	    /* Nothing yet */
	}
	evict(r);
    }

    if (!willclobber) {
	if (live.state[r].status!=UNDEF) {
	    if (isconst(r)) {
		/* Materialize the constant; the memory slot is stale,
		   so the register becomes dirty at full size. */
		raw_mov_l_ri(bestreg,live.state[r].val);
		live.state[r].val=0;
		live.state[r].dirtysize=4;
		set_status(r,DIRTY);
		log_isused(bestreg);
	    }
	    else {
		log_isreg(bestreg, r); /* This will also load it! */
		live.state[r].dirtysize=0;
		set_status(r,CLEAN);
	    }
	}
	else {
	    live.state[r].val=0;
	    live.state[r].dirtysize=0;
	    set_status(r,CLEAN);
	    log_isused(bestreg);
	}
	live.state[r].validsize=4;
    }
    else { /* this is the easiest way, but not optimal. FIXME! */
	/* Now it's trickier, but hopefully still OK */
	if (!isconst(r) || size==4) {
	    live.state[r].validsize=size;
	    live.state[r].dirtysize=size;
	    live.state[r].val=0;
	    set_status(r,DIRTY);
	    if (size == 4) {
		log_clobberreg(r);
		log_isused(bestreg);
	    }
	    else {
		log_visused(r);
		log_isused(bestreg);
	    }
	}
	else {
	    /* Partial clobber of a constant: load the constant so the
	       untouched bytes stay correct. */
	    if (live.state[r].status!=UNDEF)
		raw_mov_l_ri(bestreg,live.state[r].val);
	    live.state[r].val=0;
	    live.state[r].validsize=4;
	    live.state[r].dirtysize=4;
	    set_status(r,DIRTY);
	    log_isused(bestreg);
	}
    }
    /* Link r into bestreg's holder list. */
    live.state[r].realreg=bestreg;
    live.state[r].realind=live.nat[bestreg].nholds;
    live.nat[bestreg].touched=touchcnt++;
    live.nat[bestreg].holds[live.nat[bestreg].nholds]=r;
    live.nat[bestreg].nholds++;

    return bestreg;
}
1480
/* alloc_reg_hinted() without a register preference. */
static int alloc_reg(int r, int size, int willclobber)
{
    return alloc_reg_hinted(r,size,willclobber,-1);
}

/* Decrement native register r's lock count (must be locked). */
static void unlock2(int r)
{
    Dif (!live.nat[r].locked)
	abort();
    live.nat[r].locked--;
}

/* Increment native register r's lock count, pinning it against
   eviction and reallocation. */
static void setlock(int r)
{
    live.nat[r].locked++;
}
1497
1498
1499 static void mov_nregs(int d, int s)
1500 {
1501 int ns=live.nat[s].nholds;
1502 int nd=live.nat[d].nholds;
1503 int i;
1504
1505 if (s==d)
1506 return;
1507
1508 if (nd>0)
1509 free_nreg(d);
1510
1511 log_isused(d);
1512 raw_mov_l_rr(d,s);
1513
1514 for (i=0;i<live.nat[s].nholds;i++) {
1515 int vs=live.nat[s].holds[i];
1516
1517 live.state[vs].realreg=d;
1518 live.state[vs].realind=i;
1519 live.nat[d].holds[i]=vs;
1520 }
1521 live.nat[d].nholds=live.nat[s].nholds;
1522
1523 live.nat[s].nholds=0;
1524 }
1525
1526
1527 static __inline__ void make_exclusive(int r, int size, int spec)
1528 {
1529 int clobber;
1530 reg_status oldstate;
1531 int rr=live.state[r].realreg;
1532 int nr;
1533 int nind;
1534 int ndirt=0;
1535 int i;
1536
1537 if (!isinreg(r))
1538 return;
1539 if (live.nat[rr].nholds==1)
1540 return;
1541 for (i=0;i<live.nat[rr].nholds;i++) {
1542 int vr=live.nat[rr].holds[i];
1543 if (vr!=r &&
1544 (live.state[vr].status==DIRTY || live.state[vr].val))
1545 ndirt++;
1546 }
1547 if (!ndirt && size<live.state[r].validsize && !live.nat[rr].locked) {
1548 /* Everything else is clean, so let's keep this register */
1549 for (i=0;i<live.nat[rr].nholds;i++) {
1550 int vr=live.nat[rr].holds[i];
1551 if (vr!=r) {
1552 evict(vr);
1553 i--; /* Try that index again! */
1554 }
1555 }
1556 Dif (live.nat[rr].nholds!=1) {
1557 write_log("natreg %d holds %d vregs, %d not exclusive\n",
1558 rr,live.nat[rr].nholds,r);
1559 abort();
1560 }
1561 return;
1562 }
1563
1564 /* We have to split the register */
1565 oldstate=live.state[r];
1566
1567 setlock(rr); /* Make sure this doesn't go away */
1568 /* Forget about r being in the register rr */
1569 disassociate(r);
1570 /* Get a new register, that we will clobber completely */
1571 if (oldstate.status==DIRTY) {
1572 /* If dirtysize is <4, we need a register that can handle the
1573 eventual smaller memory store! Thanks to Quake68k for exposing
1574 this detail ;-) */
1575 nr=alloc_reg_hinted(r,oldstate.dirtysize,1,spec);
1576 }
1577 else {
1578 nr=alloc_reg_hinted(r,4,1,spec);
1579 }
1580 nind=live.state[r].realind;
1581 live.state[r]=oldstate; /* Keep all the old state info */
1582 live.state[r].realreg=nr;
1583 live.state[r].realind=nind;
1584
1585 if (size<live.state[r].validsize) {
1586 if (live.state[r].val) {
1587 /* Might as well compensate for the offset now */
1588 raw_lea_l_brr(nr,rr,oldstate.val);
1589 live.state[r].val=0;
1590 live.state[r].dirtysize=4;
1591 set_status(r,DIRTY);
1592 }
1593 else
1594 raw_mov_l_rr(nr,rr); /* Make another copy */
1595 }
1596 unlock2(rr);
1597 }
1598
/* Lazily add a constant offset to virtual register r; no code is
   emitted until the offset must be materialized (remove_offset/tomem). */
static __inline__ void add_offset(int r, uae_u32 off)
{
    live.state[r].val+=off;
}
1603
1604 static __inline__ void remove_offset(int r, int spec)
1605 {
1606 reg_status oldstate;
1607 int rr;
1608
1609 if (isconst(r))
1610 return;
1611 if (live.state[r].val==0)
1612 return;
1613 if (isinreg(r) && live.state[r].validsize<4)
1614 evict(r);
1615
1616 if (!isinreg(r))
1617 alloc_reg_hinted(r,4,0,spec);
1618
1619 Dif (live.state[r].validsize!=4) {
1620 write_log("Validsize=%d in remove_offset\n",live.state[r].validsize);
1621 abort();
1622 }
1623 make_exclusive(r,0,-1);
1624 /* make_exclusive might have done the job already */
1625 if (live.state[r].val==0)
1626 return;
1627
1628 rr=live.state[r].realreg;
1629
1630 if (live.nat[rr].nholds==1) {
1631 //write_log("RemovingB offset %x from reg %d (%d) at %p\n",
1632 // live.state[r].val,r,rr,target);
1633 adjust_nreg(rr,live.state[r].val);
1634 live.state[r].dirtysize=4;
1635 live.state[r].val=0;
1636 set_status(r,DIRTY);
1637 return;
1638 }
1639 write_log("Failed in remove_offset\n");
1640 abort();
1641 }
1642
1643 static __inline__ void remove_all_offsets(void)
1644 {
1645 int i;
1646
1647 for (i=0;i<VREGS;i++)
1648 remove_offset(i,-1);
1649 }
1650
/* Map virtual register r into a native register for reading.
 *
 * size       - bytes the caller will read (1/2/4)
 * spec       - required native register, or -1 for any
 * can_offset - nonzero if the caller tolerates a pending constant
 *              offset (otherwise it is materialized first)
 *
 * Returns the locked native register; the caller must unlock2() it. */
static __inline__ int readreg_general(int r, int size, int spec, int can_offset)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    if (!can_offset)
	remove_offset(r,spec);

    if (isinreg(r) && live.state[r].validsize>=size) {
	n=live.state[r].realreg;
	/* The current natreg is usable only if it supports the access
	   size (or a specific target will be moved to anyway). */
	switch(size) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	case 4:
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:size,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    return answer;
}
1698
1699
1700
/* Read r in any native register, offset materialized. */
static int readreg(int r, int size)
{
    return readreg_general(r,size,-1,0);
}

/* Read r in the specific native register spec. */
static int readreg_specific(int r, int size, int spec)
{
    return readreg_general(r,size,spec,0);
}

/* Read r in any native register, pending offset allowed (caller must
   compensate via get_offset()). */
static int readreg_offset(int r, int size)
{
    return readreg_general(r,size,-1,1);
}
1715
1716 /* writereg_general(r, size, spec)
1717 *
1718 * INPUT
1719 * - r : mid-layer register
1720 * - size : requested size (1/2/4)
1721 * - spec : -1 if find or make a register free, otherwise specifies
1722 * the physical register to use in any case
1723 *
1724 * OUTPUT
1725 * - hard (physical, x86 here) register allocated to virtual register r
1726 */
/* See the contract comment above: map virtual register r into a native
   register for writing `size` bytes, making it exclusive first so
   co-resident vregs are not corrupted.  Returns the locked natreg. */
static __inline__ int writereg_general(int r, int size, int spec)
{
    int n;
    int answer=-1;

    /* A sub-long write must not be combined with a pending offset. */
    if (size<4) {
	remove_offset(r,spec);
    }

    make_exclusive(r,size,spec);
    if (isinreg(r)) {
	/* Grow valid/dirty extents to cover the write. */
	int nvsize=size>live.state[r].validsize?size:live.state[r].validsize;
	int ndsize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
	n=live.state[r].realreg;

	Dif (live.nat[n].nholds!=1)
	    abort();
	switch(size) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		live.state[r].dirtysize=ndsize;
		live.state[r].validsize=nvsize;
		answer=n;
	    }
	    break;
	case 4:
	    live.state[r].dirtysize=ndsize;
	    live.state[r].validsize=nvsize;
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,size,1,spec);
    }
    if (spec>=0 && spec!=answer) {
	mov_nregs(spec,answer);
	answer=spec;
    }
    if (live.state[r].status==UNDEF)
	live.state[r].validsize=4;
    live.state[r].dirtysize=size>live.state[r].dirtysize?size:live.state[r].dirtysize;
    live.state[r].validsize=size>live.state[r].validsize?size:live.state[r].validsize;

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;
    if (size==4) {
	live.state[r].val=0;
    }
    else {
	/* A partial write cannot coexist with a pending offset. */
	Dif (live.state[r].val) {
	    write_log("Problem with val\n");
	    abort();
	}
    }
    set_status(r,DIRTY);
    return answer;
}
1797
/* Write r using any native register. */
static int writereg(int r, int size)
{
    return writereg_general(r,size,-1);
}

/* Write r using the specific native register spec. */
static int writereg_specific(int r, int size, int spec)
{
    return writereg_general(r,size,spec);
}
1807
/* Map virtual register r for read-modify-write: rsize bytes are read,
   wsize bytes (>= rsize) are written.  Returns the locked natreg. */
static __inline__ int rmw_general(int r, int wsize, int rsize, int spec)
{
    int n;
    int answer=-1;

    if (live.state[r].status==UNDEF) {
	write_log("WARNING: Unexpected read of undefined register %d\n",r);
    }
    remove_offset(r,spec);
    make_exclusive(r,0,spec);

    Dif (wsize<rsize) {
	write_log("Cannot handle wsize<rsize in rmw_general()\n");
	abort();
    }
    if (isinreg(r) && live.state[r].validsize>=rsize) {
	n=live.state[r].realreg;
	Dif (live.nat[n].nholds!=1)
	    abort();

	/* The current natreg must support the read size. */
	switch(rsize) {
	case 1:
	    if (live.nat[n].canbyte || spec>=0) {
		answer=n;
	    }
	    break;
	case 2:
	    if (live.nat[n].canword || spec>=0) {
		answer=n;
	    }
	    break;
	case 4:
	    answer=n;
	    break;
	default: abort();
	}
	if (answer<0)
	    evict(r);
    }
    /* either the value was in memory to start with, or it was evicted and
       is in memory now */
    if (answer<0) {
	answer=alloc_reg_hinted(r,spec>=0?4:rsize,0,spec);
    }

    if (spec>=0 && spec!=answer) {
	/* Too bad */
	mov_nregs(spec,answer);
	answer=spec;
    }
    /* Grow dirty/valid extents to the write size. */
    if (wsize>live.state[r].dirtysize)
	live.state[r].dirtysize=wsize;
    if (wsize>live.state[r].validsize)
	live.state[r].validsize=wsize;
    set_status(r,DIRTY);

    live.nat[answer].locked++;
    live.nat[answer].touched=touchcnt++;

    Dif (live.state[r].val) {
	write_log("Problem with val(rmw)\n");
	abort();
    }
    return answer;
}
1873
/* Read-modify-write r using any native register. */
static int rmw(int r, int wsize, int rsize)
{
    return rmw_general(r,wsize,rsize,-1);
}

/* Read-modify-write r using the specific native register spec. */
static int rmw_specific(int r, int wsize, int rsize, int spec)
{
    return rmw_general(r,wsize,rsize,spec);
}
1883
1884
1885 /* needed for restoring the carry flag on non-P6 cores */
1886 static void bt_l_ri_noclobber(R4 r, IMM i)
1887 {
1888 int size=4;
1889 if (i<16)
1890 size=2;
1891 r=readreg(r,size);
1892 raw_bt_l_ri(r,i);
1893 unlock2(r);
1894 }
1895
1896 /********************************************************************
1897 * FPU register status handling. EMIT TIME! *
1898 ********************************************************************/
1899
/* Spill FPU virtual register r to memory if dirty, keeping the value
   in its native FP register (status becomes CLEAN). */
static void f_tomem(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=CLEAN;
    }
}
1911
/* Spill FPU virtual register r to memory if dirty, popping it off the
   FP stack in the process (status becomes INMEM). */
static void f_tomem_drop(int r)
{
    if (live.fate[r].status==DIRTY) {
#if USE_LONG_DOUBLE
	raw_fmov_ext_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#else
	raw_fmov_mr_drop((uae_u32)live.fate[r].mem,live.fate[r].realreg);
#endif
	live.fate[r].status=INMEM;
    }
}
1923
1924
1925 static __inline__ int f_isinreg(int r)
1926 {
1927 return live.fate[r].status==CLEAN || live.fate[r].status==DIRTY;
1928 }
1929
/* Remove FPU virtual register r from its native FP register, spilling
   if dirty (with a drop when r is the sole holder) and unlinking it
   from the holder list (swap-with-last). */
static void f_evict(int r)
{
    int rr;

    if (!f_isinreg(r))
	return;
    rr=live.fate[r].realreg;
    if (live.fat[rr].nholds==1)
	f_tomem_drop(r);
    else
	f_tomem(r);

    Dif (live.fat[rr].locked &&
	live.fat[rr].nholds==1) {
	write_log("FPU register %d in nreg %d is locked!\n",r,live.fate[r].realreg);
	abort();
    }

    live.fat[rr].nholds--;
    if (live.fat[rr].nholds!=live.fate[r].realind) { /* Was not last */
	int topreg=live.fat[rr].holds[live.fat[rr].nholds];
	int thisind=live.fate[r].realind;
	live.fat[rr].holds[thisind]=topreg;
	live.fate[topreg].realind=thisind;
    }
    live.fate[r].status=INMEM;
    live.fate[r].realreg=-1;
}
1958
/* Evict every FPU virtual register held by native FP register r,
   leaving r free.  Iterates downward because f_evict() compacts the
   holds[] list with swap-with-last. */
static __inline__ void f_free_nreg(int r)
{
    int i=live.fat[r].nholds;

    while (i) {
	int vr;

	--i;
	vr=live.fat[r].holds[i];
	f_evict(vr);
    }
    Dif (live.fat[r].nholds!=0) {
	write_log("Failed to free nreg %d, nholds is %d\n",r,live.fat[r].nholds);
	abort();
    }
}
1975
1976
/* Use with care! */
/* Forcibly mark in-register FPU vreg r as CLEAN without writing it
   back; only valid when the memory copy is known correct. */
static __inline__ void f_isclean(int r)
{
    if (!f_isinreg(r))
	return;
    live.fate[r].status=CLEAN;
}

/* Detach FPU vreg r from its native register, discarding (not
   spilling) dirty state first. */
static __inline__ void f_disassociate(int r)
{
    f_isclean(r);
    f_evict(r);
}
1990
1991
1992
/* Allocate a native FP register for FPU virtual register r.
   willclobber nonzero skips the load from memory (the caller will
   overwrite the value).  Victim selection mirrors alloc_reg_hinted:
   least recently touched, empty registers win outright.  Returns the
   chosen native FP register. */
static int f_alloc_reg(int r, int willclobber)
{
    int bestreg;
    uae_s32 when;
    int i;
    uae_s32 badness;
    bestreg=-1;
    when=2000000000;
    for (i=N_FREGS;i--;) {
	badness=live.fat[i].touched;
	if (live.fat[i].nholds==0)
	    badness=0;

	if (!live.fat[i].locked && badness<when) {
	    bestreg=i;
	    when=badness;
	    if (live.fat[i].nholds==0)
		break;
	}
    }
    Dif (bestreg==-1)
	abort();

    if (live.fat[bestreg].nholds>0) {
	f_free_nreg(bestreg);
    }
    if (f_isinreg(r)) {
	f_evict(r);
    }

    if (!willclobber) {
	if (live.fate[r].status!=UNDEF) {
#if USE_LONG_DOUBLE
	    raw_fmov_ext_rm(bestreg,(uae_u32)live.fate[r].mem);
#else
	    raw_fmov_rm(bestreg,(uae_u32)live.fate[r].mem);
#endif
	}
	live.fate[r].status=CLEAN;
    }
    else {
	live.fate[r].status=DIRTY;
    }
    /* Link r into bestreg's holder list. */
    live.fate[r].realreg=bestreg;
    live.fate[r].realind=live.fat[bestreg].nholds;
    live.fat[bestreg].touched=touchcnt++;
    live.fat[bestreg].holds[live.fat[bestreg].nholds]=r;
    live.fat[bestreg].nholds++;

    return bestreg;
}
2044
/* Decrement native FP register r's lock count (must be locked). */
static void f_unlock(int r)
{
    Dif (!live.fat[r].locked)
	abort();
    live.fat[r].locked--;
}

/* Increment native FP register r's lock count. */
static void f_setlock(int r)
{
    live.fat[r].locked++;
}
2056
2057 static __inline__ int f_readreg(int r)
2058 {
2059 int n;
2060 int answer=-1;
2061
2062 if (f_isinreg(r)) {
2063 n=live.fate[r].realreg;
2064 answer=n;
2065 }
2066 /* either the value was in memory to start with, or it was evicted and
2067 is in memory now */
2068 if (answer<0)
2069 answer=f_alloc_reg(r,0);
2070
2071 live.fat[answer].locked++;
2072 live.fat[answer].touched=touchcnt++;
2073 return answer;
2074 }
2075
/* Ensure FPU virtual register r is the sole occupant of a native FP
   register.  clobber nonzero means the caller will overwrite r, so no
   value copy is needed after the split. */
static __inline__ void f_make_exclusive(int r, int clobber)
{
    freg_status oldstate;
    int rr=live.fate[r].realreg;
    int nr;
    int nind;
    int ndirt=0;
    int i;

    if (!f_isinreg(r))
	return;
    if (live.fat[rr].nholds==1)
	return;
    /* Count co-residents that would need a spill. */
    for (i=0;i<live.fat[rr].nholds;i++) {
	int vr=live.fat[rr].holds[i];
	if (vr!=r && live.fate[vr].status==DIRTY)
	    ndirt++;
    }
    if (!ndirt && !live.fat[rr].locked) {
	/* Everything else is clean, so let's keep this register */
	for (i=0;i<live.fat[rr].nholds;i++) {
	    int vr=live.fat[rr].holds[i];
	    if (vr!=r) {
		f_evict(vr);
		i--; /* Try that index again! */
	    }
	}
	Dif (live.fat[rr].nholds!=1) {
	    write_log("realreg %d holds %d (",rr,live.fat[rr].nholds);
	    for (i=0;i<live.fat[rr].nholds;i++) {
		write_log(" %d(%d,%d)",live.fat[rr].holds[i],
		    live.fate[live.fat[rr].holds[i]].realreg,
		    live.fate[live.fat[rr].holds[i]].realind);
	    }
	    write_log("\n");
	    abort();
	}
	return;
    }

    /* We have to split the register */
    oldstate=live.fate[r];

    f_setlock(rr); /* Make sure this doesn't go away */
    /* Forget about r being in the register rr */
    f_disassociate(r);
    /* Get a new register, that we will clobber completely */
    nr=f_alloc_reg(r,1);
    nind=live.fate[r].realind;
    if (!clobber)
	raw_fmov_rr(nr,rr); /* Make another copy */
    live.fate[r]=oldstate; /* Keep all the old state info */
    live.fate[r].realreg=nr;
    live.fate[r].realind=nind;
    f_unlock(rr);
}
2132
2133
2134 static __inline__ int f_writereg(int r)
2135 {
2136 int n;
2137 int answer=-1;
2138
2139 f_make_exclusive(r,1);
2140 if (f_isinreg(r)) {
2141 n=live.fate[r].realreg;
2142 answer=n;
2143 }
2144 if (answer<0) {
2145 answer=f_alloc_reg(r,1);
2146 }
2147 live.fate[r].status=DIRTY;
2148 live.fat[answer].locked++;
2149 live.fat[answer].touched=touchcnt++;
2150 return answer;
2151 }
2152
/* Map FPU virtual register r for read-modify-write: exclusive, value
   preserved, marked dirty.  Returns the locked native FP register. */
static int f_rmw(int r)
{
    int n;

    f_make_exclusive(r,0);
    if (f_isinreg(r)) {
	n=live.fate[r].realreg;
    }
    else
	n=f_alloc_reg(r,0);
    live.fate[r].status=DIRTY;
    live.fat[n].locked++;
    live.fat[n].touched=touchcnt++;
    return n;
}
2168
/* Transfer the FPU condition codes (in FP_RESULT) into the emulated
   integer flags.  tmp is a scratch virtual register used only when the
   raw transfer clobbers a specific native register
   (FFLAG_NREG_CLOBBER_CONDITION). */
static void fflags_into_flags_internal(uae_u32 tmp)
{
    int r;

    clobber_flags();
    r=f_readreg(FP_RESULT);
    if (FFLAG_NREG_CLOBBER_CONDITION) {
	/* Pin tmp to the natreg the transfer clobbers, then forget it. */
	int tmp2=tmp;
	tmp=writereg_specific(tmp,4,FFLAG_NREG);
	raw_fflags_into_flags(r);
	unlock2(tmp);
	forget_about(tmp2);
    }
    else
	raw_fflags_into_flags(r);
    f_unlock(r);
    live_flags();
}
2187
2188
2189
2190
2191 /********************************************************************
2192 * CPU functions exposed to gencomp. Both CREATE and EMIT time *
2193 ********************************************************************/
2194
2195 /*
2196 * RULES FOR HANDLING REGISTERS:
2197 *
2198 * * In the function headers, order the parameters
2199 * - 1st registers written to
2200 * - 2nd read/modify/write registers
2201 * - 3rd registers read from
2202 * * Before calling raw_*, you must call readreg, writereg or rmw for
2203 * each register
2204 * * The order for this is
2205 * - 1st call remove_offset for all registers written to with size<4
2206 * - 2nd call readreg for all registers read without offset
2207 * - 3rd call rmw for all rmw registers
2208 * - 4th call readreg_offset for all registers that can handle offsets
2209 * - 5th call get_offset for all the registers from the previous step
2210 * - 6th call writereg for all written-to registers
2211 * - 7th call raw_*
2212 * - 8th unlock2 all registers that were locked
2213 */
2214
/* Declare that the emitted code just put valid flags in the CPU flag
   register; the stacked copy is now stale. */
MIDFUNC(0,live_flags,(void))
{
    live.flags_on_stack=TRASH;
    live.flags_in_flags=VALID;
    live.flags_are_important=1;
}
MENDFUNC(0,live_flags,(void))

/* Declare that the flags after this point are dead (no one reads
   them), enabling flag-clobbering optimizations. */
MIDFUNC(0,dont_care_flags,(void))
{
    live.flags_are_important=0;
}
MENDFUNC(0,dont_care_flags,(void))
2228
2229
/* Copy the current carry flag into the FLAGX pseudo-register's memory
   slot (setcc condition 2 — presumably "carry set"; verify against the
   setcc encoding used by the backend). */
MIDFUNC(0,duplicate_carry,(void))
{
    evict(FLAGX);
    make_flags_live_internal();
    COMPCALL(setcc_m)((uae_u32)live.state[FLAGX].mem,2);
    log_vwrite(FLAGX);
}
MENDFUNC(0,duplicate_carry,(void))
2238
/* Load the saved X flag back into the native carry flag.  On non-P6
   cores a BT on the memory slot is used; on P6 cores that BT would
   cause a partial-register stall, so a ROL through the register is
   emitted instead (and FLAGX is marked clean by hand). */
MIDFUNC(0,restore_carry,(void))
{
    if (!have_rat_stall) { /* Not a P6 core, i.e. no partial stalls */
	bt_l_ri_noclobber(FLAGX,0);
    }
    else { /* Avoid the stall the above creates.
	      This is slow on non-P6, though.
	   */
	COMPCALL(rol_b_ri(FLAGX,8));
	isclean(FLAGX);
    }
}
MENDFUNC(0,restore_carry,(void))
2252
/* Mark the start of a region whose flag results must be preserved. */
MIDFUNC(0,start_needflags,(void))
{
    needflags=1;
}
MENDFUNC(0,start_needflags,(void))

/* Mark the end of the flags-needed region. */
MIDFUNC(0,end_needflags,(void))
{
    needflags=0;
}
MENDFUNC(0,end_needflags,(void))

/* Force the emulated flags into the native flag register now. */
MIDFUNC(0,make_flags_live,(void))
{
    make_flags_live_internal();
}
MENDFUNC(0,make_flags_live,(void))
2270
/* Public wrapper: move FPU condition codes into the integer flags.
   NOTE(review): clobber_flags() is called here AND at the top of
   fflags_into_flags_internal(); the second call looks redundant —
   confirm before removing either. */
MIDFUNC(1,fflags_into_flags,(W2 tmp))
{
    clobber_flags();
    fflags_into_flags_internal(tmp);
}
MENDFUNC(1,fflags_into_flags,(W2 tmp))
2277
2278
2279 MIDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2280 {
2281 int size=4;
2282 if (i<16)
2283 size=2;
2284 CLOBBER_BT;
2285 r=readreg(r,size);
2286 raw_bt_l_ri(r,i);
2287 unlock2(r);
2288 }
2289 MENDFUNC(2,bt_l_ri,(R4 r, IMM i)) /* This is defined as only affecting C */
2290
/* Test bit b of r (register-specified bit index). */
MIDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
{
    CLOBBER_BT;
    r=readreg(r,4);
    b=readreg(b,4);
    raw_bt_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bt_l_rr,(R4 r, R4 b)) /* This is defined as only affecting C */
2301
2302 MIDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2303 {
2304 int size=4;
2305 if (i<16)
2306 size=2;
2307 CLOBBER_BT;
2308 r=rmw(r,size,size);
2309 raw_btc_l_ri(r,i);
2310 unlock2(r);
2311 }
2312 MENDFUNC(2,btc_l_ri,(RW4 r, IMM i))
2313
/* Test-and-complement bit b of r (register-specified bit index). */
MIDFUNC(2,btc_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btc_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btc_l_rr,(RW4 r, R4 b))
2324
2325
2326 MIDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2327 {
2328 int size=4;
2329 if (i<16)
2330 size=2;
2331 CLOBBER_BT;
2332 r=rmw(r,size,size);
2333 raw_btr_l_ri(r,i);
2334 unlock2(r);
2335 }
2336 MENDFUNC(2,btr_l_ri,(RW4 r, IMM i))
2337
/* Test-and-reset bit b of r (register-specified bit index). */
MIDFUNC(2,btr_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_btr_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,btr_l_rr,(RW4 r, R4 b))
2348
2349
2350 MIDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2351 {
2352 int size=4;
2353 if (i<16)
2354 size=2;
2355 CLOBBER_BT;
2356 r=rmw(r,size,size);
2357 raw_bts_l_ri(r,i);
2358 unlock2(r);
2359 }
2360 MENDFUNC(2,bts_l_ri,(RW4 r, IMM i))
2361
/* Test-and-set bit b of r (register-specified bit index). */
MIDFUNC(2,bts_l_rr,(RW4 r, R4 b))
{
    CLOBBER_BT;
    b=readreg(b,4);
    r=rmw(r,4,4);
    raw_bts_l_rr(r,b);
    unlock2(r);
    unlock2(b);
}
MENDFUNC(2,bts_l_rr,(RW4 r, R4 b))
2372
/* Load a long from absolute address s into virtual register d. */
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
    CLOBBER_MOV;
    d=writereg(d,4);
    raw_mov_l_rm(d,s);
    unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
2381
2382
/* Indirect call through virtual register r (register clobbering by
   the callee is handled by the caller of this midfunc). */
MIDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
{
    r=readreg(r,4);
    raw_call_r(r);
    unlock2(r);
}
MENDFUNC(1,call_r,(R4 r)) /* Clobbering is implicit */
2390
/* Subtract immediate s from the long at absolute address d. */
MIDFUNC(2,sub_l_mi,(IMM d, IMM s))
{
    CLOBBER_SUB;
    raw_sub_l_mi(d,s) ;
}
MENDFUNC(2,sub_l_mi,(IMM d, IMM s))

/* Store immediate long s at absolute address d. */
MIDFUNC(2,mov_l_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_l_mi(d,s) ;
}
MENDFUNC(2,mov_l_mi,(IMM d, IMM s))

/* Store immediate word s at absolute address d. */
MIDFUNC(2,mov_w_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_w_mi(d,s) ;
}
MENDFUNC(2,mov_w_mi,(IMM d, IMM s))

/* Store immediate byte s at absolute address d. */
MIDFUNC(2,mov_b_mi,(IMM d, IMM s))
{
    CLOBBER_MOV;
    raw_mov_b_mi(d,s) ;
}
MENDFUNC(2,mov_b_mi,(IMM d, IMM s))
2418
/* Rotate byte r left by immediate i.  A zero rotate is skipped
   entirely unless the flags it produces are needed. */
MIDFUNC(2,rol_b_ri,(RW1 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,1,1);
    raw_rol_b_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_b_ri,(RW1 r, IMM i))

/* Rotate word r left by immediate i (zero rotate skipped as above). */
MIDFUNC(2,rol_w_ri,(RW2 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,2,2);
    raw_rol_w_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_w_ri,(RW2 r, IMM i))

/* Rotate long r left by immediate i (zero rotate skipped as above). */
MIDFUNC(2,rol_l_ri,(RW4 r, IMM i))
{
    if (!i && !needflags)
	return;
    CLOBBER_ROL;
    r=rmw(r,4,4);
    raw_rol_l_ri(r,i);
    unlock2(r);
}
MENDFUNC(2,rol_l_ri,(RW4 r, IMM i))
2451
2452 MIDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2453 {
2454 if (isconst(r)) {
2455 COMPCALL(rol_l_ri)(d,(uae_u8)live.state[r].val);
2456 return;
2457 }
2458 CLOBBER_ROL;
2459 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2460 d=rmw(d,4,4);
2461 Dif (r!=1) {
2462 write_log("Illegal register %d in raw_rol_b\n",r);
2463 abort();
2464 }
2465 raw_rol_l_rr(d,r) ;
2466 unlock2(r);
2467 unlock2(d);
2468 }
2469 MENDFUNC(2,rol_l_rr,(RW4 d, R1 r))
2470
2471 MIDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2472 { /* Can only do this with r==1, i.e. cl */
2473
2474 if (isconst(r)) {
2475 COMPCALL(rol_w_ri)(d,(uae_u8)live.state[r].val);
2476 return;
2477 }
2478 CLOBBER_ROL;
2479 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2480 d=rmw(d,2,2);
2481 Dif (r!=1) {
2482 write_log("Illegal register %d in raw_rol_b\n",r);
2483 abort();
2484 }
2485 raw_rol_w_rr(d,r) ;
2486 unlock2(r);
2487 unlock2(d);
2488 }
2489 MENDFUNC(2,rol_w_rr,(RW2 d, R1 r))
2490
/* Rotate byte d left by the count in r (count must be in CL). */
MIDFUNC(2,rol_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */

    if (isconst(r)) {
	COMPCALL(rol_b_ri)(d,(uae_u8)live.state[r].val);
	return;
    }

    CLOBBER_ROL;
    r=readreg_specific(r,1,SHIFTCOUNT_NREG);
    d=rmw(d,1,1);
    Dif (r!=1) {
	write_log("Illegal register %d in raw_rol_b\n",r);
	abort();
    }
    raw_rol_b_rr(d,r) ;
    unlock2(r);
    unlock2(d);
}
MENDFUNC(2,rol_b_rr,(RW1 d, R1 r))
2511
2512
2513 MIDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2514 {
2515 if (isconst(r)) {
2516 COMPCALL(shll_l_ri)(d,(uae_u8)live.state[r].val);
2517 return;
2518 }
2519 CLOBBER_SHLL;
2520 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2521 d=rmw(d,4,4);
2522 Dif (r!=1) {
2523 write_log("Illegal register %d in raw_rol_b\n",r);
2524 abort();
2525 }
2526 raw_shll_l_rr(d,r) ;
2527 unlock2(r);
2528 unlock2(d);
2529 }
2530 MENDFUNC(2,shll_l_rr,(RW4 d, R1 r))
2531
2532 MIDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2533 { /* Can only do this with r==1, i.e. cl */
2534
2535 if (isconst(r)) {
2536 COMPCALL(shll_w_ri)(d,(uae_u8)live.state[r].val);
2537 return;
2538 }
2539 CLOBBER_SHLL;
2540 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2541 d=rmw(d,2,2);
2542 Dif (r!=1) {
2543 write_log("Illegal register %d in raw_shll_b\n",r);
2544 abort();
2545 }
2546 raw_shll_w_rr(d,r) ;
2547 unlock2(r);
2548 unlock2(d);
2549 }
2550 MENDFUNC(2,shll_w_rr,(RW2 d, R1 r))
2551
MIDFUNC(2,shll_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
	/* 8-bit left shift of d by the count held in r; constant counts
	   go through the immediate-count variant. */
	if (isconst(r)) {
	    COMPCALL(shll_b_ri)(d,(uae_u8)live.state[r].val);
	    return;
	}

	CLOBBER_SHLL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);	/* count must end up in CL */
	d=rmw(d,1,1);
	Dif (r!=1) {	/* debug-build sanity check on the count register */
	    write_log("Illegal register %d in raw_shll_b\n",r);
	    abort();
	}
	raw_shll_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shll_b_rr,(RW1 d, R1 r))
2572
2573
/* Rotate-right-by-immediate family.  A zero count is a no-op unless the
   emitted flags are live (needflags), in which case the instruction is
   still emitted for its flag effects. */
MIDFUNC(2,ror_b_ri,(R1 r, IMM i))
{
	/* Rotate the byte in r right by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_ROR;
	r=rmw(r,1,1);
	raw_ror_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_b_ri,(R1 r, IMM i))

MIDFUNC(2,ror_w_ri,(R2 r, IMM i))
{
	/* Rotate the low word of r right by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_ROR;
	r=rmw(r,2,2);
	raw_ror_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_w_ri,(R2 r, IMM i))

MIDFUNC(2,ror_l_ri,(R4 r, IMM i))
{
	/* Rotate the full 32-bit r right by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_ROR;
	r=rmw(r,4,4);
	raw_ror_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,ror_l_ri,(R4 r, IMM i))
2606
/* Rotate-right-by-register family.  Constant counts fall back to the
   immediate forms; otherwise the count is placed in the dedicated shift
   count register (CL on x86).  NOTE(review): unlike the rol_*_rr family
   these have no Dif(r!=1) sanity check — presumably an oversight, the
   same constraint applies. */
MIDFUNC(2,ror_l_rr,(R4 d, R1 r))
{
	/* Rotate 32-bit d right by the count in r. */
	if (isconst(r)) {
	    COMPCALL(ror_l_ri)(d,(uae_u8)live.state[r].val);
	    return;
	}
	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);	/* count must end up in CL */
	d=rmw(d,4,4);
	raw_ror_l_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_l_rr,(R4 d, R1 r))

MIDFUNC(2,ror_w_rr,(R2 d, R1 r))
{
	/* Rotate the low word of d right by the count in r. */
	if (isconst(r)) {
	    COMPCALL(ror_w_ri)(d,(uae_u8)live.state[r].val);
	    return;
	}
	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,2,2);
	raw_ror_w_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_w_rr,(R2 d, R1 r))

MIDFUNC(2,ror_b_rr,(R1 d, R1 r))
{
	/* Rotate the byte in d right by the count in r. */
	if (isconst(r)) {
	    COMPCALL(ror_b_ri)(d,(uae_u8)live.state[r].val);
	    return;
	}

	CLOBBER_ROR;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);
	d=rmw(d,1,1);
	raw_ror_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,ror_b_rr,(R1 d, R1 r))
2652
2653 MIDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2654 {
2655 if (isconst(r)) {
2656 COMPCALL(shrl_l_ri)(d,(uae_u8)live.state[r].val);
2657 return;
2658 }
2659 CLOBBER_SHRL;
2660 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2661 d=rmw(d,4,4);
2662 Dif (r!=1) {
2663 write_log("Illegal register %d in raw_rol_b\n",r);
2664 abort();
2665 }
2666 raw_shrl_l_rr(d,r) ;
2667 unlock2(r);
2668 unlock2(d);
2669 }
2670 MENDFUNC(2,shrl_l_rr,(RW4 d, R1 r))
2671
2672 MIDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2673 { /* Can only do this with r==1, i.e. cl */
2674
2675 if (isconst(r)) {
2676 COMPCALL(shrl_w_ri)(d,(uae_u8)live.state[r].val);
2677 return;
2678 }
2679 CLOBBER_SHRL;
2680 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2681 d=rmw(d,2,2);
2682 Dif (r!=1) {
2683 write_log("Illegal register %d in raw_shrl_b\n",r);
2684 abort();
2685 }
2686 raw_shrl_w_rr(d,r) ;
2687 unlock2(r);
2688 unlock2(d);
2689 }
2690 MENDFUNC(2,shrl_w_rr,(RW2 d, R1 r))
2691
MIDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
	/* 8-bit logical right shift of d by the count held in r;
	   constant counts use the immediate form. */
	if (isconst(r)) {
	    COMPCALL(shrl_b_ri)(d,(uae_u8)live.state[r].val);
	    return;
	}

	CLOBBER_SHRL;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);	/* count must end up in CL */
	d=rmw(d,1,1);
	Dif (r!=1) {	/* debug-build sanity check on the count register */
	    write_log("Illegal register %d in raw_shrl_b\n",r);
	    abort();
	}
	raw_shrl_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shrl_b_rr,(RW1 d, R1 r))
2712
2713
2714
/* Shift-by-immediate family (logical left/right).  A zero count is a
   no-op unless the flag results are live.  The 32-bit variants also
   constant-fold when the operand's value is known at compile time and
   no flags are needed. */
MIDFUNC(2,shll_l_ri,(RW4 r, IMM i))
{
	/* 32-bit left shift of r by constant i. */
	if (!i && !needflags)
	    return;
	if (isconst(r) && !needflags) {
	    live.state[r].val<<=i;	/* fold at compile time */
	    return;
	}
	CLOBBER_SHLL;
	r=rmw(r,4,4);
	raw_shll_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shll_w_ri,(RW2 r, IMM i))
{
	/* 16-bit left shift of r by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_SHLL;
	r=rmw(r,2,2);
	raw_shll_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shll_b_ri,(RW1 r, IMM i))
{
	/* 8-bit left shift of r by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_SHLL;
	r=rmw(r,1,1);
	raw_shll_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shll_b_ri,(RW1 r, IMM i))

MIDFUNC(2,shrl_l_ri,(RW4 r, IMM i))
{
	/* 32-bit logical right shift of r by constant i. */
	if (!i && !needflags)
	    return;
	if (isconst(r) && !needflags) {
	    live.state[r].val>>=i;	/* fold at compile time */
	    return;
	}
	CLOBBER_SHRL;
	r=rmw(r,4,4);
	raw_shrl_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shrl_w_ri,(RW2 r, IMM i))
{
	/* 16-bit logical right shift of r by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_SHRL;
	r=rmw(r,2,2);
	raw_shrl_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
{
	/* 8-bit logical right shift of r by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_SHRL;
	r=rmw(r,1,1);
	raw_shrl_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shrl_b_ri,(RW1 r, IMM i))
2788
/* Arithmetic-right-shift-by-immediate family.  Zero counts are elided
   unless the flag results are live.  No constant folding here (the
   operand would need a signed interpretation). */
MIDFUNC(2,shra_l_ri,(RW4 r, IMM i))
{
	/* 32-bit arithmetic right shift of r by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_SHRA;
	r=rmw(r,4,4);
	raw_shra_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_l_ri,(RW4 r, IMM i))

MIDFUNC(2,shra_w_ri,(RW2 r, IMM i))
{
	/* 16-bit arithmetic right shift of r by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_SHRA;
	r=rmw(r,2,2);
	raw_shra_w_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_w_ri,(RW2 r, IMM i))

MIDFUNC(2,shra_b_ri,(RW1 r, IMM i))
{
	/* 8-bit arithmetic right shift of r by constant i. */
	if (!i && !needflags)
	    return;
	CLOBBER_SHRA;
	r=rmw(r,1,1);
	raw_shra_b_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,shra_b_ri,(RW1 r, IMM i))
2821
2822 MIDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2823 {
2824 if (isconst(r)) {
2825 COMPCALL(shra_l_ri)(d,(uae_u8)live.state[r].val);
2826 return;
2827 }
2828 CLOBBER_SHRA;
2829 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2830 d=rmw(d,4,4);
2831 Dif (r!=1) {
2832 write_log("Illegal register %d in raw_rol_b\n",r);
2833 abort();
2834 }
2835 raw_shra_l_rr(d,r) ;
2836 unlock2(r);
2837 unlock2(d);
2838 }
2839 MENDFUNC(2,shra_l_rr,(RW4 d, R1 r))
2840
2841 MIDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2842 { /* Can only do this with r==1, i.e. cl */
2843
2844 if (isconst(r)) {
2845 COMPCALL(shra_w_ri)(d,(uae_u8)live.state[r].val);
2846 return;
2847 }
2848 CLOBBER_SHRA;
2849 r=readreg_specific(r,1,SHIFTCOUNT_NREG);
2850 d=rmw(d,2,2);
2851 Dif (r!=1) {
2852 write_log("Illegal register %d in raw_shra_b\n",r);
2853 abort();
2854 }
2855 raw_shra_w_rr(d,r) ;
2856 unlock2(r);
2857 unlock2(d);
2858 }
2859 MENDFUNC(2,shra_w_rr,(RW2 d, R1 r))
2860
MIDFUNC(2,shra_b_rr,(RW1 d, R1 r))
{ /* Can only do this with r==1, i.e. cl */
	/* 8-bit arithmetic right shift of d by the count held in r;
	   constant counts use the immediate form. */
	if (isconst(r)) {
	    COMPCALL(shra_b_ri)(d,(uae_u8)live.state[r].val);
	    return;
	}

	CLOBBER_SHRA;
	r=readreg_specific(r,1,SHIFTCOUNT_NREG);	/* count must end up in CL */
	d=rmw(d,1,1);
	Dif (r!=1) {	/* debug-build sanity check on the count register */
	    write_log("Illegal register %d in raw_shra_b\n",r);
	    abort();
	}
	raw_shra_b_rr(d,r) ;
	unlock2(r);
	unlock2(d);
}
MENDFUNC(2,shra_b_rr,(RW1 d, R1 r))
2881
2882
MIDFUNC(2,setcc,(W1 d, IMM cc))
{
	/* Set byte register d to 0/1 according to condition code cc
	   (wraps the native SETcc). */
	CLOBBER_SETCC;
	d=writereg(d,1);
	raw_setcc(d,cc);
	unlock2(d);
}
MENDFUNC(2,setcc,(W1 d, IMM cc))

MIDFUNC(2,setcc_m,(IMM d, IMM cc))
{
	/* Same as setcc, but writes the result byte to absolute address d. */
	CLOBBER_SETCC;
	raw_setcc_m(d,cc);
}
MENDFUNC(2,setcc_m,(IMM d, IMM cc))

MIDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))
{
	/* Conditionally move s into d when cc holds (native CMOVcc). */
	if (d==s)
	    return;	/* move onto itself: nothing to do */
	CLOBBER_CMOV;
	s=readreg(s,4);
	d=rmw(d,4,4);	/* read-modify-write: d keeps its value if cc fails */
	raw_cmov_l_rr(d,s,cc);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(3,cmov_l_rr,(RW4 d, R4 s, IMM cc))

MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
{
	/* Conditionally load d from absolute address s when cc holds. */
	CLOBBER_CMOV;
	d=rmw(d,4,4);
	raw_cmov_l_rm(d,s,cc);
	unlock2(d);
}
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2920
MIDFUNC(1,setzflg_l,(RW4 r))
{
	/* Make the native Z flag reflect whether r is zero.  Two
	   strategies: BSF (sets Z from its source operand) when the host
	   supports/prefers it, otherwise rebuild the flag word manually
	   via raw_flags_set_zero using two scratch registers. */
	if (setzflg_uses_bsf) {
	    CLOBBER_BSF;
	    r=rmw(r,4,4);
	    raw_bsf_l_rr(r,r);	/* BSF r,r: leaves Z set iff r==0 */
	    unlock2(r);
	}
	else {
	    /* The manual path patches the in-flags copy, so the flags
	       must currently live in the native flag register. */
	    Dif (live.flags_in_flags!=VALID) {
		write_log("setzflg() wanted flags in native flags, they are %d\n",
			  live.flags_in_flags);
		abort();
	    }
	    r=readreg(r,4);
	    int f=writereg(S11,4);	/* scratch: receives flag word */
	    int t=writereg(S12,4);	/* scratch: temporary */
	    raw_flags_set_zero(f,r,t);
	    unlock2(f);
	    unlock2(r);
	    unlock2(t);
	}
}
MENDFUNC(1,setzflg_l,(RW4 r))
2945
MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
{
	/* Signed 32x32 -> 32 multiply: d *= s. */
	CLOBBER_MUL;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_imul_32_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,imul_32_32,(RW4 d, R4 s))

MIDFUNC(2,imul_64_32,(RW4 d, RW4 s))
{
	/* Signed 32x32 -> 64 multiply.  The host instruction requires
	   fixed registers (EDX:EAX on x86), hence rmw_specific pinning
	   both operands to MUL_NREG1/MUL_NREG2. */
	CLOBBER_MUL;
	s=rmw_specific(s,4,4,MUL_NREG2);
	d=rmw_specific(d,4,4,MUL_NREG1);
	raw_imul_64_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,imul_64_32,(RW4 d, RW4 s))

MIDFUNC(2,mul_64_32,(RW4 d, RW4 s))
{
	/* Unsigned 32x32 -> 64 multiply; same fixed-register pinning as
	   imul_64_32. */
	CLOBBER_MUL;
	s=rmw_specific(s,4,4,MUL_NREG2);
	d=rmw_specific(d,4,4,MUL_NREG1);
	raw_mul_64_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,mul_64_32,(RW4 d, RW4 s))

MIDFUNC(2,mul_32_32,(RW4 d, R4 s))
{
	/* Unsigned 32x32 -> 32 multiply: d *= s. */
	CLOBBER_MUL;
	s=readreg(s,4);
	d=rmw(d,4,4);
	raw_mul_32_32(d,s);
	unlock2(s);
	unlock2(d);
}
MENDFUNC(2,mul_32_32,(RW4 d, R4 s))
2989
MIDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))
{
	int isrmw;

	/* d = sign-extended low word of s.  Known constants are folded
	   directly into d's constant state. */
	if (isconst(s)) {
	    set_const(d,(uae_s32)(uae_s16)live.state[s].val);
	    return;
	}

	CLOBBER_SE16;
	isrmw=(s==d);
	if (!isrmw) {
	    s=readreg(s,2);
	    d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble!  So lock once, as a 4-byte rmw. */
	    s=d=rmw(s,4,2);
	}
	raw_sign_extend_16_rr(d,s);
	if (!isrmw) {
	    unlock2(d);
	    unlock2(s);
	}
	else {
	    unlock2(s);	/* single lock -> single unlock */
	}
}
MENDFUNC(2,sign_extend_16_rr,(W4 d, R2 s))

MIDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
{
	int isrmw;

	/* d = sign-extended low byte of s; same structure as the
	   16-bit variant above. */
	if (isconst(s)) {
	    set_const(d,(uae_s32)(uae_s8)live.state[s].val);
	    return;
	}

	isrmw=(s==d);
	CLOBBER_SE8;
	if (!isrmw) {
	    s=readreg(s,1);
	    d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble!  So lock once, as a 4-byte rmw. */
	    s=d=rmw(s,4,1);
	}

	raw_sign_extend_8_rr(d,s);

	if (!isrmw) {
	    unlock2(d);
	    unlock2(s);
	}
	else {
	    unlock2(s);
	}
}
MENDFUNC(2,sign_extend_8_rr,(W4 d, R1 s))
3051
3052
MIDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))
{
	int isrmw;

	/* d = zero-extended low word of s; constants fold directly. */
	if (isconst(s)) {
	    set_const(d,(uae_u32)(uae_u16)live.state[s].val);
	    return;
	}

	isrmw=(s==d);
	CLOBBER_ZE16;
	if (!isrmw) {
	    s=readreg(s,2);
	    d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble!  So lock once, as a 4-byte rmw. */
	    s=d=rmw(s,4,2);
	}
	raw_zero_extend_16_rr(d,s);
	if (!isrmw) {
	    unlock2(d);
	    unlock2(s);
	}
	else {
	    unlock2(s);
	}
}
MENDFUNC(2,zero_extend_16_rr,(W4 d, R2 s))

MIDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
{
	int isrmw;
	/* d = zero-extended low byte of s; constants fold directly. */
	if (isconst(s)) {
	    set_const(d,(uae_u32)(uae_u8)live.state[s].val);
	    return;
	}

	isrmw=(s==d);
	CLOBBER_ZE8;
	if (!isrmw) {
	    s=readreg(s,1);
	    d=writereg(d,4);
	}
	else { /* If we try to lock this twice, with different sizes, we
		  are in trouble!  So lock once, as a 4-byte rmw. */
	    s=d=rmw(s,4,1);
	}

	raw_zero_extend_8_rr(d,s);

	if (!isrmw) {
	    unlock2(d);
	    unlock2(s);
	}
	else {
	    unlock2(s);
	}
}
MENDFUNC(2,zero_extend_8_rr,(W4 d, R1 s))
3113
MIDFUNC(2,mov_b_rr,(W1 d, R1 s))
{
	/* Byte register-to-register move; self-moves are dropped and
	   constant sources become immediate moves. */
	if (d==s)
	    return;
	if (isconst(s)) {
	    COMPCALL(mov_b_ri)(d,(uae_u8)live.state[s].val);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=writereg(d,1);
	raw_mov_b_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_b_rr,(W1 d, R1 s))

MIDFUNC(2,mov_w_rr,(W2 d, R2 s))
{
	/* Word register-to-register move; same shortcuts as the byte
	   variant. */
	if (d==s)
	    return;
	if (isconst(s)) {
	    COMPCALL(mov_w_ri)(d,(uae_u16)live.state[s].val);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=writereg(d,2);
	raw_mov_w_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,mov_w_rr,(W2 d, R2 s))
3149
3150
/* Scaled-index addressing family: loads read from, and stores write to,
   memory at baser + index*factor. */
MIDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))
{
	/* d = 32-bit load from baser + index*factor. */
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_mov_l_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_rrm_indexed,(W4 d,R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))
{
	/* d = 16-bit load from baser + index*factor. */
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,2);

	raw_mov_w_rrm_indexed(d,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_rrm_indexed,(W2 d, R4 baser, R4 index, IMM factor))

MIDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))
{
	/* d = 8-bit load from baser + index*factor. */
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	d=writereg(d,1);

	raw_mov_b_rrm_indexed(d,baser,index,factor);

	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_rrm_indexed,(W1 d, R4 baser, R4 index, IMM factor))


MIDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))
{
	/* 32-bit store of s to baser + index*factor. */
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,4);

	/* debug check: the source must not alias either address register */
	Dif (baser==s || index==s)
	    abort();


	raw_mov_l_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_l_mrr_indexed,(R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))
{
	/* 16-bit store of s to baser + index*factor. */
	CLOBBER_MOV;
	baser=readreg(baser,4);
	index=readreg(index,4);
	s=readreg(s,2);

	raw_mov_w_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_w_mrr_indexed,(R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
{
	/* 8-bit store of s to baser + index*factor.  NOTE(review): s is
	   locked before the address registers here, unlike the wider
	   variants — presumably so it can grab a byte-addressable host
	   register first; confirm before reordering. */
	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg(baser,4);
	index=readreg(index,4);

	raw_mov_b_mrr_indexed(baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(4,mov_b_mrr_indexed,(R4 baser, R4 index, IMM factor, R1 s))
3240
3241
/* Stores to base + baser + index*factor.  readreg_offset may hand back
   registers whose committed value lags by a known delta; those pending
   deltas (get_offset) are folded into the constant displacement. */
MIDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))
{
	int basereg=baser;	/* remember virtual regs for get_offset */
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,4);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);	/* fold pending offsets into base */
	base+=factor*get_offset(indexreg);

	raw_mov_l_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R4 s))

MIDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))
{
	/* 16-bit variant of the store above. */
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,2);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_w_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R2 s))

MIDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
{
	/* 8-bit variant of the store above. */
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	s=readreg(s,1);
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);

	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);

	raw_mov_b_bmrr_indexed(base,baser,index,factor,s);
	unlock2(s);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_bmrr_indexed,(IMM base, R4 baser, R4 index, IMM factor, R1 s))
3301
3302
3303
/* Read a long from base+baser+factor*index */
MIDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;	/* remember virtual regs for get_offset */
	int indexreg=index;

	CLOBBER_MOV;
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);	/* fold pending offsets into base */
	base+=factor*get_offset(indexreg);
	d=writereg(d,4);
	raw_mov_l_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_l_brrm_indexed,(W4 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a word from base+baser+factor*index */
MIDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	remove_offset(d,-1);	/* partial write: flush any pending offset on d */
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,2);
	raw_mov_w_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_w_brrm_indexed,(W2 d, IMM base, R4 baser, R4 index, IMM factor))


/* Read a byte from base+baser+factor*index */
MIDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
{
	int basereg=baser;
	int indexreg=index;

	CLOBBER_MOV;
	remove_offset(d,-1);	/* partial write: flush any pending offset on d */
	baser=readreg_offset(baser,4);
	index=readreg_offset(index,4);
	base+=get_offset(basereg);
	base+=factor*get_offset(indexreg);
	d=writereg(d,1);
	raw_mov_b_brrm_indexed(d,base,baser,index,factor);
	unlock2(d);
	unlock2(baser);
	unlock2(index);
}
MENDFUNC(5,mov_b_brrm_indexed,(W1 d, IMM base, R4 baser, R4 index, IMM factor))
3362
/* Read a long from base+factor*index */
MIDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
{
	int indexreg=index;	/* remember virtual reg for get_offset */

	/* If the index is a known constant the whole address is constant,
	   so degrade to a plain absolute load. */
	if (isconst(index)) {
	    COMPCALL(mov_l_rm)(d,base+factor*live.state[index].val);
	    return;
	}

	CLOBBER_MOV;
	index=readreg_offset(index,4);
	base+=get_offset(indexreg)*factor;	/* fold pending offset */
	d=writereg(d,4);

	raw_mov_l_rm_indexed(d,base,index,factor);
	unlock2(index);
	unlock2(d);
}
MENDFUNC(4,mov_l_rm_indexed,(W4 d, IMM base, R4 index, IMM factor))
3383
3384
/* read the long at the address contained in s+offset and store in d */
MIDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))
{
	/* A constant base collapses to an absolute load. */
	if (isconst(s)) {
	    COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
	    return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,4);

	raw_mov_l_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_rR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))
{
	if (isconst(s)) {
	    COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
	    return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,2);

	raw_mov_w_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_rR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
{
	if (isconst(s)) {
	    COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
	    return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);
	d=writereg(d,1);

	raw_mov_b_rR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_rR,(W1 d, R4 s, IMM offset))
3435
/* read the long at the address contained in s+offset and store in d
   (offset-tolerant variant: folds s's pending offset into the
   displacement via readreg_offset/get_offset) */
MIDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))
{
	int sreg=s;	/* remember virtual reg for get_offset */
	if (isconst(s)) {
	    COMPCALL(mov_l_rm)(d,live.state[s].val+offset);
	    return;
	}
	CLOBBER_MOV;
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,4);

	raw_mov_l_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_brR,(W4 d, R4 s, IMM offset))

/* read the word at the address contained in s+offset and store in d */
MIDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))
{
	int sreg=s;
	if (isconst(s)) {
	    COMPCALL(mov_w_rm)(d,live.state[s].val+offset);
	    return;
	}
	CLOBBER_MOV;
	remove_offset(d,-1);	/* partial write: flush pending offset on d */
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,2);

	raw_mov_w_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_brR,(W2 d, R4 s, IMM offset))

/* read the byte at the address contained in s+offset and store in d */
MIDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
{
	int sreg=s;
	if (isconst(s)) {
	    COMPCALL(mov_b_rm)(d,live.state[s].val+offset);
	    return;
	}
	CLOBBER_MOV;
	remove_offset(d,-1);	/* partial write: flush pending offset on d */
	s=readreg_offset(s,4);
	offset+=get_offset(sreg);
	d=writereg(d,1);

	raw_mov_b_brR(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_brR,(W1 d, R4 s, IMM offset))
3494
/* Store immediate i to the address contained in d+offset.  A constant
   d degrades to an absolute store; otherwise d's pending offset is
   folded into the displacement. */
MIDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))
{
	int dreg=d;	/* remember virtual reg for get_offset */
	if (isconst(d)) {
	    COMPCALL(mov_l_mi)(live.state[d].val+offset,i);
	    return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_l_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_l_Ri,(R4 d, IMM i, IMM offset))

MIDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))
{
	/* 16-bit immediate store to [d+offset]. */
	int dreg=d;
	if (isconst(d)) {
	    COMPCALL(mov_w_mi)(live.state[d].val+offset,i);
	    return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_w_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_w_Ri,(R4 d, IMM i, IMM offset))

MIDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
{
	/* 8-bit immediate store to [d+offset]. */
	int dreg=d;
	if (isconst(d)) {
	    COMPCALL(mov_b_mi)(live.state[d].val+offset,i);
	    return;
	}

	CLOBBER_MOV;
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_b_Ri(d,i,offset);
	unlock2(d);
}
MENDFUNC(3,mov_b_Ri,(R4 d, IMM i, IMM offset))
3542
/* Warning! OFFSET is byte sized only! */
/* Store register s to the address contained in d+offset.  Constant
   destinations degrade to absolute stores, constant sources to
   immediate stores. */
MIDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))
{
	if (isconst(d)) {
	    COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
	    return;
	}
	if (isconst(s)) {
	    COMPCALL(mov_l_Ri)(d,live.state[s].val,offset);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,4);
	d=readreg(d,4);	/* d is an address, only read — never written */

	raw_mov_l_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_Rr,(R4 d, R4 s, IMM offset))

MIDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))
{
	/* 16-bit store of s to [d+offset]. */
	if (isconst(d)) {
	    COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
	    return;
	}
	if (isconst(s)) {
	    COMPCALL(mov_w_Ri)(d,(uae_u16)live.state[s].val,offset);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=readreg(d,4);
	raw_mov_w_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_Rr,(R4 d, R2 s, IMM offset))

MIDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
{
	/* 8-bit store of s to [d+offset]. */
	if (isconst(d)) {
	    COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
	    return;
	}
	if (isconst(s)) {
	    COMPCALL(mov_b_Ri)(d,(uae_u8)live.state[s].val,offset);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=readreg(d,4);
	raw_mov_b_Rr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_Rr,(R4 d, R1 s, IMM offset))
3604
MIDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))
{
	/* d = s + offset (address arithmetic, no memory access). */
	if (isconst(s)) {
	    COMPCALL(mov_l_ri)(d,live.state[s].val+offset);
	    return;
	}
#if USE_OFFSET
	/* In-place add: just record the offset lazily instead of
	   emitting an LEA. */
	if (d==s) {
	    add_offset(d,offset);
	    return;
	}
#endif
	CLOBBER_LEA;
	s=readreg(s,4);
	d=writereg(d,4);
	raw_lea_l_brr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,lea_l_brr,(W4 d, R4 s, IMM offset))

MIDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))
{
	/* d = s + index*factor + offset; a zero offset uses the shorter
	   no-displacement form. */
	if (!offset) {
	    COMPCALL(lea_l_rr_indexed)(d,s,index,factor);
	    return;
	}
	CLOBBER_LEA;
	s=readreg(s,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_lea_l_brr_indexed(d,s,index,factor,offset);
	unlock2(d);
	unlock2(index);
	unlock2(s);
}
MENDFUNC(5,lea_l_brr_indexed,(W4 d, R4 s, R4 index, IMM factor, IMM offset))

MIDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
{
	/* d = s + index*factor. */
	CLOBBER_LEA;
	s=readreg(s,4);
	index=readreg(index,4);
	d=writereg(d,4);

	raw_lea_l_rr_indexed(d,s,index,factor);
	unlock2(d);
	unlock2(index);
	unlock2(s);
}
MENDFUNC(4,lea_l_rr_indexed,(W4 d, R4 s, R4 index, IMM factor))
3657
/* write s to the long at the address contained in d+offset
   (offset-tolerant variant: d's pending offset is folded into the
   displacement) */
MIDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))
{
	int dreg=d;	/* remember virtual reg for get_offset */
	if (isconst(d)) {
	    COMPCALL(mov_l_mr)(live.state[d].val+offset,s);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,4);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);

	raw_mov_l_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_l_bRr,(R4 d, R4 s, IMM offset))

/* write s to the word at the address contained in d+offset */
MIDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))
{
	int dreg=d;

	if (isconst(d)) {
	    COMPCALL(mov_w_mr)(live.state[d].val+offset,s);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,2);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_w_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_w_bRr,(R4 d, R2 s, IMM offset))

/* write s to the byte at the address contained in d+offset */
MIDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
{
	int dreg=d;
	if (isconst(d)) {
	    COMPCALL(mov_b_mr)(live.state[d].val+offset,s);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);
	d=readreg_offset(d,4);
	offset+=get_offset(dreg);
	raw_mov_b_bRr(d,s,offset);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(3,mov_b_bRr,(R4 d, R1 s, IMM offset))
3715
MIDFUNC(1,bswap_32,(RW4 r))
{
	int reg=r;

	/* Byte-swap all four bytes of r (endianness conversion).
	   Known constants are folded via reverse32. */
	if (isconst(r)) {
	    uae_u32 oldv=live.state[r].val;
	    live.state[r].val=reverse32(oldv);
	    return;
	}

	CLOBBER_SW32;
	r=rmw(r,4,4);
	raw_bswap_32(r);
	unlock2(r);
}
MENDFUNC(1,bswap_32,(RW4 r))

MIDFUNC(1,bswap_16,(RW2 r))
{
	/* Swap the two low bytes of r, leaving the high word intact;
	   known constants are folded inline. */
	if (isconst(r)) {
	    uae_u32 oldv=live.state[r].val;
	    live.state[r].val=((oldv>>8)&0xff) | ((oldv<<8)&0xff00) |
		(oldv&0xffff0000);
	    return;
	}

	CLOBBER_SW16;
	r=rmw(r,2,2);

	raw_bswap_16(r);
	unlock2(r);
}
MENDFUNC(1,bswap_16,(RW2 r))
3749
3750
3751
MIDFUNC(2,mov_l_rr,(W4 d, R4 s))
{
	int olds;

	/* 32-bit register-to-register move with no code emitted: d is
	   made an alias of the native register currently backing s; the
	   actual copy happens lazily when one of them is spilled. */
	if (d==s) { /* How pointless! */
	    return;
	}
	if (isconst(s)) {
	    COMPCALL(mov_l_ri)(d,live.state[s].val);
	    return;
	}
	olds=s;	/* s is overwritten with the native reg below */
	disassociate(d);	/* drop d's previous backing first */
	s=readreg_offset(s,4);
	live.state[d].realreg=s;
	live.state[d].realind=live.nat[s].nholds;	/* d's slot in holds[] */
	live.state[d].val=live.state[olds].val;	/* carry over s's pending offset */
	live.state[d].validsize=4;
	live.state[d].dirtysize=4;
	set_status(d,DIRTY);	/* must be written back before reuse */

	/* register the alias: native reg s now also holds virtual reg d */
	live.nat[s].holds[live.nat[s].nholds]=d;
	live.nat[s].nholds++;
	log_clobberreg(d);
	/* write_log("Added %d to nreg %d(%d), now holds %d regs\n",
	   d,s,live.state[d].realind,live.nat[s].nholds); */
	unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, R4 s))
3781
/* Absolute-address stores/loads and immediate moves. */
MIDFUNC(2,mov_l_mr,(IMM d, R4 s))
{
	/* Store 32-bit s to absolute address d; constant sources become
	   immediate stores. */
	if (isconst(s)) {
	    COMPCALL(mov_l_mi)(d,live.state[s].val);
	    return;
	}
	CLOBBER_MOV;
	s=readreg(s,4);

	raw_mov_l_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_l_mr,(IMM d, R4 s))


MIDFUNC(2,mov_w_mr,(IMM d, R2 s))
{
	/* Store 16-bit s to absolute address d. */
	if (isconst(s)) {
	    COMPCALL(mov_w_mi)(d,(uae_u16)live.state[s].val);
	    return;
	}
	CLOBBER_MOV;
	s=readreg(s,2);

	raw_mov_w_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_w_mr,(IMM d, R2 s))

MIDFUNC(2,mov_w_rm,(W2 d, IMM s))
{
	/* Load 16-bit d from absolute address s. */
	CLOBBER_MOV;
	d=writereg(d,2);

	raw_mov_w_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_w_rm,(W2 d, IMM s))

MIDFUNC(2,mov_b_mr,(IMM d, R1 s))
{
	/* Store 8-bit s to absolute address d. */
	if (isconst(s)) {
	    COMPCALL(mov_b_mi)(d,(uae_u8)live.state[s].val);
	    return;
	}

	CLOBBER_MOV;
	s=readreg(s,1);

	raw_mov_b_mr(d,s);
	unlock2(s);
}
MENDFUNC(2,mov_b_mr,(IMM d, R1 s))

MIDFUNC(2,mov_b_rm,(W1 d, IMM s))
{
	/* Load 8-bit d from absolute address s. */
	CLOBBER_MOV;
	d=writereg(d,1);

	raw_mov_b_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_b_rm,(W1 d, IMM s))

MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
	/* Load immediate: no code emitted, d just becomes a known
	   constant in the register state. */
	set_const(d,s);
	return;
}
MENDFUNC(2,mov_l_ri,(W4 d, IMM s))

MIDFUNC(2,mov_w_ri,(W2 d, IMM s))
{
	/* 16-bit immediate load (partial write, so a real move is
	   emitted rather than constant tracking). */
	CLOBBER_MOV;
	d=writereg(d,2);

	raw_mov_w_ri(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_w_ri,(W2 d, IMM s))

MIDFUNC(2,mov_b_ri,(W1 d, IMM s))
{
	/* 8-bit immediate load. */
	CLOBBER_MOV;
	d=writereg(d,1);

	raw_mov_b_ri(d,s);
	unlock2(d);
}
MENDFUNC(2,mov_b_ri,(W1 d, IMM s))
3872
3873
/* Add immediate s directly to memory at absolute address d. */
MIDFUNC(2,add_l_mi,(IMM d, IMM s))
{
	/* 32-bit memory add. */
	CLOBBER_ADD;
	raw_add_l_mi(d,s) ;
}
MENDFUNC(2,add_l_mi,(IMM d, IMM s))

MIDFUNC(2,add_w_mi,(IMM d, IMM s))
{
	/* 16-bit memory add. */
	CLOBBER_ADD;
	raw_add_w_mi(d,s) ;
}
MENDFUNC(2,add_w_mi,(IMM d, IMM s))

MIDFUNC(2,add_b_mi,(IMM d, IMM s))
{
	/* 8-bit memory add. */
	CLOBBER_ADD;
	raw_add_b_mi(d,s) ;
}
MENDFUNC(2,add_b_mi,(IMM d, IMM s))
3894
3895
MIDFUNC(2,test_l_ri,(R4 d, IMM i))
{
	/* Set flags from d & i without modifying d (native TEST). */
	CLOBBER_TEST;
	d=readreg(d,4);

	raw_test_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,test_l_ri,(R4 d, IMM i))
3905
3906 MIDFUNC(2,test_l_rr,(R4 d, R4 s))
3907 {
3908 CLOBBER_TEST;
3909 d=readreg(d,4);
3910 s=readreg(s,4);
3911
3912 raw_test_l_rr(d,s);;
3913 unlock2(d);
3914 unlock2(s);
3915 }
3916 MENDFUNC(2,test_l_rr,(R4 d, R4 s))
3917
MIDFUNC(2,test_w_rr,(R2 d, R2 s))
{
	/* Set flags from (16-bit) d & s without modifying the operands. */
	CLOBBER_TEST;
	d=readreg(d,2);
	s=readreg(s,2);

	raw_test_w_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,test_w_rr,(R2 d, R2 s))

MIDFUNC(2,test_b_rr,(R1 d, R1 s))
{
	/* Set flags from (8-bit) d & s without modifying the operands. */
	CLOBBER_TEST;
	d=readreg(d,1);
	s=readreg(s,1);

	raw_test_b_rr(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,test_b_rr,(R1 d, R1 s))
3941
3942
MIDFUNC(2,and_l_ri,(RW4 d, IMM i))
{
	/* d &= i; folded at compile time when d is a known constant and
	   the flag results are dead. */
	if (isconst(d) && !needflags) {
	    live.state[d].val &= i;
	    return;
	}

	CLOBBER_AND;
	d=rmw(d,4,4);

	raw_and_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,and_l_ri,(RW4 d, IMM i))

MIDFUNC(2,and_l,(RW4 d, R4 s))
{
	/* 32-bit d &= s. */
	CLOBBER_AND;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_and_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_l,(RW4 d, R4 s))

MIDFUNC(2,and_w,(RW2 d, R2 s))
{
	/* 16-bit d &= s. */
	CLOBBER_AND;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_and_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_w,(RW2 d, R2 s))

MIDFUNC(2,and_b,(RW1 d, R1 s))
{
	/* 8-bit d &= s. */
	CLOBBER_AND;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_and_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,and_b,(RW1 d, R1 s))
3993
// gb-- used for making an fpcr value in compemu_fpp.cpp
/* OR a 32-bit value read from memory address s into virtual register d. */
MIDFUNC(2,or_l_rm,(RW4 d, IMM s))
{
	CLOBBER_OR;
	d=rmw(d,4,4);

	raw_or_l_rm(d,s);
	unlock2(d);
}
MENDFUNC(2,or_l_rm,(RW4 d, IMM s))
4004
/* OR an immediate into 32-bit virtual register d; folded at translation
   time when d is a known constant and the emulated flags are unneeded. */
MIDFUNC(2,or_l_ri,(RW4 d, IMM i))
{
	if (isconst(d) && !needflags) {
		live.state[d].val|=i;
		return;
	}
	CLOBBER_OR;
	d=rmw(d,4,4);

	raw_or_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,or_l_ri,(RW4 d, IMM i))
4018
/* OR virtual register s into virtual register d (32-bit); folded at
   translation time when both operands hold known constants and the
   emulated flags are not needed. */
MIDFUNC(2,or_l,(RW4 d, R4 s))
{
	if (isconst(d) && isconst(s) && !needflags) {
		live.state[d].val|=live.state[s].val;
		return;
	}
	CLOBBER_OR;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_or_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_l,(RW4 d, R4 s))
4034
/* 16- and 8-bit register/register OR emitters. */
MIDFUNC(2,or_w,(RW2 d, R2 s))
{
	CLOBBER_OR;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_or_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_w,(RW2 d, R2 s))

MIDFUNC(2,or_b,(RW1 d, R1 s))
{
	CLOBBER_OR;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_or_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,or_b,(RW1 d, R1 s))
4058
/* Add-with-carry emitters (32/16/8 bit): d += s + carry.
   NOTE(review): these rely on the host carry flag being set up by the
   caller before the emitted ADC executes — confirm against call sites. */
MIDFUNC(2,adc_l,(RW4 d, R4 s))
{
	CLOBBER_ADC;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_adc_l(d,s);

	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_l,(RW4 d, R4 s))

MIDFUNC(2,adc_w,(RW2 d, R2 s))
{
	CLOBBER_ADC;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_adc_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_w,(RW2 d, R2 s))

MIDFUNC(2,adc_b,(RW1 d, R1 s))
{
	CLOBBER_ADC;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_adc_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,adc_b,(RW1 d, R1 s))
4095
/* Register/register ADD emitters (32/16/8 bit): d += s.
   When s holds a known constant, delegate to the immediate form so the
   *_ri paths can apply their own folding / lazy-offset optimizations. */
MIDFUNC(2,add_l,(RW4 d, R4 s))
{
	if (isconst(s)) {
		COMPCALL(add_l_ri)(d,live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_add_l(d,s);

	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_l,(RW4 d, R4 s))

MIDFUNC(2,add_w,(RW2 d, R2 s))
{
	if (isconst(s)) {
		COMPCALL(add_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_add_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_w,(RW2 d, R2 s))

MIDFUNC(2,add_b,(RW1 d, R1 s))
{
	if (isconst(s)) {
		COMPCALL(add_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_ADD;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_add_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,add_b,(RW1 d, R1 s))
4147
/* Subtract-immediate emitters (32/16/8 bit): d -= i.
   The 32-bit form short-circuits a zero subtraction, folds into a known
   constant, and — with USE_OFFSET — records a lazy offset instead of
   emitting code when the emulated flags are not needed. */
MIDFUNC(2,sub_l_ri,(RW4 d, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(d) && !needflags) {
		live.state[d].val-=i;
		return;
	}
#if USE_OFFSET
	if (!needflags) {
		add_offset(d,-i);	/* defer: just remember d is off by -i */
		return;
	}
#endif

	CLOBBER_SUB;
	d=rmw(d,4,4);

	raw_sub_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,sub_l_ri,(RW4 d, IMM i))

MIDFUNC(2,sub_w_ri,(RW2 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_SUB;
	d=rmw(d,2,2);

	raw_sub_w_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,sub_w_ri,(RW2 d, IMM i))

MIDFUNC(2,sub_b_ri,(RW1 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_SUB;
	d=rmw(d,1,1);

	raw_sub_b_ri(d,i);

	unlock2(d);
}
MENDFUNC(2,sub_b_ri,(RW1 d, IMM i))
4197
/* Add-immediate emitters (32/16/8 bit): d += i.
   Mirrors the sub_*_ri family: zero add is dropped, constants are folded,
   and USE_OFFSET defers the add as a lazy offset when flags are unneeded. */
MIDFUNC(2,add_l_ri,(RW4 d, IMM i))
{
	if (!i && !needflags)
		return;
	if (isconst(d) && !needflags) {
		live.state[d].val+=i;
		return;
	}
#if USE_OFFSET
	if (!needflags) {
		add_offset(d,i);
		return;
	}
#endif
	CLOBBER_ADD;
	d=rmw(d,4,4);
	raw_add_l_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,add_l_ri,(RW4 d, IMM i))

MIDFUNC(2,add_w_ri,(RW2 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_ADD;
	d=rmw(d,2,2);

	raw_add_w_ri(d,i);
	unlock2(d);
}
MENDFUNC(2,add_w_ri,(RW2 d, IMM i))

MIDFUNC(2,add_b_ri,(RW1 d, IMM i))
{
	if (!i && !needflags)
		return;

	CLOBBER_ADD;
	d=rmw(d,1,1);

	raw_add_b_ri(d,i);

	unlock2(d);
}
MENDFUNC(2,add_b_ri,(RW1 d, IMM i))
4245
/* Subtract-with-borrow emitters (32/16/8 bit): d -= s + borrow.
   NOTE(review): like the adc_* family, these assume the host carry flag
   was loaded by the caller beforehand — confirm against call sites. */
MIDFUNC(2,sbb_l,(RW4 d, R4 s))
{
	CLOBBER_SBB;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_sbb_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_l,(RW4 d, R4 s))

MIDFUNC(2,sbb_w,(RW2 d, R2 s))
{
	CLOBBER_SBB;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_sbb_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_w,(RW2 d, R2 s))

MIDFUNC(2,sbb_b,(RW1 d, R1 s))
{
	CLOBBER_SBB;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_sbb_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sbb_b,(RW1 d, R1 s))
4281
/* Register/register SUB emitters (32/16/8 bit): d -= s.
   A constant s is routed to the immediate form for its optimizations. */
MIDFUNC(2,sub_l,(RW4 d, R4 s))
{
	if (isconst(s)) {
		COMPCALL(sub_l_ri)(d,live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_sub_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_l,(RW4 d, R4 s))

MIDFUNC(2,sub_w,(RW2 d, R2 s))
{
	if (isconst(s)) {
		COMPCALL(sub_w_ri)(d,(uae_u16)live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_sub_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_w,(RW2 d, R2 s))

MIDFUNC(2,sub_b,(RW1 d, R1 s))
{
	if (isconst(s)) {
		COMPCALL(sub_b_ri)(d,(uae_u8)live.state[s].val);
		return;
	}

	CLOBBER_SUB;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_sub_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,sub_b,(RW1 d, R1 s))
4332
/* Compare emitters (32-bit reg/reg, 32-bit reg/imm, 16-bit, 8-bit).
   Both operands are read-only; only the host flags are modified. */
MIDFUNC(2,cmp_l,(R4 d, R4 s))
{
	CLOBBER_CMP;
	s=readreg(s,4);
	d=readreg(d,4);

	raw_cmp_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_l,(R4 d, R4 s))

MIDFUNC(2,cmp_l_ri,(R4 r, IMM i))
{
	CLOBBER_CMP;
	r=readreg(r,4);

	raw_cmp_l_ri(r,i);
	unlock2(r);
}
MENDFUNC(2,cmp_l_ri,(R4 r, IMM i))

MIDFUNC(2,cmp_w,(R2 d, R2 s))
{
	CLOBBER_CMP;
	s=readreg(s,2);
	d=readreg(d,2);

	raw_cmp_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_w,(R2 d, R2 s))

MIDFUNC(2,cmp_b,(R1 d, R1 s))
{
	CLOBBER_CMP;
	s=readreg(s,1);
	d=readreg(d,1);

	raw_cmp_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,cmp_b,(R1 d, R1 s))
4378
4379
/* Register/register XOR emitters (32/16/8 bit): d ^= s. */
MIDFUNC(2,xor_l,(RW4 d, R4 s))
{
	CLOBBER_XOR;
	s=readreg(s,4);
	d=rmw(d,4,4);

	raw_xor_l(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_l,(RW4 d, R4 s))

MIDFUNC(2,xor_w,(RW2 d, R2 s))
{
	CLOBBER_XOR;
	s=readreg(s,2);
	d=rmw(d,2,2);

	raw_xor_w(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_w,(RW2 d, R2 s))

MIDFUNC(2,xor_b,(RW1 d, R1 s))
{
	CLOBBER_XOR;
	s=readreg(s,1);
	d=rmw(d,1,1);

	raw_xor_b(d,s);
	unlock2(d);
	unlock2(s);
}
MENDFUNC(2,xor_b,(RW1 d, R1 s))
4415
/* Emit an indirect call through register r with one argument (in1) and
   one result (out1). osize/isize are the result/argument widths in bytes.
   The call clobbers flags and all caller-saved registers, so everything
   is flushed first; afterwards out1 is bound to the native REG_RESULT. */
MIDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
{
	clobber_flags();
	remove_all_offsets();
	if (osize==4) {
		/* Full-width result: old value of out1 is dead unless aliased. */
		if (out1!=in1 && out1!=r) {
			COMPCALL(forget_about)(out1);
		}
	}
	else {
		/* Partial-width result: preserve the untouched bytes in memory. */
		tomem_c(out1);
	}

	in1=readreg_specific(in1,isize,REG_PAR1);
	r=readreg(r,4);
	prepare_for_call_1();  /* This should ensure that there won't be
				  any need for swapping nregs in prepare_for_call_2
			       */
#if USE_NORMAL_CALLING_CONVENTION
	raw_push_l_r(in1);
#endif
	unlock2(in1);
	unlock2(r);

	prepare_for_call_2();
	raw_call_r(r);

#if USE_NORMAL_CALLING_CONVENTION
	raw_inc_sp(4);	/* pop the pushed argument */
#endif


	/* Bind the virtual register out1 to the native result register. */
	live.nat[REG_RESULT].holds[0]=out1;
	live.nat[REG_RESULT].nholds=1;
	live.nat[REG_RESULT].touched=touchcnt++;

	live.state[out1].realreg=REG_RESULT;
	live.state[out1].realind=0;
	live.state[out1].val=0;
	live.state[out1].validsize=osize;
	live.state[out1].dirtysize=osize;
	set_status(out1,DIRTY);
}
MENDFUNC(5,call_r_11,(W4 out1, R4 r, R4 in1, IMM osize, IMM isize))
4460
/* Emit an indirect call through register r with two arguments and no
   result. Arguments are placed in the parameter registers (or pushed,
   under the normal calling convention); all live state is flushed. */
MIDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
{
	clobber_flags();
	remove_all_offsets();
	in1=readreg_specific(in1,isize1,REG_PAR1);
	in2=readreg_specific(in2,isize2,REG_PAR2);
	r=readreg(r,4);
	prepare_for_call_1();  /* This should ensure that there won't be
				  any need for swapping nregs in prepare_for_call_2
			       */
#if USE_NORMAL_CALLING_CONVENTION
	raw_push_l_r(in2);
	raw_push_l_r(in1);
#endif
	unlock2(r);
	unlock2(in1);
	unlock2(in2);
	prepare_for_call_2();
	raw_call_r(r);
#if USE_NORMAL_CALLING_CONVENTION
	raw_inc_sp(8);	/* pop the two pushed arguments */
#endif
}
MENDFUNC(5,call_r_02,(R4 r, R4 in1, R4 in2, IMM isize1, IMM isize2))
4485
/* forget_about() takes a mid-layer register */
/* Drop all knowledge about virtual register r: detach it from any native
   register and mark its contents undefined (nothing will be written back). */
MIDFUNC(1,forget_about,(W4 r))
{
	if (isinreg(r))
		disassociate(r);
	live.state[r].val=0;
	set_status(r,UNDEF);
}
MENDFUNC(1,forget_about,(W4 r))
4495
/* Emit a single host NOP instruction. */
MIDFUNC(0,nop,(void))
{
	raw_nop();
}
MENDFUNC(0,nop,(void))
4501
4502
/* FPU counterpart of forget_about(): drop virtual FP register r. */
MIDFUNC(1,f_forget_about,(FW r))
{
	if (f_isinreg(r))
		f_disassociate(r);
	live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
4510
/*
 * FPU constant loads and register<->memory moves. Each follows the same
 * pattern: map the virtual FP register (f_writereg for a destination,
 * f_readreg for a source), emit the raw FPU op, then release the mapping.
 */

/* Load pi into FP register r. */
MIDFUNC(1,fmov_pi,(FW r))
{
	r=f_writereg(r);
	raw_fmov_pi(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_pi,(FW r))

/* Load log10(2) into FP register r. */
MIDFUNC(1,fmov_log10_2,(FW r))
{
	r=f_writereg(r);
	raw_fmov_log10_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log10_2,(FW r))

/* Load log2(e) into FP register r. */
MIDFUNC(1,fmov_log2_e,(FW r))
{
	r=f_writereg(r);
	raw_fmov_log2_e(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_log2_e,(FW r))

/* Load ln(2) into FP register r. */
MIDFUNC(1,fmov_loge_2,(FW r))
{
	r=f_writereg(r);
	raw_fmov_loge_2(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_loge_2,(FW r))

/* Load 1.0 into FP register r. */
MIDFUNC(1,fmov_1,(FW r))
{
	r=f_writereg(r);
	raw_fmov_1(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_1,(FW r))

/* Load 0.0 into FP register r. */
MIDFUNC(1,fmov_0,(FW r))
{
	r=f_writereg(r);
	raw_fmov_0(r);
	f_unlock(r);
}
MENDFUNC(1,fmov_0,(FW r))

/* Load a double from memory m into FP register r. */
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmov_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))

/* Load an integer from memory m into FP register r (converted). */
MIDFUNC(2,fmovi_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmovi_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovi_rm,(FW r, MEMR m))

/* Store FP register r to memory m as an integer. */
MIDFUNC(2,fmovi_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmovi_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovi_mr,(MEMW m, FR r))

/* Load a single-precision float from memory m into FP register r. */
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmovs_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))

/* Store FP register r to memory m as single precision. */
MIDFUNC(2,fmovs_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmovs_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmovs_mr,(MEMW m, FR r))

/* Store FP register r to memory m in extended precision. */
MIDFUNC(2,fmov_ext_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmov_ext_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_mr,(MEMW m, FR r))

/* Store FP register r to memory m as a double. */
MIDFUNC(2,fmov_mr,(MEMW m, FR r))
{
	r=f_readreg(r);
	raw_fmov_mr(m,r);
	f_unlock(r);
}
MENDFUNC(2,fmov_mr,(MEMW m, FR r))

/* Load an extended-precision value from memory m into FP register r. */
MIDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
{
	r=f_writereg(r);
	raw_fmov_ext_rm(r,m);
	f_unlock(r);
}
MENDFUNC(2,fmov_ext_rm,(FW r, MEMR m))
4622
/* FP register-to-register move. With USE_F_ALIAS enabled, no code is
   emitted: d is simply recorded as another holder of s's native register
   (copy-on-demand); otherwise an actual raw move is generated. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
	if (d==s) { /* How pointless! */
		return;
	}
#if USE_F_ALIAS
	f_disassociate(d);
	s=f_readreg(s);
	/* Alias d onto s's native register instead of copying. */
	live.fate[d].realreg=s;
	live.fate[d].realind=live.fat[s].nholds;
	live.fate[d].status=DIRTY;
	live.fat[s].holds[live.fat[s].nholds]=d;
	live.fat[s].nholds++;
	f_unlock(s);
#else
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fmov_rr(d,s);
	f_unlock(s);
	f_unlock(d);
#endif
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
4646
/* Load the FPU control word from memory at base+index. */
MIDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))
{
	index=readreg(index,4);

	raw_fldcw_m_indexed(index,base);
	unlock2(index);
}
MENDFUNC(2,fldcw_m_indexed,(R4 index, IMM base))

/* Test FP register r (sets the FPU condition codes). */
MIDFUNC(1,ftst_r,(FR r))
{
	r=f_readreg(r);
	raw_ftst_r(r);
	f_unlock(r);
}
MENDFUNC(1,ftst_r,(FR r))

/* Declare the FP condition result dead so it need not be preserved. */
MIDFUNC(0,dont_care_fflags,(void))
{
	f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
4669
/*
 * Unary FPU operation emitters: d = op(s). All share the same shape —
 * read-map the source, write-map the destination, emit, release both.
 */
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsqrt_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))

MIDFUNC(2,fabs_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fabs_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))

MIDFUNC(2,fsin_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fsin_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsin_rr,(FW d, FR s))

MIDFUNC(2,fcos_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fcos_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcos_rr,(FW d, FR s))

/* d = 2^s */
MIDFUNC(2,ftwotox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_ftwotox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,ftwotox_rr,(FW d, FR s))

/* d = e^s */
MIDFUNC(2,fetox_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fetox_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fetox_rr,(FW d, FR s))

/* d = s rounded to integer (current rounding mode). */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_frndint_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))

/* d = log2(s) */
MIDFUNC(2,flog2_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_flog2_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,flog2_rr,(FW d, FR s))

/* d = -s */
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
	s=f_readreg(s);
	d=f_writereg(d);
	raw_fneg_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
4759
/*
 * Binary FPU operation emitters: d = d op s. The destination is mapped
 * read-modify-write (f_rmw); fcmp is the exception — it only reads both
 * operands and affects the FPU condition codes.
 */
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fadd_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))

MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fsub_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))

/* Compare only — both operands read-only. */
MIDFUNC(2,fcmp_rr,(FR d, FR s))
{
	d=f_readreg(d);
	s=f_readreg(s);
	raw_fcmp_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fcmp_rr,(FR d, FR s))

MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fdiv_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))

MIDFUNC(2,frem_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem_rr,(FRW d, FR s))

MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_frem1_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))

MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
	s=f_readreg(s);
	d=f_rmw(d);
	raw_fmul_rr(d,s);
	f_unlock(s);
	f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
4829
4830 /********************************************************************
4831 * Support functions exposed to gencomp. CREATE time *
4832 ********************************************************************/
4833
/* Decide whether virtual register r should be evicted to avoid a partial
   register access ("rat stall") on the host CPU. Only relevant when the
   KILLTHERAT policy is on and the target suffers from such stalls. */
int kill_rodent(int r)
{
	return KILLTHERAT &&
		have_rat_stall &&
		(live.state[r].status==INMEM ||
		 live.state[r].status==CLEAN ||
		 live.state[r].status==ISCONST ||
		 live.state[r].dirtysize==4);
}
4843
/* Return the compile-time constant held by virtual register r.
   Aborts (under Dif debugging) if r is not actually constant. */
uae_u32 get_const(int r)
{
	Dif (!isconst(r)) {
		write_log("Register %d should be constant, but isn't\n",r);
		abort();
	}
	return live.state[r].val;
}
4852
/* Flush the accumulated instruction-length delta into the virtual PC
   register (PC_P) and the host-side compile pointer comp_pc_p. */
void sync_m68k_pc(void)
{
	if (m68k_pc_offset) {
		add_l_ri(PC_P,m68k_pc_offset);
		comp_pc_p+=m68k_pc_offset;
		m68k_pc_offset=0;
	}
}
4861
4862 /********************************************************************
4863 * Scratch registers management *
4864 ********************************************************************/
4865
/* Backing memory for scratch (non-68k) virtual registers, so they can be
   spilled and reloaded just like the architecturally-backed ones. */
struct scratch_t {
	uae_u32 regs[VREGS];		/* integer scratch slots */
	fpu_register fregs[VFREGS];	/* FP scratch slots */
};

static scratch_t scratch;
4872
4873 /********************************************************************
4874 * Support functions exposed to newcpu *
4875 ********************************************************************/
4876
/* Render a boolean as the literal string "on" or "off" for log output. */
static inline const char *str_on_off(bool b)
{
	if (b)
		return "on";
	return "off";
}
4881
/* One-time JIT initialization: read preferences, probe the host CPU for
   features (CMOV, partial-register stalls, alignment), select the cache
   flush strategy, and build the compiler dispatch tables. Idempotent. */
void compiler_init(void)
{
	static bool initialized = false;
	if (initialized)
		return;

#ifndef WIN32
	// Open /dev/zero (used for anonymous memory mappings elsewhere)
	zero_fd = open("/dev/zero", O_RDWR);
	if (zero_fd < 0) {
		char str[200];
		sprintf(str, GetString(STR_NO_DEV_ZERO_ERR), strerror(errno));
		ErrorAlert(str);
		QuitEmulator();
	}
#endif

#if JIT_DEBUG
	// JIT debug mode ?
	JITDebug = PrefsFindBool("jitdebug");
#endif
	// NOTE(review): JITDebug is referenced here even when JIT_DEBUG is not
	// defined — presumably declared/initialized elsewhere; verify.
	write_log("<JIT compiler> : enable runtime disassemblers : %s\n", JITDebug ? "yes" : "no");

#ifdef USE_JIT_FPU
	// Use JIT compiler for FPU instructions ?
	avoid_fpu = !PrefsFindBool("jitfpu");
#else
	// JIT FPU is always disabled
	avoid_fpu = true;
#endif
	write_log("<JIT compiler> : compile FPU instructions : %s\n", !avoid_fpu ? "yes" : "no");

	// Get size of the translation cache (in KB)
	cache_size = PrefsFindInt32("jitcachesize");
	write_log("<JIT compiler> : requested translation cache size : %d KB\n", cache_size);

	// Initialize target CPU (check for features, e.g. CMOV, rat stalls)
	raw_init_cpu();
	setzflg_uses_bsf = target_check_bsf();
	write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
	write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
	write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);

	// Translation cache flush mechanism
	lazy_flush = PrefsFindBool("jitlazyflush");
	write_log("<JIT compiler> : lazy translation cache invalidation : %s\n", str_on_off(lazy_flush));
	flush_icache = lazy_flush ? flush_icache_lazy : flush_icache_hard;

	// Compiler features
	write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
	write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
	write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
	write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
	write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));

	// Build compiler tables
	build_comp();

	initialized = true;

#if PROFILE_UNTRANSLATED_INSNS
	write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
#endif

#if PROFILE_COMPILE_TIME
	write_log("<JIT compiler> : gather statistics on translation time\n");
	emul_start_time = clock();
#endif
}
4951
/* JIT shutdown: release the translation cache, close /dev/zero, and dump
   any profiling statistics that were collected during the session. */
void compiler_exit(void)
{
#if PROFILE_COMPILE_TIME
	emul_end_time = clock();
#endif

	// Deallocate translation cache
	if (compiled_code) {
		vm_release(compiled_code, cache_size * 1024);
		compiled_code = 0;
	}

#ifndef WIN32
	// Close /dev/zero
	// NOTE(review): "> 0" skips a descriptor value of 0; presumably the
	// static zero-initialized default is treated as "not open" — verify.
	if (zero_fd > 0)
		close(zero_fd);
#endif

#if PROFILE_COMPILE_TIME
	write_log("### Compile Block statistics\n");
	write_log("Number of calls to compile_block : %d\n", compile_count);
	uae_u32 emul_time = emul_end_time - emul_start_time;
	write_log("Total emulation time   : %.1f sec\n", double(emul_time)/double(CLOCKS_PER_SEC));
	write_log("Total compilation time : %.1f sec (%.1f%%)\n", double(compile_time)/double(CLOCKS_PER_SEC),
		100.0*double(compile_time)/double(emul_time));
	write_log("\n");
#endif

#if PROFILE_UNTRANSLATED_INSNS
	uae_u64 untranslated_count = 0;
	for (int i = 0; i < 65536; i++) {
		opcode_nums[i] = i;
		untranslated_count += raw_cputbl_count[i];
	}
	write_log("Sorting out untranslated instructions count...\n");
	qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
	write_log("\nRank  Opc      Count Name\n");
	for (int i = 0; i < untranslated_top_ten; i++) {
		uae_u32 count = raw_cputbl_count[opcode_nums[i]];
		struct instr *dp;
		struct mnemolookup *lookup;
		if (!count)
			break;
		dp = table68k + opcode_nums[i];
		for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
			;
		write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
	}
#endif
}
5002
/* Decide whether the JIT should be enabled for this session, based on the
   "jit" pref, a minimum cache size, and the emulated CPU model. */
bool compiler_use_jit(void)
{
	// Check for the "jit" prefs item
	if (!PrefsFindBool("jit"))
		return false;

	// Don't use JIT if translation cache size is less then MIN_CACHE_SIZE KB
	if (PrefsFindInt32("jitcachesize") < MIN_CACHE_SIZE) {
		write_log("<JIT compiler> : translation cache size is less than %d KB. Disabling JIT.\n", MIN_CACHE_SIZE);
		return false;
	}
	
	// FIXME: there are currently problems with JIT compilation and anything below a 68040
	if (CPUType < 4) {
		write_log("<JIT compiler> : 68040 emulation is required instead of 680%d0. Disabling JIT.\n", CPUType);
		return false;
	}

	return true;
}
5023
/* Reset the register-allocation state at the start of a translated block:
   bind the first 16 virtual regs to the 68k register file, the flag/PC
   specials to their memory homes, scratch regs to the scratch arrays, and
   mark the native registers' byte/word capabilities and locked set. */
void init_comp(void)
{
	int i;
	uae_s8* cb=can_byte;	/* native regs with byte access, terminated list */
	uae_s8* cw=can_word;	/* native regs with word access */
	uae_s8* au=always_used;	/* native regs permanently locked (e.g. SP) */

	for (i=0;i<VREGS;i++) {
		live.state[i].realreg=-1;
		live.state[i].needflush=NF_SCRATCH;
		live.state[i].val=0;
		set_status(i,UNDEF);
	}

	for (i=0;i<VFREGS;i++) {
		live.fate[i].status=UNDEF;
		live.fate[i].realreg=-1;
		live.fate[i].needflush=NF_SCRATCH;
	}

	for (i=0;i<VREGS;i++) {
		if (i<16) { /* First 16 registers map to 68k registers */
			live.state[i].mem=((uae_u32*)&regs)+i;
			live.state[i].needflush=NF_TOMEM;
			set_status(i,INMEM);
		}
		else
			live.state[i].mem=scratch.regs+i;
	}
	live.state[PC_P].mem=(uae_u32*)&(regs.pc_p);
	live.state[PC_P].needflush=NF_TOMEM;
	set_const(PC_P,(uae_u32)comp_pc_p);

	live.state[FLAGX].mem=&(regflags.x);
	live.state[FLAGX].needflush=NF_TOMEM;
	set_status(FLAGX,INMEM);
	
	live.state[FLAGTMP].mem=&(regflags.cznv);
	live.state[FLAGTMP].needflush=NF_TOMEM;
	set_status(FLAGTMP,INMEM);

	live.state[NEXT_HANDLER].needflush=NF_HANDLER;
	set_status(NEXT_HANDLER,UNDEF);

	for (i=0;i<VFREGS;i++) {
		if (i<8) { /* First 8 registers map to 68k FPU registers */
			live.fate[i].mem=(uae_u32*)fpu_register_address(i);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else if (i==FP_RESULT) {
			live.fate[i].mem=(uae_u32*)(&fpu.result);
			live.fate[i].needflush=NF_TOMEM;
			live.fate[i].status=INMEM;
		}
		else
			live.fate[i].mem=(uae_u32*)(scratch.fregs+i);
	}


	for (i=0;i<N_REGS;i++) {
		live.nat[i].touched=0;
		live.nat[i].nholds=0;
		live.nat[i].locked=0;
		if (*cb==i) {
			live.nat[i].canbyte=1; cb++;
		} else live.nat[i].canbyte=0;
		if (*cw==i) {
			live.nat[i].canword=1; cw++;
		} else live.nat[i].canword=0;
		if (*au==i) {
			live.nat[i].locked=1; au++;
		}
	}

	for (i=0;i<N_FREGS;i++) {
		live.fat[i].touched=0;
		live.fat[i].nholds=0;
		live.fat[i].locked=0;
	}
	
	touchcnt=1;
	m68k_pc_offset=0;
	live.flags_in_flags=TRASH;
	live.flags_on_stack=VALID;
	live.flags_are_important=1;

	raw_fp_init();
}
5113
5114 /* Only do this if you really mean it! The next call should be to init!*/
5115 void flush(int save_regs)
5116 {
5117 int fi,i;
5118
5119 log_flush();
5120 flush_flags(); /* low level */
5121 sync_m68k_pc(); /* mid level */
5122
5123 if (save_regs) {
5124 for (i=0;i<VFREGS;i++) {
5125 if (live.fate[i].needflush==NF_SCRATCH ||
5126 live.fate[i].status==CLEAN) {
5127 f_disassociate(i);
5128 }
5129 }
5130 for (i=0;i<VREGS;i++) {
5131 if (live.state[i].needflush==NF_TOMEM) {
5132 switch(live.state[i].status) {
5133 case INMEM:
5134 if (live.state[i].val) {
5135 raw_add_l_mi((uae_u32)live.state[i].mem,live.state[i].val);
5136 log_vwrite(i);
5137 live.state[i].val=0;
5138 }
5139 break;
5140 case CLEAN:
5141 case DIRTY:
5142 remove_offset(i,-1); tomem(i); break;
5143 case ISCONST:
5144 if (i!=PC_P)
5145 writeback_const(i);
5146 break;
5147 default: break;
5148 }
5149 Dif (live.state[i].val && i!=PC_P) {
5150 write_log("Register %d still has val %x\n",
5151 i,live.state[i].val);
5152 }
5153 }
5154 }
5155 for (i=0;i<VFREGS;i++) {
5156 if (live.fate[i].needflush==NF_TOMEM &&
5157 live.fate[i].status==DIRTY) {
5158 f_evict(i);
5159 }
5160 }
5161 raw_fp_cleanup_drop();
5162 }
5163 if (needflags) {
5164 write_log("Warning! flush with needflags=1!\n");
5165 }
5166 }
5167
5168 static void flush_keepflags(void)
5169 {
5170 int fi,i;
5171
5172 for (i=0;i<VFREGS;i++) {
5173 if (live.fate[i].needflush==NF_SCRATCH ||
5174 live.fate[i].status==CLEAN) {
5175 f_disassociate(i);
5176 }
5177 }
5178 for (i=0;i<VREGS;i++) {
5179 if (live.state[i].needflush==NF_TOMEM) {
5180 switch(live.state[i].status) {
5181 case INMEM:
5182 /* Can't adjust the offset here --- that needs "add" */
5183 break;
5184 case CLEAN:
5185 case DIRTY:
5186 remove_offset(i,-1); tomem(i); break;
5187 case ISCONST:
5188 if (i!=PC_P)
5189 writeback_const(i);
5190 break;
5191 default: break;
5192 }
5193 }
5194 }
5195 for (i=0;i<VFREGS;i++) {
5196 if (live.fate[i].needflush==NF_TOMEM &&
5197 live.fate[i].status==DIRTY) {
5198 f_evict(i);
5199 }
5200 }
5201 raw_fp_cleanup_drop();
5202 }
5203
/* Invalidate all scratch virtual registers (integer and FP) and warn
   about unexpectedly locked native registers. */
void freescratch(void)
{
	int i;
	for (i=0;i<N_REGS;i++)
		/* NOTE(review): native reg 4 (presumably ESP) is expected to
		   stay locked, hence the exemption — confirm. */
		if (live.nat[i].locked && i!=4)
			write_log("Warning! %d is locked\n",i);

	for (i=0;i<VREGS;i++)
		if (live.state[i].needflush==NF_SCRATCH) {
			forget_about(i);
		}

	for (i=0;i<VFREGS;i++)
		if (live.fate[i].needflush==NF_SCRATCH) {
			f_forget_about(i);
		}
}
5221
5222 /********************************************************************
5223 * Support functions, internal *
5224 ********************************************************************/
5225
5226
/* Pad the code-generation pointer up to an a-byte boundary (a must be a
   power of two, or 0 for no alignment), using efficient multi-byte NOP
   fillers when the target supports them. */
static void align_target(uae_u32 a)
{
	if (!a)
		return;

	if (tune_nop_fillers)
		raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
	else {
		/* Fill with NOPs --- makes debugging with gdb easier */
		while ((uae_u32)target&(a-1))
			*target++=0x90;	/* x86 NOP opcode */
	}
}
5240
/* Return nonzero if host address addr lies inside the mapped Mac ROM. */
static __inline__ int isinrom(uintptr addr)
{
	return ((addr >= (uintptr)ROMBaseHost) && (addr < (uintptr)ROMBaseHost + ROMSize));
}
5245
/* Store every dirty virtual register that lives in a caller-saved native
   register, and evict all FP registers — used before calling out to C. */
static void flush_all(void)
{
	int i;

	log_flush();
	for (i=0;i<VREGS;i++)
		if (live.state[i].status==DIRTY) {
			if (!call_saved[live.state[i].realreg]) {
				tomem(i);
			}
		}
	for (i=0;i<VFREGS;i++)
		if (f_isinreg(i))
			f_evict(i);
	raw_fp_cleanup_drop();
}
5262
/* Make sure all registers that will get clobbered by a call are
   save and sound in memory */
static void prepare_for_call_1(void)
{
	flush_all();	/* If there are registers that don't get clobbered,
			 * we should be a bit more selective here */
}
5270
/* We will call a C routine in a moment. That will clobber all registers,
   so we need to disassociate everything */
static void prepare_for_call_2(void)
{
	int i;
	/* Free every caller-saved native register still holding something. */
	for (i=0;i<N_REGS;i++)
		if (!call_saved[i] && live.nat[i].nholds>0)
			free_nreg(i);

	for (i=0;i<N_FREGS;i++)
		if (live.fat[i].nholds>0)
			f_free_nreg(i);

	live.flags_in_flags=TRASH;  /* Note: We assume we already rescued the
				       flags at the very start of the call_r
				       functions! */
}
5288
5289 /********************************************************************
5290 * Memory access and related functions, CREATE time *
5291 ********************************************************************/
5292
/* Record the two possible successor 68k PCs of a conditional branch and
   the condition code; the block epilogue uses these to emit the exits. */
void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond)
{
	next_pc_p=not_taken;
	taken_pc_p=taken;
	branch_cc=cond;
}
5299
5300
5301 static uae_u32 get_handler_address(uae_u32 addr)
5302 {
5303 uae_u32 cl=cacheline(addr);
5304 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5305 return (uae_u32)&(bi->direct_handler_to_use);
5306 }
5307
5308 static uae_u32 get_handler(uae_u32 addr)
5309 {
5310 uae_u32 cl=cacheline(addr);
5311 blockinfo* bi=get_blockinfo_addr_new((void*)addr,0);
5312 return (uae_u32)bi->direct_handler_to_use;
5313 }
5314
/* Emit code loading the current handler for 68k address addr into reg. */
static void load_handler(int reg, uae_u32 addr)
{
	mov_l_rm(reg,get_handler_address(addr));
}
5319
/* This version assumes that it is writing *real* memory, and *will* fail
 * if that assumption is wrong! No branches, no second chances, just
 * straight go-for-it attitude */

/* Byte-swap (68k is big-endian) and store a 1/2/4-byte value to the
   direct-mapped address. tmp is a scratch virtual reg for the swap; with
   clobber set, source itself may be trashed instead.
   NOTE(review): `offset` is unused in this direct-addressing variant —
   presumably kept for interface parity with a banked version. */
static void writemem_real(int address, int source, int offset, int size, int tmp, int clobber)
{
	int f=tmp;

	if (clobber)
		f=source;
	switch(size) {
	 case 1: mov_b_bRr(address,source,MEMBaseDiff); break;
	 case 2: mov_w_rr(f,source); bswap_16(f); mov_w_bRr(address,f,MEMBaseDiff); break;
	 case 4: mov_l_rr(f,source); bswap_32(f); mov_l_bRr(address,f,MEMBaseDiff); break;
	}
	forget_about(tmp);
	forget_about(f);
}
5338
/* Public store wrappers: emit a byte/word/long store of virtual register
   source to 68k address register address. The _clobber variants may trash
   source instead of using tmp for the byte swap. */
void writebyte(int address, int source, int tmp)
{
	writemem_real(address,source,20,1,tmp,0);
}

static __inline__ void writeword_general(int address, int source, int tmp,
					 int clobber)
{
	writemem_real(address,source,16,2,tmp,clobber);
}

void writeword_clobber(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,1);
}

void writeword(int address, int source, int tmp)
{
	writeword_general(address,source,tmp,0);
}

static __inline__ void writelong_general(int address, int source, int tmp,
					 int clobber)
{
	writemem_real(address,source,12,4,tmp,clobber);
}

void writelong_clobber(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,1);
}

void writelong(int address, int source, int tmp)
{
	writelong_general(address,source,tmp,0);
}
5375
5376
5377
5378 /* This version assumes that it is reading *real* memory, and *will* fail
5379 * if that assumption is wrong! No branches, no second chances, just
5380 * straight go-for-it attitude */
5381
5382 static void readmem_real(int address, int dest, int offset, int size, int tmp)
5383 {
5384 int f=tmp;
5385
5386 if (size==4 && address!=dest)
5387 f=dest;
5388
5389 switch(size) {
5390 case 1: mov_b_brR(dest,address,MEMBaseDiff); break;
5391 case 2: mov_w_brR(dest,address,MEMBaseDiff); bswap_16(dest); break;
5392 case 4: mov_l_brR(dest,address,MEMBaseDiff); bswap_32(dest); break;
5393 }
5394 forget_about(tmp);
5395 }
5396
/* Emit a byte load from the emulated address in ADDRESS into DEST. */
void readbyte(int address, int dest, int tmp)
{
	readmem_real(address,dest,8,1,tmp);
}
5401
/* Emit a (byte-swapped) word load from the emulated address in ADDRESS. */
void readword(int address, int dest, int tmp)
{
	readmem_real(address,dest,4,2,tmp);
}
5406
/* Emit a (byte-swapped) long load from the emulated address in ADDRESS. */
void readlong(int address, int dest, int tmp)
{
	readmem_real(address,dest,0,4,tmp);
}
5411
5412 void get_n_addr(int address, int dest, int tmp)
5413 {
5414 // a is the register containing the virtual address
5415 // after the offset had been fetched
5416 int a=tmp;
5417
5418 // f is the register that will contain the offset
5419 int f=tmp;
5420
5421 // a == f == tmp if (address == dest)
5422 if (address!=dest) {
5423 a=address;
5424 f=dest;
5425 }
5426
5427 #if REAL_ADDRESSING
5428 mov_l_rr(dest, address);
5429 #elif DIRECT_ADDRESSING
5430 lea_l_brr(dest,address,MEMBaseDiff);
5431 #endif
5432 forget_about(tmp);
5433 }
5434
/* Emit code computing the host address used for jump targets. */
void get_n_addr_jmp(int address, int dest, int tmp)
{
	/* For this, we need to get the same address as the rest of UAE
	   would --- otherwise we end up translating everything twice */
	get_n_addr(address,dest,tmp);
}
5441
5442
/* base is a register, but dp is an actual value.
   target is a register, as is tmp */
/* Emit code computing a 68020-style extended effective address described by
 * the extension word DP, leaving the result in register TARGET.  Additional
 * displacement words/longs are consumed from the instruction stream via
 * m68k_pc_offset.  TMP is a scratch register, released on exit. */
void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp)
{
	int reg = (dp >> 12) & 15;       /* index register number */
	int regd_shift=(dp >> 9) & 3;    /* index scale factor: 1 << n */

	if (dp & 0x100) {
		/* Full extension word: optional displacements, suppressed
		   registers, and possible memory indirection. */
		int ignorebase=(dp&0x80);    /* BS bit: base register suppressed */
		int ignorereg=(dp&0x40);     /* IS bit: index register suppressed */
		int addbase=0;               /* base displacement */
		int outer=0;                 /* outer displacement */

		/* Fetch base displacement (word or long) from the code stream. */
		if ((dp & 0x30) == 0x20) addbase = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x30) == 0x30) addbase = comp_get_ilong((m68k_pc_offset+=4)-4);

		/* Fetch outer displacement (word or long). */
		if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)comp_get_iword((m68k_pc_offset+=2)-2);
		if ((dp & 0x3) == 0x3) outer = comp_get_ilong((m68k_pc_offset+=4)-4);

		if ((dp & 0x4) == 0) { /* add regd *before* the get_long (pre-indexed) */
			if (!ignorereg) {
				/* Scaled index: word- or long-sized register value. */
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(target,reg);
				else
					mov_l_rr(target,reg);
				shll_l_ri(target,regd_shift);
			}
			else
				mov_l_ri(target,0);

			/* target is now regd */
			if (!ignorebase)
				add_l(target,base);
			add_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);	/* memory indirection */
		} else { /* do the getlong first, then add regd (post-indexed) */
			if (!ignorebase) {
				mov_l_rr(target,base);
				add_l_ri(target,addbase);
			}
			else
				mov_l_ri(target,addbase);
			if (dp&0x03) readlong(target,target,tmp);	/* memory indirection */

			if (!ignorereg) {
				if ((dp & 0x800) == 0)
					sign_extend_16_rr(tmp,reg);
				else
					mov_l_rr(tmp,reg);
				shll_l_ri(tmp,regd_shift);
				/* tmp is now regd */
				add_l(target,tmp);
			}
		}
		add_l_ri(target,outer);
	}
	else { /* 68000 version: brief extension word, signed 8-bit displacement */
		if ((dp & 0x800) == 0) { /* Sign extend */
			sign_extend_16_rr(target,reg);
			lea_l_brr_indexed(target,base,target,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
		else {
			lea_l_brr_indexed(target,base,reg,1<<regd_shift,(uae_s32)((uae_s8)dp));
		}
	}
	forget_about(tmp);
}
5510
5511
5512
5513
5514
5515 void set_cache_state(int enabled)
5516 {
5517 if (enabled!=letit)
5518 flush_icache_hard(77);
5519 letit=enabled;
5520 }
5521
/* Report whether the translation cache is currently enabled. */
int get_cache_state(void)
{
	return letit;
}
5526
5527 uae_u32 get_jitted_size(void)
5528 {
5529 if (compiled_code)
5530 return current_compile_p-compiled_code;
5531 return 0;
5532 }
5533
/* Tunables for do_alloc_code(): how many placement attempts to make, and
 * the step between candidate addresses when probing for a free region. */
const int CODE_ALLOC_MAX_ATTEMPTS = 10;
const int CODE_ALLOC_BOUNDARIES = 128 * 1024; // 128 KB
5536
/* Allocate SIZE bytes of memory for translated code.  The Linux-specific
 * placement hack below is currently disabled (`&& 0`), so in practice this
 * is a plain vm_acquire().  DEPTH bounds the recursion of the disabled
 * retry path.  Returns NULL on failure. */
static uint8 *do_alloc_code(uint32 size, int depth)
{
#if defined(__linux__) && 0
	/*
	  This is a really awful hack that is known to work on Linux at
	  least.

	  The trick here is to make sure the allocated cache is nearby
	  code segment, and more precisely in the positive half of a
	  32-bit address space. i.e. addr < 0x80000000. Actually, it
	  turned out that a 32-bit binary run on AMD64 yields a cache
	  allocated around 0xa0000000, thus causing some troubles when
	  translating addresses from m68k to x86.
	*/
	static uint8 * code_base = NULL;
	if (code_base == NULL) {
		/* First call: start probing just above the data segment,
		   stepping by at least one page. */
		uintptr page_size = getpagesize();
		uintptr boundaries = CODE_ALLOC_BOUNDARIES;
		if (boundaries < page_size)
			boundaries = page_size;
		code_base = (uint8 *)sbrk(0);
		for (int attempts = 0; attempts < CODE_ALLOC_MAX_ATTEMPTS; attempts++) {
			if (vm_acquire_fixed(code_base, size) == 0) {
				uint8 *code = code_base;
				code_base += size;
				return code;
			}
			code_base += boundaries;
		}
		return NULL;
	}

	/* Subsequent calls: continue from where the last allocation ended. */
	if (vm_acquire_fixed(code_base, size) == 0) {
		uint8 *code = code_base;
		code_base += size;
		return code;
	}

	if (depth >= CODE_ALLOC_MAX_ATTEMPTS)
		return NULL;

	return do_alloc_code(size, depth + 1);
#else
	/* Active path: simple anonymous mapping. */
	uint8 *code = (uint8 *)vm_acquire(size);
	return code == VM_MAP_FAILED ? NULL : code;
#endif
}
5584
/* Public entry point for code-memory allocation; starts at retry depth 0. */
static inline uint8 *alloc_code(uint32 size)
{
	return do_alloc_code(size, 0);
}
5589
5590 void alloc_cache(void)
5591 {
5592 if (compiled_code) {
5593 flush_icache_hard(6);
5594 vm_release(compiled_code, cache_size * 1024);
5595 compiled_code = 0;
5596 }
5597
5598 if (cache_size == 0)
5599 return;
5600
5601 while (!compiled_code && cache_size) {
5602 if ((compiled_code = alloc_code(cache_size * 1024)) == NULL) {
5603 compiled_code = 0;
5604 cache_size /= 2;
5605 }
5606 }
5607 vm_protect(compiled_code, cache_size, VM_PAGE_READ | VM_PAGE_WRITE | VM_PAGE_EXECUTE);
5608
5609 if (compiled_code) {
5610 write_log("<JIT compiler> : actual translation cache size : %d KB at 0x%08X\n", cache_size, compiled_code);
5611 max_compile_start = compiled_code + cache_size*1024 - BYTES_PER_INST;
5612 current_compile_p = compiled_code;
5613 current_cache_size = 0;
5614 }
5615 }
5616
5617
5618
5619 extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5620
/* Compute the two checksums (additive K1, xor K2) over the 68k code covered
 * by block BI, storing them in *C1/*C2.  With USE_CHECKSUM_INFO the block
 * may cover several discontiguous ranges (csi chain); note the deliberate
 * brace/preprocessor interleaving — the while-loop body spans the #else
 * section.  Ranges are rounded down to 4-byte alignment and summed in
 * 32-bit words; over-long ranges are skipped (checksums stay 0). */
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
{
	uae_u32 k1 = 0;
	uae_u32 k2 = 0;

#if USE_CHECKSUM_INFO
	checksum_info *csi = bi->csi;
	Dif(!csi) abort();
	while (csi) {
		uae_s32 len = csi->length;
		uae_u32 tmp = (uae_u32)csi->start_p;
#else
	uae_s32 len = bi->len;
	uae_u32 tmp = (uae_u32)bi->min_pcp;
#endif
	uae_u32*pos;

	/* Align the start down to a 4-byte boundary, widening len to match. */
	len += (tmp & 3);
	tmp &= ~3;
	pos = (uae_u32 *)tmp;

	if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
		while (len > 0) {
			k1 += *pos;
			k2 ^= *pos;
			pos++;
			len -= 4;
		}
	}

#if USE_CHECKSUM_INFO
	csi = csi->next;
	}
#endif

	*c1 = k1;
	*c2 = k2;
}
5659
#if 0
/* Debug helper (currently compiled out): dump the 32-bit words that would
 * be folded into the checksum for CSI, using the same alignment rules as
 * calc_checksum(). */
static void show_checksum(CSI_TYPE* csi)
{
	uae_u32 k1=0;
	uae_u32 k2=0;
	uae_s32 len=CSI_LENGTH(csi);
	uae_u32 tmp=(uae_u32)CSI_START_P(csi);
	uae_u32* pos;

	len+=(tmp&3);
	tmp&=(~3);
	pos=(uae_u32*)tmp;

	if (len<0 || len>MAX_CHECKSUM_LEN) {
		return;
	}
	else {
		while (len>0) {
			write_log("%08x ",*pos);
			pos++;
			len-=4;
		}
		write_log(" bla\n");
	}
}
#endif
5686
5687
5688 int check_for_cache_miss(void)
5689 {
5690 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5691
5692 if (bi) {
5693 int cl=cacheline(regs.pc_p);
5694 if (bi!=cache_tags[cl+1].bi) {
5695 raise_in_cl_list(bi);
5696 return 1;
5697 }
5698 }
5699 return 0;
5700 }
5701
5702
5703 static void recompile_block(void)
5704 {
5705 /* An existing block's countdown code has expired. We need to make
5706 sure that execute_normal doesn't refuse to recompile due to a
5707 perceived cache miss... */
5708 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5709
5710 Dif (!bi)
5711 abort();
5712 raise_in_cl_list(bi);
5713 execute_normal();
5714 return;
5715 }
5716 static void cache_miss(void)
5717 {
5718 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5719 uae_u32 cl=cacheline(regs.pc_p);
5720 blockinfo* bi2=get_blockinfo(cl);
5721
5722 if (!bi) {
5723 execute_normal(); /* Compile this block now */
5724 return;
5725 }
5726 Dif (!bi2 || bi==bi2) {
5727 write_log("Unexplained cache miss %p %p\n",bi,bi2);
5728 abort();
5729 }
5730 raise_in_cl_list(bi);
5731 return;
5732 }
5733
5734 static int called_check_checksum(blockinfo* bi);
5735
/* Verify that the 68k code underlying block BI has not changed since it was
 * translated.  If still valid, the block (and recursively the blocks it
 * jumps to) is reactivated; otherwise it is invalidated for recompilation.
 * Returns nonzero when the block is (still) good. */
static inline int block_check_checksum(blockinfo* bi)
{
	uae_u32 c1,c2;
	bool isgood;

	if (bi->status!=BI_NEED_CHECK)
		return 1; /* This block is in a checked state */

	checksum_count++;

	if (bi->c1 || bi->c2)
		calc_checksum(bi,&c1,&c2);
	else {
		c1=c2=1; /* Make sure it doesn't match */
	}

	isgood=(c1==bi->c1 && c2==bi->c2);

	if (isgood) {
		/* This block is still OK. So we reactivate. Of course, that
		   means we have to move it into the needs-to-be-flushed list */
		bi->handler_to_use=bi->handler;
		set_dhtu(bi,bi->direct_handler);
		/* Mark as BI_CHECKING while we recurse into dependent blocks,
		   so cycles in the dependency graph terminate. */
		bi->status=BI_CHECKING;
		isgood=called_check_checksum(bi);
	}
	if (isgood) {
		/* write_log("reactivate %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2);*/
		remove_from_list(bi);
		add_to_active(bi);
		raise_in_cl_list(bi);
		bi->status=BI_ACTIVE;
	}
	else {
		/* This block actually changed. We need to invalidate it,
		   and set it up to be recompiled */
		/* write_log("discard %p/%p (%x %x/%x %x)\n",bi,bi->pc_p,
		   c1,c2,bi->c1,bi->c2); */
		invalidate_block(bi);
		raise_in_cl_list(bi);
	}
	return isgood;
}
5780
5781 static int called_check_checksum(blockinfo* bi)
5782 {
5783 dependency* x=bi->deplist;
5784 int isgood=1;
5785 int i;
5786
5787 for (i=0;i<2 && isgood;i++) {
5788 if (bi->dep[i].jmp_off) {
5789 isgood=block_check_checksum(bi->dep[i].target);
5790 }
5791 }
5792 return isgood;
5793 }
5794
5795 static void check_checksum(void)
5796 {
5797 blockinfo* bi=get_blockinfo_addr(regs.pc_p);
5798 uae_u32 cl=cacheline(regs.pc_p);
5799 blockinfo* bi2=get_blockinfo(cl);
5800
5801 /* These are not the droids you are looking for... */
5802 if (!bi) {
5803 /* Whoever is the primary target is in a dormant state, but
5804 calling it was accidental, and we should just compile this
5805 new block */
5806 execute_normal();
5807 return;
5808 }
5809 if (bi!=bi2) {
5810 /* The block was hit accidentally, but it does exist. Cache miss */
5811 cache_miss();
5812 return;
5813 }
5814
5815 if (!block_check_checksum(bi))
5816 execute_normal();
5817 }
5818
/* Make the current register-allocation state compatible with what block BI
 * expects on entry: honour its promises about unneeded virtual registers,
 * flush everything to memory, then preload the native registers the block
 * demands. */
static __inline__ void match_states(blockinfo* bi)
{
	int i;
	smallstate* s=&(bi->env);

	if (bi->status==BI_NEED_CHECK) {
		block_check_checksum(bi);	/* may reactivate or invalidate BI */
	}
	if (bi->status==BI_ACTIVE ||
		bi->status==BI_FINALIZING) { /* Deal with the *promises* the
						block makes (about not using
						certain vregs) */
		/* NOTE(review): loops over 16 entries, not VREGS — presumably
		   only the 68k D0-D7/A0-A7 registers; verify against smallstate. */
		for (i=0;i<16;i++) {
			if (s->virt[i]==L_UNNEEDED) {
				// write_log("unneeded reg %d at %p\n",i,target);
				COMPCALL(forget_about)(i); // FIXME
			}
		}
	}
	flush(1);

	/* And now deal with the *demands* the block makes */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			// printf("Loading reg %d into %d at %p\n",v,i,target);
			readreg_specific(v,4,i);
			// do_load_reg(i,v);
			// setlock(i);
		}
	}
	/* Second pass: drop the locks taken by readreg_specific above. */
	for (i=0;i<N_REGS;i++) {
		int v=s->nat[i];
		if (v>=0) {
			unlock2(i);
		}
	}
}
5857
/* Static buffer receiving the small push/pop trampolines emitted by
 * create_popalls(); lives outside the main translation cache. */
static uae_u8 popallspace[1024]; /* That should be enough space */
5859
/* Generate the fixed trampolines used at the boundary between translated
 * code and the C world: for each get-out routine, a stub that restores the
 * callee-saved registers and jumps to it; the pushall_call_handler that
 * saves them and dispatches into the cache; and (x86) the main JIT
 * dispatch loop m68k_compile_execute. */
static __inline__ void create_popalls(void)
{
	int i,r;

	current_compile_p=popallspace;
	set_target(current_compile_p);
#if USE_PUSH_POP
	/* If we can't use gcc inline assembly, we need to pop some
	   registers before jumping back to the various get-out routines.
	   This generates the code for it.
	*/
	align_target(align_jumps);
	popall_do_nothing=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uae_u32)do_nothing);

	align_target(align_jumps);
	popall_execute_normal=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uae_u32)execute_normal);

	align_target(align_jumps);
	popall_cache_miss=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uae_u32)cache_miss);

	align_target(align_jumps);
	popall_recompile_block=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uae_u32)recompile_block);

	align_target(align_jumps);
	popall_exec_nostats=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uae_u32)exec_nostats);

	align_target(align_jumps);
	popall_check_checksum=get_target();
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_jmp((uae_u32)check_checksum);

	align_target(align_jumps);
	current_compile_p=get_target();
#else
	/* Without push/pop wrappers the get-out routines are called directly. */
	popall_exec_nostats=(void *)exec_nostats;
	popall_execute_normal=(void *)execute_normal;
	popall_cache_miss=(void *)cache_miss;
	popall_recompile_block=(void *)recompile_block;
	popall_do_nothing=(void *)do_nothing;
	popall_check_checksum=(void *)check_checksum;
#endif

	/* And now, the code to do the matching pushes and then jump
	   into a handler routine */
	pushall_call_handler=get_target();
#if USE_PUSH_POP
	/* Push in reverse order so the pops above restore correctly. */
	for (i=N_REGS;i--;) {
		if (need_to_preserve[i])
			raw_push_l_r(i);
	}
#endif
	/* Dispatch: hash the current PC into the cache_tags table and jump. */
	r=REG_PC_TMP;
	raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
	raw_and_l_ri(r,TAGMASK);
	raw_jmp_m_indexed((uae_u32)cache_tags,r,4);

#ifdef X86_ASSEMBLY
	/* Full dispatch loop: call handler, then re-loop while no special
	   flags are pending, m68k_do_specialties() returns 0, and the
	   emulator has not been asked to quit. */
	align_target(align_jumps);
	m68k_compile_execute = (void (*)(void))get_target();
	for (i=N_REGS;i--;) {
		if (need_to_preserve[i])
			raw_push_l_r(i);
	}
	align_target(align_loops);
	uae_u32 dispatch_loop = (uae_u32)get_target();
	r=REG_PC_TMP;
	raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
	raw_and_l_ri(r,TAGMASK);
	raw_call_m_indexed((uae_u32)cache_tags,r,4);
	raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
	raw_jcc_b_oponly(NATIVE_CC_EQ);
	emit_byte(dispatch_loop-((uae_u32)get_target()+1));	/* short jump back */
	raw_call((uae_u32)m68k_do_specialties);
	raw_test_l_rr(REG_RESULT,REG_RESULT);
	raw_jcc_b_oponly(NATIVE_CC_EQ);
	emit_byte(dispatch_loop-((uae_u32)get_target()+1));
	raw_cmp_b_mi((uae_u32)&quit_program,0);
	raw_jcc_b_oponly(NATIVE_CC_EQ);
	emit_byte(dispatch_loop-((uae_u32)get_target()+1));
	for (i=0;i<N_REGS;i++) {
		if (need_to_preserve[i])
			raw_pop_l_r(i);
	}
	raw_ret();
#endif
}
5974
5975 static __inline__ void reset_lists(void)
5976 {
5977 int i;
5978
5979 for (i=0;i<MAX_HOLD_BI;i++)
5980 hold_bi[i]=NULL;
5981 active=NULL;
5982 dormant=NULL;
5983 }
5984
/* Initialise a freshly allocated blockinfo: emit its two per-block stubs
 * (direct_pen — force recompilation; direct_pcc — run a checksum check),
 * both of which store the block's 68k PC into regs.pc_p before jumping to
 * the corresponding popall trampoline, and reset its bookkeeping fields. */
static void prepare_block(blockinfo* bi)
{
	int i;

	set_target(current_compile_p);
	align_target(align_jumps);
	bi->direct_pen=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
	raw_mov_l_mr((uae_u32)&regs.pc_p,0);
	raw_jmp((uae_u32)popall_execute_normal);

	align_target(align_jumps);
	bi->direct_pcc=(cpuop_func *)get_target();
	raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
	raw_mov_l_mr((uae_u32)&regs.pc_p,0);
	raw_jmp((uae_u32)popall_check_checksum);
	current_compile_p=get_target();

	/* Reset dependency links and state. */
	bi->deplist=NULL;
	for (i=0;i<2;i++) {
		bi->dep[i].prev_p=NULL;
		bi->dep[i].next=NULL;
	}
	bi->env=default_ss;
	bi->status=BI_INVALID;
	bi->havestate=0;
	//bi->env=empty_ss;
}
6013
// OPCODE is in big endian format, use cft_map() beforehand, if needed.
/* Remove any compiled handler for OPCODE from both the flag-generating and
 * no-flags compile tables, so the opcode falls back to interpretation. */
static inline void reset_compop(int opcode)
{
	compfunctbl[opcode] = NULL;
	nfcompfunctbl[opcode] = NULL;
}
6020
/* Parse exactly four hexadecimal digits starting at P into an opcode value.
 * Both upper- and lower-case digits are accepted.  Returns the 16-bit value,
 * or -1 if any of the four characters is not a hex digit. */
static int read_opcode(const char *p)
{
	int opcode = 0;
	for (int i = 0; i < 4; i++) {
		const char c = p[i];
		int digit;
		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = (c - 'a') + 10;
		else if (c >= 'A' && c <= 'F')
			digit = (c - 'A') + 10;
		else
			return -1;
		opcode = (opcode << 4) | digit;
	}
	return opcode;
}
6043
/* Apply the "jitblacklist" preference: a ';'-separated list of 4-hex-digit
 * opcodes or "xxxx-yyyy" ranges whose compiled handlers are removed so they
 * always run interpreted.  Returns false on any syntax error. */
static bool merge_blacklist()
{
	const char *blacklist = PrefsFindString("jitblacklist");
	if (blacklist) {
		const char *p = blacklist;
		for (;;) {
			if (*p == 0)
				return true;	/* consumed the whole list */

			int opcode1 = read_opcode(p);
			if (opcode1 < 0)
				return false;
			p += 4;

			/* Optional "-yyyy" range end; defaults to a single opcode. */
			int opcode2 = opcode1;
			if (*p == '-') {
				p++;
				opcode2 = read_opcode(p);
				if (opcode2 < 0)
					return false;
				p += 4;
			}

			/* Entry must be terminated by ';' or end of string. */
			if (*p == 0 || *p == ';') {
				write_log("<JIT compiler> : blacklist opcodes : %04x-%04x\n", opcode1, opcode2);
				for (int opcode = opcode1; opcode <= opcode2; opcode++)
					reset_compop(cft_map(opcode));

				if (*p++ == ';')
					continue;

				return true;
			}

			return false;	/* junk after a valid entry */
		}
	}
	return true;	/* no blacklist preference set */
}
6083
6084 void build_comp(void)
6085 {
6086 int i;
6087 int jumpcount=0;
6088 unsigned long opcode;
6089 struct comptbl* tbl=op_smalltbl_0_comp_ff;
6090 struct comptbl* nftbl=op_smalltbl_0_comp_nf;
6091 int count;
6092 int cpu_level = 0; // 68000 (default)
6093 if (CPUType == 4)
6094 cpu_level = 4; // 68040 with FPU
6095 else {
6096 if (FPUType)
6097 cpu_level = 3; // 68020 with FPU
6098 else if (CPUType >= 2)
6099 cpu_level = 2; // 68020
6100 else if (CPUType == 1)
6101 cpu_level = 1;
6102 }
6103 struct cputbl *nfctbl = (
6104 cpu_level == 4 ? op_smalltbl_0_nf
6105 : cpu_level == 3 ? op_smalltbl_1_nf
6106 : cpu_level == 2 ? op_smalltbl_2_nf
6107 : cpu_level == 1 ? op_smalltbl_3_nf
6108 : op_smalltbl_4_nf);
6109
6110 write_log ("<JIT compiler> : building compiler function tables\n");
6111
6112 for (opcode = 0; opcode < 65536; opcode++) {
6113 reset_compop(opcode);
6114 nfcpufunctbl[opcode] = op_illg_1;
6115 prop[opcode].use_flags = 0x1f;
6116 prop[opcode].set_flags = 0x1f;
6117 prop[opcode].cflow = fl_trap; // ILLEGAL instructions do trap
6118 }
6119
6120 for (i = 0; tbl[i].opcode < 65536; i++) {
6121 int cflow = table68k[tbl[i].opcode].cflow;
6122 if (USE_INLINING && ((cflow & fl_const_jump) != 0))
6123 cflow = fl_const_jump;
6124 else
6125 cflow &= ~fl_const_jump;
6126 prop[cft_map(tbl[i].opcode)].cflow = cflow;
6127
6128 int uses_fpu = tbl[i].specific & 32;
6129 if (uses_fpu && avoid_fpu)
6130 compfunctbl[cft_map(tbl[i].opcode)] = NULL;
6131 else
6132 compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
6133 }
6134
6135 for (i = 0; nftbl[i].opcode < 65536; i++) {
6136 int uses_fpu = tbl[i].specific & 32;
6137 if (uses_fpu && avoid_fpu)
6138 nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
6139 else
6140 nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
6141
6142 nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler;
6143 }
6144
6145 for (i = 0; nfctbl[i].handler; i++) {
6146 nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler;
6147 }
6148
6149 for (opcode = 0; opcode < 65536; opcode++) {
6150 compop_func *f;
6151 compop_func *nff;
6152 cpuop_func *nfcf;
6153 int isaddx,cflow;
6154
6155 if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > cpu_level)
6156 continue;
6157
6158 if (table68k[opcode].handler != -1) {
6159 f = compfunctbl[cft_map(table68k[opcode].handler)];
6160 nff = nfcompfunctbl[cft_map(table68k[opcode].handler)];
6161 nfcf = nfcpufunctbl[cft_map(table68k[opcode].handler)];
6162 cflow = prop[cft_map(table68k[opcode].handler)].cflow;
6163 isaddx = prop[cft_map(table68k[opcode].handler)].is_addx;
6164 prop[cft_map(opcode)].cflow = cflow;
6165 prop[cft_map(opcode)].is_addx = isaddx;
6166 compfunctbl[cft_map(opcode)] = f;
6167 nfcompfunctbl[cft_map(opcode)] = nff;
6168 Dif (nfcf == op_illg_1)
6169 abort();
6170 nfcpufunctbl[cft_map(opcode)] = nfcf;
6171 }
6172 prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead;
6173 prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive;
6174 }
6175 for (i = 0; nfctbl[i].handler != NULL; i++) {
6176 if (nfctbl[i].specific)
6177 nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler;
6178 }
6179
6180 /* Merge in blacklist */
6181 if (!merge_blacklist())
6182 write_log("<JIT compiler> : blacklist merge failure!\n");
6183
6184 count=0;
6185 for (opcode = 0; opcode < 65536; opcode++) {
6186 if (compfunctbl[cft_map(opcode)])
6187 count++;
6188 }
6189 write_log("<JIT compiler> : supposedly %d compileable opcodes!\n",count);
6190
6191 /* Initialise state */
6192 create_popalls();
6193 alloc_cache();
6194 reset_lists();
6195
6196 for (i=0;i<TAGSIZE;i+=2) {
6197 cache_tags[i].handler=(cpuop_func *)popall_execute_normal;
6198 cache_tags[i+1].bi=NULL;
6199 }
6200
6201 #if 0
6202 for (i=0;i<N_REGS;i++) {
6203 empty_ss.nat[i].holds=-1;
6204 empty_ss.nat[i].validsize=0;
6205 empty_ss.nat[i].dirtysize=0;
6206 }
6207 #endif
6208 for (i=0;i<VREGS;i++) {
6209 empty_ss.virt[i]=L_NEEDED;
6210 }
6211 for (i=0;i<N_REGS;i++) {
6212 empty_ss.nat[i]=L_UNKNOWN;
6213 }
6214 default_ss=empty_ss;
6215 }
6216
6217
/* No-op cache flush used when flushing is disabled. */
static void flush_icache_none(int n)
{
	/* Nothing to do. */
}
6222
6223 static void flush_icache_hard(int n)
6224 {
6225 uae_u32 i;
6226 blockinfo* bi, *dbi;
6227
6228 hard_flush_count++;
6229 #if 0
6230 write_log("Flush Icache_hard(%d/%x/%p), %u KB\n",
6231 n,regs.pc,regs.pc_p,current_cache_size/1024);
6232 current_cache_size = 0;
6233 #endif
6234 bi=active;
6235 while(bi) {
6236 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6237 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6238 dbi=bi; bi=bi->next;
6239 free_blockinfo(dbi);
6240 }
6241 bi=dormant;
6242 while(bi) {
6243 cache_tags[cacheline(bi->pc_p)].handler=(cpuop_func *)popall_execute_normal;
6244 cache_tags[cacheline(bi->pc_p)+1].bi=NULL;
6245 dbi=bi; bi=bi->next;
6246 free_blockinfo(dbi);
6247 }
6248
6249 reset_lists();
6250 if (!compiled_code)
6251 return;
6252 current_compile_p=compiled_code;
6253 SPCFLAGS_SET( SPCFLAG_JIT_EXEC_RETURN ); /* To get out of compiled code */
6254 }
6255
6256
6257 /* "Soft flushing" --- instead of actually throwing everything away,
6258 we simply mark everything as "needs to be checked".
6259 */
6260
6261 static inline void flush_icache_lazy(int n)
6262 {
6263 uae_u32 i;
6264 blockinfo* bi;
6265 blockinfo* bi2;
6266
6267 soft_flush_count++;
6268 if (!active)
6269 return;
6270
6271 bi=active;
6272 while (bi) {
6273 uae_u32 cl=cacheline(bi->pc_p);
6274 if (bi->status==BI_INVALID ||
6275 bi->status==BI_NEED_RECOMP) {
6276 if (bi==cache_tags[cl+1].bi)
6277 cache_tags[cl].handler=(cpuop_func *)popall_execute_normal;
6278 bi->handler_to_use=(cpuop_func *)popall_execute_normal;
6279 set_dhtu(bi,bi->direct_pen);
6280 bi->status=BI_INVALID;
6281 }
6282 else {
6283 if (bi==cache_tags[cl+1].bi)
6284 cache_tags[cl].handler=(cpuop_func *)popall_check_checksum;
6285 bi->handler_to_use=(cpuop_func *)popall_check_checksum;
6286 set_dhtu(bi,bi->direct_pcc);
6287 bi->status=BI_NEED_CHECK;
6288 }
6289 bi2=bi;
6290 bi=bi->next;
6291 }
6292 /* bi2 is now the last entry in the active list */
6293 bi2->next=dormant;
6294 if (dormant)
6295 dormant->prev_p=&(bi2->next);
6296
6297 dormant=active;
6298 active->prev_p=&dormant;
6299 active=NULL;
6300 }
6301
/* Invalidate translations overlapping the 68k address range
 * [start, start+length).  With LAZY_FLUSH_ICACHE_RANGE, only overlapping
 * active blocks are marked for recompilation; otherwise everything is
 * flushed via flush_icache(-1). */
void flush_icache_range(uae_u32 start, uae_u32 length)
{
	if (!active)
		return;

#if LAZY_FLUSH_ICACHE_RANGE
	uae_u8 *start_p = get_real_address(start);
	blockinfo *bi = active;
	while (bi) {
#if USE_CHECKSUM_INFO
		/* Overlap test against each range covered by the block.
		   NOTE(review): relies on signed pointer differences going
		   negative for the non-overlapping side — confirm the intended
		   semantics before tightening these comparisons. */
		bool invalidate = false;
		for (checksum_info *csi = bi->csi; csi && !invalidate; csi = csi->next)
			invalidate = (((start_p - csi->start_p) < csi->length) ||
						  ((csi->start_p - start_p) < length));
#else
		// Assume system is consistent and would invalidate the right range
		const bool invalidate = (bi->pc_p - start_p) < length;
#endif
		if (invalidate) {
			uae_u32 cl = cacheline(bi->pc_p);
			if (bi == cache_tags[cl + 1].bi)
				cache_tags[cl].handler = (cpuop_func *)popall_execute_normal;
			bi->handler_to_use = (cpuop_func *)popall_execute_normal;
			set_dhtu(bi, bi->direct_pen);
			bi->status = BI_NEED_RECOMP;
		}
		bi = bi->next;
	}
	return;
#endif
	flush_icache(-1);
}
6334
/* Last-resort bailout for unrecoverable internal JIT errors. */
static void catastrophe(void)
{
	abort();
}
6339
6340 int failure;
6341
6342 #define TARGET_M68K 0
6343 #define TARGET_POWERPC 1
6344 #define TARGET_X86 2
6345 #if defined(i386) || defined(__i386__)
6346 #define TARGET_NATIVE TARGET_X86
6347 #endif
6348 #if defined(powerpc) || defined(__powerpc__)
6349 #define TARGET_NATIVE TARGET_POWERPC
6350 #endif
6351
6352 #ifdef ENABLE_MON
6353 static uae_u32 mon_read_byte_jit(uae_u32 addr)
6354 {
6355 uae_u8 *m = (uae_u8 *)addr;
6356 return (uae_u32)(*m);
6357 }
6358
6359 static void mon_write_byte_jit(uae_u32 addr, uae_u32 b)
6360 {
6361 uae_u8 *m = (uae_u8 *)addr;
6362 *m = b;
6363 }
6364 #endif
6365
/* Disassemble LENGTH bytes at START via the external `mon` debugger,
 * selecting the disassembler by TARGET architecture.  Only active when
 * JIT debugging is both compiled in and enabled at runtime. */
void disasm_block(int target, uint8 * start, size_t length)
{
	if (!JITDebug)
		return;

#if defined(JIT_DEBUG) && defined(ENABLE_MON)
	char disasm_str[200];
	/* NOTE(review): "%x" is given pointer-valued arguments here; this
	   assumes pointers fit in an unsigned int (32-bit hosts) — confirm
	   before building on 64-bit targets. */
	sprintf(disasm_str, "%s $%x $%x",
			target == TARGET_M68K ? "d68" :
			target == TARGET_X86 ? "d86" :
			target == TARGET_POWERPC ? "d" : "x",
			start, start + length - 1);

	/* Temporarily redirect mon's memory accessors to raw host memory. */
	uae_u32 (*old_mon_read_byte)(uae_u32) = mon_read_byte;
	void (*old_mon_write_byte)(uae_u32, uae_u32) = mon_write_byte;

	mon_read_byte = mon_read_byte_jit;
	mon_write_byte = mon_write_byte_jit;

	char *arg[5] = {"mon", "-m", "-r", disasm_str, NULL};
	mon(4, arg);

	mon_read_byte = old_mon_read_byte;
	mon_write_byte = old_mon_write_byte;
#endif
}
6392
/* Disassemble a block of generated host-native code. */
static inline void disasm_native_block(uint8 *start, size_t length)
{
	disasm_block(TARGET_NATIVE, start, length);
}
6397
/* Disassemble a block of 68k source code. */
static inline void disasm_m68k_block(uint8 *start, size_t length)
{
	disasm_block(TARGET_M68K, start, length);
}
6402
6403 #ifdef HAVE_GET_WORD_UNSWAPPED
6404 # define DO_GET_OPCODE(a) (do_get_mem_word_unswapped((uae_u16 *)(a)))
6405 #else
6406 # define DO_GET_OPCODE(a) (do_get_mem_word((uae_u16 *)(a)))
6407 #endif
6408
#if JIT_DEBUG
/* 68k PC of the most recently entered compiled block (set by generated code). */
static uae_u8 *last_regs_pc_p = 0;
/* Host address of the most recently compiled block (set by generated code). */
static uae_u8 *last_compiled_block_addr = 0;

/* Dump the JIT's view of the world for debugging: key host addresses, the
 * emulated CPU state, and the location/size of the last compiled block. */
void compiler_dumpstate(void)
{
	if (!JITDebug)
		return;

	write_log("### Host addresses\n");
	write_log("MEM_BASE : %x\n", MEMBaseDiff);
	write_log("PC_P : %p\n", &regs.pc_p);
	write_log("SPCFLAGS : %p\n", &regs.spcflags);
	write_log("D0-D7 : %p-%p\n", &regs.regs[0], &regs.regs[7]);
	write_log("A0-A7 : %p-%p\n", &regs.regs[8], &regs.regs[15]);
	write_log("\n");

	write_log("### M68k processor state\n");
	m68k_dumpstate(0);
	write_log("\n");

	write_log("### Block in Mac address space\n");
	write_log("M68K block : %p\n",
			  (void *)get_virtual_address(last_regs_pc_p));
	write_log("Native block : %p (%d bytes)\n",
			  (void *)get_virtual_address(last_compiled_block_addr),
			  get_blockinfo_addr(last_regs_pc_p)->direct_handler_size);
	write_log("\n");
}
#endif
6439
6440 static void compile_block(cpu_history* pc_hist, int blocklen)
6441 {
6442 if (letit && compiled_code) {
6443 #if PROFILE_COMPILE_TIME
6444 compile_count++;
6445 clock_t start_time = clock();
6446 #endif
6447 #if JIT_DEBUG
6448 bool disasm_block = false;
6449 #endif
6450
6451 /* OK, here we need to 'compile' a block */
6452 int i;
6453 int r;
6454 int was_comp=0;
6455 uae_u8 liveflags[MAXRUN+1];
6456 #if USE_CHECKSUM_INFO
6457 bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6458 uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
6459 uae_u32 min_pcp=max_pcp;
6460 #else
6461 uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
6462 uae_u32 min_pcp=max_pcp;
6463 #endif
6464 uae_u32 cl=cacheline(pc_hist[0].location);
6465 void* specflags=(void*)&regs.spcflags;
6466 blockinfo* bi=NULL;
6467 blockinfo* bi2;
6468 int extra_len=0;
6469
6470 redo_current_block=0;
6471 if (current_compile_p>=max_compile_start)
6472 flush_icache_hard(7);
6473
6474 alloc_blockinfos();
6475
6476 bi=get_blockinfo_addr_new(pc_hist[0].location,0);
6477 bi2=get_blockinfo(cl);
6478
6479 optlev=bi->optlevel;
6480 if (bi->status!=BI_INVALID) {
6481 Dif (bi!=bi2) {
6482 /* I don't think it can happen anymore. Shouldn't, in
6483 any case. So let's make sure... */
6484 write_log("WOOOWOO count=%d, ol=%d %p %p\n",
6485 bi->count,bi->optlevel,bi->handler_to_use,
6486 cache_tags[cl].handler);
6487 abort();
6488 }
6489
6490 Dif (bi->count!=-1 && bi->status!=BI_NEED_RECOMP) {
6491 write_log("bi->count=%d, bi->status=%d\n",bi->count,bi->status);
6492 /* What the heck? We are not supposed to be here! */
6493 abort();
6494 }
6495 }
6496 if (bi->count==-1) {
6497 optlev++;
6498 while (!optcount[optlev])
6499 optlev++;
6500 bi->count=optcount[optlev]-1;
6501 }
6502 current_block_pc_p=(uae_u32)pc_hist[0].location;
6503
6504 remove_deps(bi); /* We are about to create new code */
6505 bi->optlevel=optlev;
6506 bi->pc_p=(uae_u8*)pc_hist[0].location;
6507 #if USE_CHECKSUM_INFO
6508 free_checksum_info_chain(bi->csi);
6509 bi->csi = NULL;
6510 #endif
6511
6512 liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6513 i=blocklen;
6514 while (i--) {
6515 uae_u16* currpcp=pc_hist[i].location;
6516 uae_u32 op=DO_GET_OPCODE(currpcp);
6517
6518 #if USE_CHECKSUM_INFO
6519 trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6520 #if USE_INLINING
6521 if (is_const_jump(op)) {
6522 checksum_info *csi = alloc_checksum_info();
6523 csi->start_p = (uae_u8 *)min_pcp;
6524 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6525 csi->next = bi->csi;
6526 bi->csi = csi;
6527 max_pcp = (uae_u32)currpcp;
6528 }
6529 #endif
6530 min_pcp = (uae_u32)currpcp;
6531 #else
6532 if ((uae_u32)currpcp<min_pcp)
6533 min_pcp=(uae_u32)currpcp;
6534 if ((uae_u32)currpcp>max_pcp)
6535 max_pcp=(uae_u32)currpcp;
6536 #endif
6537
6538 liveflags[i]=((liveflags[i+1]&
6539 (~prop[op].set_flags))|
6540 prop[op].use_flags);
6541 if (prop[op].is_addx && (liveflags[i+1]&FLAG_Z)==0)
6542 liveflags[i]&= ~FLAG_Z;
6543 }
6544
6545 #if USE_CHECKSUM_INFO
6546 checksum_info *csi = alloc_checksum_info();
6547 csi->start_p = (uae_u8 *)min_pcp;
6548 csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6549 csi->next = bi->csi;
6550 bi->csi = csi;
6551 #endif
6552
6553 bi->needed_flags=liveflags[0];
6554
6555 align_target(align_loops);
6556 was_comp=0;
6557
6558 bi->direct_handler=(cpuop_func *)get_target();
6559 set_dhtu(bi,bi->direct_handler);
6560 bi->status=BI_COMPILING;
6561 current_block_start_target=(uae_u32)get_target();
6562
6563 log_startblock();
6564
6565 if (bi->count>=0) { /* Need to generate countdown code */
6566 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6567 raw_sub_l_mi((uae_u32)&(bi->count),1);
6568 raw_jl((uae_u32)popall_recompile_block);
6569 }
6570 if (optlev==0) { /* No need to actually translate */
6571 /* Execute normally without keeping stats */
6572 raw_mov_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6573 raw_jmp((uae_u32)popall_exec_nostats);
6574 }
6575 else {
6576 reg_alloc_run=0;
6577 next_pc_p=0;
6578 taken_pc_p=0;
6579 branch_cc=0;
6580
6581 comp_pc_p=(uae_u8*)pc_hist[0].location;
6582 init_comp();
6583 was_comp=1;
6584
6585 #if JIT_DEBUG
6586 if (JITDebug) {
6587 raw_mov_l_mi((uae_u32)&last_regs_pc_p,(uae_u32)pc_hist[0].location);
6588 raw_mov_l_mi((uae_u32)&last_compiled_block_addr,(uae_u32)current_block_start_target);
6589 }
6590 #endif
6591
6592 for (i=0;i<blocklen &&
6593 get_target_noopt()<max_compile_start;i++) {
6594 cpuop_func **cputbl;
6595 compop_func **comptbl;
6596 uae_u32 opcode=DO_GET_OPCODE(pc_hist[i].location);
6597 needed_flags=(liveflags[i+1] & prop[opcode].set_flags);
6598 if (!needed_flags) {
6599 cputbl=nfcpufunctbl;
6600 comptbl=nfcompfunctbl;
6601 }
6602 else {
6603 cputbl=cpufunctbl;
6604 comptbl=compfunctbl;
6605 }
6606
6607 failure = 1; // gb-- defaults to failure state
6608 if (comptbl[opcode] && optlev>1) {
6609 failure=0;
6610 if (!was_comp) {
6611 comp_pc_p=(uae_u8*)pc_hist[i].location;
6612 init_comp();
6613 }
6614 was_comp=1;
6615
6616 comptbl[opcode](opcode);
6617 freescratch();
6618 if (!(liveflags[i+1] & FLAG_CZNV)) {
6619 /* We can forget about flags */
6620 dont_care_flags();
6621 }
6622 #if INDIVIDUAL_INST
6623 flush(1);
6624 nop();
6625 flush(1);
6626 was_comp=0;
6627 #endif
6628 }
6629
6630 if (failure) {
6631 if (was_comp) {
6632 flush(1);
6633 was_comp=0;
6634 }
6635 raw_mov_l_ri(REG_PAR1,(uae_u32)opcode);
6636 #if USE_NORMAL_CALLING_CONVENTION
6637 raw_push_l_r(REG_PAR1);
6638 #endif
6639 raw_mov_l_mi((uae_u32)&regs.pc_p,
6640 (uae_u32)pc_hist[i].location);
6641 raw_call((uae_u32)cputbl[opcode]);
6642 #if PROFILE_UNTRANSLATED_INSNS
6643 // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6644 raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
6645 #endif
6646 #if USE_NORMAL_CALLING_CONVENTION
6647 raw_inc_sp(4);
6648 #endif
6649
6650 if (i < blocklen - 1) {
6651 uae_s8* branchadd;
6652
6653 raw_mov_l_rm(0,(uae_u32)specflags);
6654 raw_test_l_rr(0,0);
6655 raw_jz_b_oponly();
6656 branchadd=(uae_s8 *)get_target();
6657 emit_byte(0);
6658 raw_jmp((uae_u32)popall_do_nothing);
6659 *branchadd=(uae_u32)get_target()-(uae_u32)branchadd-1;
6660 }
6661 }
6662 }
#if 1 /* This isn't completely kosher yet; it really needs to be
	 integrated into a general inter-block-dependency scheme */
6665 if (next_pc_p && taken_pc_p &&
6666 was_comp && taken_pc_p==current_block_pc_p) {
6667 blockinfo* bi1=get_blockinfo_addr_new((void*)next_pc_p,0);
6668 blockinfo* bi2=get_blockinfo_addr_new((void*)taken_pc_p,0);
6669 uae_u8 x=bi1->needed_flags;
6670
6671 if (x==0xff || 1) { /* To be on the safe side */
6672 uae_u16* next=(uae_u16*)next_pc_p;
6673 uae_u32 op=DO_GET_OPCODE(next);
6674
6675 x=0x1f;
6676 x&=(~prop[op].set_flags);
6677 x|=prop[op].use_flags;
6678 }
6679
6680 x|=bi2->needed_flags;
6681 if (!(x & FLAG_CZNV)) {
6682 /* We can forget about flags */
6683 dont_care_flags();
6684 extra_len+=2; /* The next instruction now is part of this
6685 block */
6686 }
6687
6688 }
6689 #endif
6690 log_flush();
6691
6692 if (next_pc_p) { /* A branch was registered */
6693 uae_u32 t1=next_pc_p;
6694 uae_u32 t2=taken_pc_p;
6695 int cc=branch_cc;
6696
6697 uae_u32* branchadd;
6698 uae_u32* tba;
6699 bigstate tmp;
6700 blockinfo* tbi;
6701
6702 if (taken_pc_p<next_pc_p) {
6703 /* backward branch. Optimize for the "taken" case ---
6704 which means the raw_jcc should fall through when
6705 the 68k branch is taken. */
6706 t1=taken_pc_p;
6707 t2=next_pc_p;
6708 cc=branch_cc^1;
6709 }
6710
6711 tmp=live; /* ouch! This is big... */
6712 raw_jcc_l_oponly(cc);
6713 branchadd=(uae_u32*)get_target();
6714 emit_long(0);
6715
6716 /* predicted outcome */
6717 tbi=get_blockinfo_addr_new((void*)t1,1);
6718 match_states(tbi);
6719 raw_cmp_l_mi((uae_u32)specflags,0);
6720 raw_jcc_l_oponly(4);
6721 tba=(uae_u32*)get_target();
6722 emit_long(get_handler(t1)-((uae_u32)tba+4));
6723 raw_mov_l_mi((uae_u32)&regs.pc_p,t1);
6724 raw_jmp((uae_u32)popall_do_nothing);
6725 create_jmpdep(bi,0,tba,t1);
6726
6727 align_target(align_jumps);
6728 /* not-predicted outcome */
6729 *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6730 live=tmp; /* Ouch again */
6731 tbi=get_blockinfo_addr_new((void*)t2,1);
6732 match_states(tbi);
6733
6734 //flush(1); /* Can only get here if was_comp==1 */
6735 raw_cmp_l_mi((uae_u32)specflags,0);
6736 raw_jcc_l_oponly(4);
6737 tba=(uae_u32*)get_target();
6738 emit_long(get_handler(t2)-((uae_u32)tba+4));
6739 raw_mov_l_mi((uae_u32)&regs.pc_p,t2);
6740 raw_jmp((uae_u32)popall_do_nothing);
6741 create_jmpdep(bi,1,tba,t2);
6742 }
6743 else
6744 {
6745 if (was_comp) {
6746 flush(1);
6747 }
6748
6749 /* Let's find out where next_handler is... */
6750 if (was_comp && isinreg(PC_P)) {
6751 r=live.state[PC_P].realreg;
6752 raw_and_l_ri(r,TAGMASK);
6753 int r2 = (r==0) ? 1 : 0;
6754 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6755 raw_cmp_l_mi((uae_u32)specflags,0);
6756 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6757 raw_jmp_r(r2);
6758 }
6759 else if (was_comp && isconst(PC_P)) {
6760 uae_u32 v=live.state[PC_P].val;
6761 uae_u32* tba;
6762 blockinfo* tbi;
6763
6764 tbi=get_blockinfo_addr_new((void*)v,1);
6765 match_states(tbi);
6766
6767 raw_cmp_l_mi((uae_u32)specflags,0);
6768 raw_jcc_l_oponly(4);
6769 tba=(uae_u32*)get_target();
6770 emit_long(get_handler(v)-((uae_u32)tba+4));
6771 raw_mov_l_mi((uae_u32)&regs.pc_p,v);
6772 raw_jmp((uae_u32)popall_do_nothing);
6773 create_jmpdep(bi,0,tba,v);
6774 }
6775 else {
6776 r=REG_PC_TMP;
6777 raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
6778 raw_and_l_ri(r,TAGMASK);
6779 int r2 = (r==0) ? 1 : 0;
6780 raw_mov_l_ri(r2,(uae_u32)popall_do_nothing);
6781 raw_cmp_l_mi((uae_u32)specflags,0);
6782 raw_cmov_l_rm_indexed(r2,(uae_u32)cache_tags,r,4,4);
6783 raw_jmp_r(r2);
6784 }
6785 }
6786 }
6787
6788 #if USE_MATCH
6789 if (callers_need_recompile(&live,&(bi->env))) {
6790 mark_callers_recompile(bi);
6791 }
6792
6793 big_to_small_state(&live,&(bi->env));
6794 #endif
6795
6796 #if USE_CHECKSUM_INFO
6797 remove_from_list(bi);
6798 if (trace_in_rom) {
6799 // No need to checksum that block trace on cache invalidation
6800 free_checksum_info_chain(bi->csi);
6801 bi->csi = NULL;
6802 add_to_dormant(bi);
6803 }
6804 else {
6805 calc_checksum(bi,&(bi->c1),&(bi->c2));
6806 add_to_active(bi);
6807 }
6808 #else
6809 if (next_pc_p+extra_len>=max_pcp &&
6810 next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6811 max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */
6812 else
6813 max_pcp+=LONGEST_68K_INST;
6814
6815 bi->len=max_pcp-min_pcp;
6816 bi->min_pcp=min_pcp;
6817
6818 remove_from_list(bi);
6819 if (isinrom(min_pcp) && isinrom(max_pcp)) {
6820 add_to_dormant(bi); /* No need to checksum it on cache flush.
6821 Please don't start changing ROMs in
6822 flight! */
6823 }
6824 else {
6825 calc_checksum(bi,&(bi->c1),&(bi->c2));
6826 add_to_active(bi);
6827 }
6828 #endif
6829
6830 current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6831
6832 #if JIT_DEBUG
6833 if (JITDebug)
6834 bi->direct_handler_size = get_target() - (uae_u8 *)current_block_start_target;
6835
6836 if (JITDebug && disasm_block) {
6837 uaecptr block_addr = start_pc + ((char *)pc_hist[0].location - (char *)start_pc_p);
6838 D(bug("M68K block @ 0x%08x (%d insns)\n", block_addr, blocklen));
6839 uae_u32 block_size = ((uae_u8 *)pc_hist[blocklen - 1].location - (uae_u8 *)pc_hist[0].location) + 1;
6840 disasm_m68k_block((uae_u8 *)pc_hist[0].location, block_size);
6841 D(bug("Compiled block @ 0x%08x\n", pc_hist[0].location));
6842 disasm_native_block((uae_u8 *)current_block_start_target, bi->direct_handler_size);
6843 getchar();
6844 }
6845 #endif
6846
6847 log_dump();
6848 align_target(align_jumps);
6849
6850 /* This is the non-direct handler */
6851 bi->handler=
6852 bi->handler_to_use=(cpuop_func *)get_target();
6853 raw_cmp_l_mi((uae_u32)&regs.pc_p,(uae_u32)pc_hist[0].location);
6854 raw_jnz((uae_u32)popall_cache_miss);
6855 comp_pc_p=(uae_u8*)pc_hist[0].location;
6856
6857 bi->status=BI_FINALIZING;
6858 init_comp();
6859 match_states(bi);
6860 flush(1);
6861
6862 raw_jmp((uae_u32)bi->direct_handler);
6863
6864 current_compile_p=get_target();
6865 raise_in_cl_list(bi);
6866
6867 /* We will flush soon, anyway, so let's do it now */
6868 if (current_compile_p>=max_compile_start)
6869 flush_icache_hard(7);
6870
6871 bi->status=BI_ACTIVE;
6872 if (redo_current_block)
6873 block_need_recompile(bi);
6874
6875 #if PROFILE_COMPILE_TIME
6876 compile_time += (clock() - start_time);
6877 #endif
6878 }
6879 }
6880
void do_nothing(void)
{
	/* Deliberately empty: a valid no-op handler address for generated
	   code to jump to. */
}
6885
6886 void exec_nostats(void)
6887 {
6888 for (;;) {
6889 uae_u32 opcode = GET_OPCODE;
6890 (*cpufunctbl[opcode])(opcode);
6891 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6892 return; /* We will deal with the spcflags in the caller */
6893 }
6894 }
6895 }
6896
6897 void execute_normal(void)
6898 {
6899 if (!check_for_cache_miss()) {
6900 cpu_history pc_hist[MAXRUN];
6901 int blocklen = 0;
6902 #if REAL_ADDRESSING || DIRECT_ADDRESSING
6903 start_pc_p = regs.pc_p;
6904 start_pc = get_virtual_address(regs.pc_p);
6905 #else
6906 start_pc_p = regs.pc_oldp;
6907 start_pc = regs.pc;
6908 #endif
6909 for (;;) { /* Take note: This is the do-it-normal loop */
6910 pc_hist[blocklen++].location = (uae_u16 *)regs.pc_p;
6911 uae_u32 opcode = GET_OPCODE;
6912 #if FLIGHT_RECORDER
6913 m68k_record_step(m68k_getpc());
6914 #endif
6915 (*cpufunctbl[opcode])(opcode);
6916 if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6917 compile_block(pc_hist, blocklen);
6918 return; /* We will deal with the spcflags in the caller */
6919 }
6920 /* No need to check regs.spcflags, because if they were set,
6921 we'd have ended up inside that "if" */
6922 }
6923 }
6924 }
6925
/* Entry-point signature of a translated code block. */
typedef void (*compiled_handler)(void);
6927
6928 #ifdef X86_ASSEMBLY
6929 void (*m68k_compile_execute)(void) = NULL;
6930 #else
6931 void m68k_do_compile_execute(void)
6932 {
6933 for (;;) {
6934 ((compiled_handler)(pushall_call_handler))();
6935 /* Whenever we return from that, we should check spcflags */
6936 if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6937 if (m68k_do_specialties ())
6938 return;
6939 }
6940 }
6941 }
6942 #endif