1 |
+ |
/* |
2 |
+ |
* compiler/compemu_support.cpp - Core dynamic translation engine |
3 |
+ |
* |
4 |
+ |
* Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer |
5 |
+ |
* |
6 |
+ |
* Adaptation for Basilisk II and improvements, copyright 2000-2002 |
7 |
+ |
* Gwenole Beauchesne |
8 |
+ |
* |
9 |
+ |
* Basilisk II (C) 1997-2002 Christian Bauer |
10 |
+ |
* |
11 |
+ |
* This program is free software; you can redistribute it and/or modify |
12 |
+ |
* it under the terms of the GNU General Public License as published by |
13 |
+ |
* the Free Software Foundation; either version 2 of the License, or |
14 |
+ |
* (at your option) any later version. |
15 |
+ |
* |
16 |
+ |
* This program is distributed in the hope that it will be useful, |
17 |
+ |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 |
+ |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 |
+ |
* GNU General Public License for more details. |
20 |
+ |
* |
21 |
+ |
* You should have received a copy of the GNU General Public License |
22 |
+ |
* along with this program; if not, write to the Free Software |
23 |
+ |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
24 |
+ |
*/ |
25 |
+ |
|
26 |
|
#if !REAL_ADDRESSING && !DIRECT_ADDRESSING |
27 |
|
#error "Only Real or Direct Addressing is supported with the JIT Compiler" |
28 |
|
#endif |
29 |
|
|
30 |
+ |
#if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE |
31 |
+ |
#error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler" |
32 |
+ |
#endif |
33 |
+ |
|
34 |
|
#define USE_MATCH 0 |
35 |
|
|
36 |
|
/* kludge for Brian, so he can compile under MSVC++ */ |
69 |
|
#endif |
70 |
|
|
71 |
|
#ifndef WIN32 |
72 |
< |
#define PROFILE_COMPILE_TIME 1 |
72 |
> |
#define PROFILE_COMPILE_TIME 1 |
73 |
> |
#define PROFILE_UNTRANSLATED_INSNS 1 |
74 |
|
#endif |
75 |
|
|
76 |
|
#ifdef WIN32 |
95 |
|
static clock_t emul_end_time = 0; |
96 |
|
#endif |
97 |
|
|
98 |
+ |
#if PROFILE_UNTRANSLATED_INSNS |
99 |
+ |
const int untranslated_top_ten = 20; |
100 |
+ |
static uae_u32 raw_cputbl_count[65536] = { 0, }; |
101 |
+ |
static uae_u16 opcode_nums[65536]; |
102 |
+ |
|
103 |
+ |
static int untranslated_compfn(const void *e1, const void *e2) |
104 |
+ |
{ |
105 |
+ |
return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2]; |
106 |
+ |
} |
107 |
+ |
#endif |
108 |
+ |
|
109 |
|
compop_func *compfunctbl[65536]; |
110 |
|
compop_func *nfcompfunctbl[65536]; |
111 |
|
cpuop_func *nfcpufunctbl[65536]; |
112 |
|
uae_u8* comp_pc_p; |
113 |
|
|
114 |
+ |
// From newcpu.cpp |
115 |
+ |
extern bool quit_program; |
116 |
+ |
|
117 |
|
// gb-- Extra data for Basilisk II/JIT |
118 |
|
#if JIT_DEBUG |
119 |
|
static bool JITDebug = false; // Enable runtime disassemblers through mon? |
128 |
|
static bool avoid_fpu = true; // Flag: compile FPU instructions ? |
129 |
|
static bool have_cmov = false; // target has CMOV instructions ? |
130 |
|
static bool have_rat_stall = true; // target has partial register stalls ? |
131 |
+ |
const bool tune_alignment = true; // Tune code alignments for running CPU ? |
132 |
+ |
const bool tune_nop_fillers = true; // Tune no-op fillers for architecture |
133 |
+ |
static bool setzflg_uses_bsf = false; // setzflg virtual instruction can use native BSF instruction correctly? |
134 |
+ |
static int align_loops = 32; // Align the start of loops |
135 |
+ |
static int align_jumps = 32; // Align the start of jumps |
136 |
|
static int zero_fd = -1; |
137 |
|
static int optcount[10] = { |
138 |
|
10, // How often a block has to be executed before it is translated |
149 |
|
}; |
150 |
|
static op_properties prop[65536]; |
151 |
|
|
103 |
– |
// gb-- Control Flow Predicates |
104 |
– |
|
152 |
|
/* Control flow predicate: yields a non-zero value when the cflow property
   recorded for this opcode has the fl_end_block bit set, 0 otherwise. */
static inline int end_block(uae_u32 opcode)
{
	const int ends_block = prop[opcode].cflow & fl_end_block;
	return ends_block;
}
156 |
|
|
157 |
< |
static inline bool may_trap(uae_u32 opcode) |
157 |
> |
static inline bool is_const_jump(uae_u32 opcode) |
158 |
|
{ |
159 |
< |
return (prop[opcode].cflow & fl_trap); |
159 |
> |
return (prop[opcode].cflow == fl_const_jump); |
160 |
|
} |
161 |
|
|
162 |
|
uae_u8* start_pc_p; |
534 |
|
compiled. If the list of free blockinfos is empty, we allocate a new |
535 |
|
pool of blockinfos and link the newly created blockinfos together |
536 |
|
into the list of free blockinfos. Otherwise, we simply pop a structure |
537 |
< |
of the free list. |
537 |
> |
off the free list. |
538 |
|
|
539 |
|
Blockinfos are lazily deallocated, i.e. chained together in the |
540 |
|
list of free blockinfos whenever a translation cache flush (hard or |
541 |
|
soft) request occurs. |
542 |
|
*/ |
543 |
|
|
544 |
< |
#if USE_SEPARATE_BIA |
545 |
< |
const int BLOCKINFO_POOL_SIZE = 128; |
546 |
< |
struct blockinfo_pool { |
547 |
< |
blockinfo bi[BLOCKINFO_POOL_SIZE]; |
548 |
< |
blockinfo_pool *next; |
544 |
> |
template< class T > |
545 |
> |
class LazyBlockAllocator |
546 |
> |
{ |
547 |
> |
enum { |
548 |
> |
kPoolSize = 1 + 4096 / sizeof(T) |
549 |
> |
}; |
550 |
> |
struct Pool { |
551 |
> |
T chunk[kPoolSize]; |
552 |
> |
Pool * next; |
553 |
> |
}; |
554 |
> |
Pool * mPools; |
555 |
> |
T * mChunks; |
556 |
> |
public: |
557 |
> |
LazyBlockAllocator() : mPools(0), mChunks(0) { } |
558 |
> |
~LazyBlockAllocator(); |
559 |
> |
T * acquire(); |
560 |
> |
void release(T * const); |
561 |
|
}; |
503 |
– |
static blockinfo_pool * blockinfo_pools = 0; |
504 |
– |
static blockinfo * free_blockinfos = 0; |
505 |
– |
#endif |
562 |
|
|
563 |
< |
static __inline__ blockinfo *alloc_blockinfo(void) |
563 |
> |
template< class T > |
564 |
> |
LazyBlockAllocator<T>::~LazyBlockAllocator() |
565 |
|
{ |
566 |
< |
#if USE_SEPARATE_BIA |
567 |
< |
if (!free_blockinfos) { |
568 |
< |
// There is no blockinfo struct left, allocate a new |
569 |
< |
// pool and link the chunks into the free list |
570 |
< |
blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool)); |
571 |
< |
for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) { |
572 |
< |
bi->next = free_blockinfos; |
573 |
< |
free_blockinfos = bi; |
566 |
> |
Pool * currentPool = mPools; |
567 |
> |
while (currentPool) { |
568 |
> |
Pool * deadPool = currentPool; |
569 |
> |
currentPool = currentPool->next; |
570 |
> |
free(deadPool); |
571 |
> |
} |
572 |
> |
} |
573 |
> |
|
574 |
> |
template< class T > |
575 |
> |
T * LazyBlockAllocator<T>::acquire() |
576 |
> |
{ |
577 |
> |
if (!mChunks) { |
578 |
> |
// There is no chunk left, allocate a new pool and link the |
579 |
> |
// chunks into the free list |
580 |
> |
Pool * newPool = (Pool *)malloc(sizeof(Pool)); |
581 |
> |
for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) { |
582 |
> |
chunk->next = mChunks; |
583 |
> |
mChunks = chunk; |
584 |
|
} |
585 |
< |
bi_pool->next = blockinfo_pools; |
586 |
< |
blockinfo_pools = bi_pool; |
585 |
> |
newPool->next = mPools; |
586 |
> |
mPools = newPool; |
587 |
|
} |
588 |
< |
blockinfo *bi = free_blockinfos; |
589 |
< |
free_blockinfos = bi->next; |
590 |
< |
#else |
524 |
< |
blockinfo *bi = (blockinfo*)current_compile_p; |
525 |
< |
current_compile_p += sizeof(blockinfo); |
526 |
< |
#endif |
527 |
< |
return bi; |
588 |
> |
T * chunk = mChunks; |
589 |
> |
mChunks = chunk->next; |
590 |
> |
return chunk; |
591 |
|
} |
592 |
|
|
593 |
< |
static __inline__ void free_blockinfo(blockinfo *bi) |
593 |
> |
template< class T > |
594 |
> |
void LazyBlockAllocator<T>::release(T * const chunk) |
595 |
|
{ |
596 |
+ |
chunk->next = mChunks; |
597 |
+ |
mChunks = chunk; |
598 |
+ |
} |
599 |
+ |
|
600 |
+ |
template< class T > |
601 |
+ |
class HardBlockAllocator |
602 |
+ |
{ |
603 |
+ |
public: |
604 |
+ |
T * acquire() { |
605 |
+ |
T * data = (T *)current_compile_p; |
606 |
+ |
current_compile_p += sizeof(T); |
607 |
+ |
return data; |
608 |
+ |
} |
609 |
+ |
|
610 |
+ |
void release(T * const chunk) { |
611 |
+ |
// Deallocated on invalidation |
612 |
+ |
} |
613 |
+ |
}; |
614 |
+ |
|
615 |
|
#if USE_SEPARATE_BIA |
616 |
< |
bi->next = free_blockinfos; |
617 |
< |
free_blockinfos = bi; |
616 |
> |
static LazyBlockAllocator<blockinfo> BlockInfoAllocator; |
617 |
> |
static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator; |
618 |
> |
#else |
619 |
> |
static HardBlockAllocator<blockinfo> BlockInfoAllocator; |
620 |
> |
static HardBlockAllocator<checksum_info> ChecksumInfoAllocator; |
621 |
|
#endif |
622 |
+ |
|
623 |
+ |
/* Obtain a checksum_info from ChecksumInfoAllocator and return it with
   its 'next' link cleared, i.e. as a one-element chain. */
static __inline__ checksum_info *alloc_checksum_info(void)
{
	checksum_info * const fresh = ChecksumInfoAllocator.acquire();
	fresh->next = NULL;
	return fresh;
}
629 |
|
|
630 |
< |
static void free_blockinfo_pools(void) |
630 |
> |
static __inline__ void free_checksum_info(checksum_info *csi) |
631 |
|
{ |
632 |
< |
#if USE_SEPARATE_BIA |
633 |
< |
int blockinfo_pool_count = 0; |
634 |
< |
blockinfo_pool *curr_pool = blockinfo_pools; |
635 |
< |
while (curr_pool) { |
636 |
< |
blockinfo_pool_count++; |
637 |
< |
blockinfo_pool *dead_pool = curr_pool; |
638 |
< |
curr_pool = curr_pool->next; |
639 |
< |
free(dead_pool); |
632 |
> |
csi->next = NULL; |
633 |
> |
ChecksumInfoAllocator.release(csi); |
634 |
> |
} |
635 |
> |
|
636 |
> |
static __inline__ void free_checksum_info_chain(checksum_info *csi) |
637 |
> |
{ |
638 |
> |
while (csi != NULL) { |
639 |
> |
checksum_info *csi2 = csi->next; |
640 |
> |
free_checksum_info(csi); |
641 |
> |
csi = csi2; |
642 |
|
} |
643 |
< |
|
644 |
< |
uae_u32 blockinfo_pools_size = blockinfo_pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo); |
645 |
< |
write_log("### Blockinfo allocation statistics\n"); |
646 |
< |
write_log("Number of blockinfo pools : %d\n", blockinfo_pool_count); |
647 |
< |
write_log("Total number of blockinfos : %d (%d KB)\n", |
648 |
< |
blockinfo_pool_count * BLOCKINFO_POOL_SIZE, |
649 |
< |
blockinfo_pools_size / 1024); |
556 |
< |
write_log("\n"); |
643 |
> |
} |
644 |
> |
|
645 |
> |
/* Obtain a blockinfo from BlockInfoAllocator. When USE_CHECKSUM_INFO is
   enabled, the returned blockinfo starts with an empty checksum_info
   chain (csi == NULL). */
static __inline__ blockinfo *alloc_blockinfo(void)
{
	blockinfo * const fresh = BlockInfoAllocator.acquire();
#if USE_CHECKSUM_INFO
	fresh->csi = NULL;
#endif
	return fresh;
}
653 |
+ |
|
654 |
+ |
/* Give a blockinfo back to BlockInfoAllocator. When USE_CHECKSUM_INFO is
   enabled, the checksum_info chain hanging off the blockinfo is released
   first and the blockinfo's csi link is cleared. */
static __inline__ void free_blockinfo(blockinfo *bi)
{
#if USE_CHECKSUM_INFO
	checksum_info * const chain = bi->csi;
	bi->csi = NULL;
	free_checksum_info_chain(chain);
#endif
	BlockInfoAllocator.release(bi);
}
662 |
|
|
663 |
|
static __inline__ void alloc_blockinfos(void) |
700 |
|
target+=4; |
701 |
|
} |
702 |
|
|
703 |
+ |
static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen) |
704 |
+ |
{ |
705 |
+ |
memcpy((uae_u8 *)target,block,blocklen); |
706 |
+ |
target+=blocklen; |
707 |
+ |
} |
708 |
+ |
|
709 |
|
static __inline__ uae_u32 reverse32(uae_u32 v) |
710 |
|
{ |
711 |
|
#if 1 |
2671 |
|
} |
2672 |
|
MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc)) |
2673 |
|
|
2674 |
< |
MIDFUNC(2,bsf_l_rr,(W4 d, R4 s)) |
2674 |
> |
MIDFUNC(1,setzflg_l,(RW4 r)) |
2675 |
|
{ |
2676 |
< |
CLOBBER_BSF; |
2677 |
< |
s=readreg(s,4); |
2678 |
< |
d=writereg(d,4); |
2679 |
< |
raw_bsf_l_rr(d,s); |
2680 |
< |
unlock2(s); |
2681 |
< |
unlock2(d); |
2676 |
> |
if (setzflg_uses_bsf) { |
2677 |
> |
CLOBBER_BSF; |
2678 |
> |
r=rmw(r,4,4); |
2679 |
> |
raw_bsf_l_rr(r,r); |
2680 |
> |
unlock2(r); |
2681 |
> |
} |
2682 |
> |
else { |
2683 |
> |
/* Errr, not implemented yet in a generic way. And actually, |
2684 |
> |
that should not be generated for now, if BSF doesn't |
2685 |
> |
preserve flags but ZF. */ |
2686 |
> |
write_log("attempt to make unsupported setzflg()\n"); |
2687 |
> |
abort(); |
2688 |
> |
} |
2689 |
|
} |
2690 |
< |
MENDFUNC(2,bsf_l_rr,(W4 d, R4 s)) |
2690 |
> |
MENDFUNC(1,setzflg_l,(RW4 r)) |
2691 |
|
|
2692 |
|
MIDFUNC(2,imul_32_32,(RW4 d, R4 s)) |
2693 |
|
{ |
4672 |
|
|
4673 |
|
// Initialize target CPU (check for features, e.g. CMOV, rat stalls) |
4674 |
|
raw_init_cpu(); |
4675 |
+ |
setzflg_uses_bsf = target_check_bsf(); |
4676 |
|
write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no"); |
4677 |
|
write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no"); |
4678 |
+ |
write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps); |
4679 |
|
|
4680 |
|
// Translation cache flush mechanism |
4681 |
|
lazy_flush = PrefsFindBool("jitlazyflush"); |
4686 |
|
write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1)); |
4687 |
|
write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS)); |
4688 |
|
write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET)); |
4689 |
+ |
write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING)); |
4690 |
|
write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA)); |
4691 |
|
|
4692 |
|
// Build compiler tables |
4694 |
|
|
4695 |
|
initialized = true; |
4696 |
|
|
4697 |
+ |
#if PROFILE_UNTRANSLATED_INSNS |
4698 |
+ |
write_log("<JIT compiler> : gather statistics on untranslated insns count\n"); |
4699 |
+ |
#endif |
4700 |
+ |
|
4701 |
|
#if PROFILE_COMPILE_TIME |
4702 |
|
write_log("<JIT compiler> : gather statistics on translation time\n"); |
4703 |
|
emul_start_time = clock(); |
4716 |
|
compiled_code = 0; |
4717 |
|
} |
4718 |
|
|
4596 |
– |
// Deallocate blockinfo pools |
4597 |
– |
free_blockinfo_pools(); |
4598 |
– |
|
4719 |
|
#ifndef WIN32 |
4720 |
|
// Close /dev/zero |
4721 |
|
if (zero_fd > 0) |
4731 |
|
100.0*double(compile_time)/double(emul_time)); |
4732 |
|
write_log("\n"); |
4733 |
|
#endif |
4734 |
+ |
|
4735 |
+ |
#if PROFILE_UNTRANSLATED_INSNS |
4736 |
+ |
uae_u64 untranslated_count = 0; |
4737 |
+ |
for (int i = 0; i < 65536; i++) { |
4738 |
+ |
opcode_nums[i] = i; |
4739 |
+ |
untranslated_count += raw_cputbl_count[i]; |
4740 |
+ |
} |
4741 |
+ |
write_log("Sorting out untranslated instructions count...\n"); |
4742 |
+ |
qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn); |
4743 |
+ |
write_log("\nRank Opc Count Name\n"); |
4744 |
+ |
for (int i = 0; i < untranslated_top_ten; i++) { |
4745 |
+ |
uae_u32 count = raw_cputbl_count[opcode_nums[i]]; |
4746 |
+ |
struct instr *dp; |
4747 |
+ |
struct mnemolookup *lookup; |
4748 |
+ |
if (!count) |
4749 |
+ |
break; |
4750 |
+ |
dp = table68k + opcode_nums[i]; |
4751 |
+ |
for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++) |
4752 |
+ |
; |
4753 |
+ |
write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name); |
4754 |
+ |
} |
4755 |
+ |
#endif |
4756 |
|
} |
4757 |
|
|
4758 |
|
bool compiler_use_jit(void) |
4981 |
|
|
4982 |
|
static void align_target(uae_u32 a) |
4983 |
|
{ |
4984 |
< |
/* Fill with NOPs --- makes debugging with gdb easier */ |
4985 |
< |
while ((uae_u32)target&(a-1)) |
4986 |
< |
*target++=0x90; |
4984 |
> |
if (!a) |
4985 |
> |
return; |
4986 |
> |
|
4987 |
> |
if (tune_nop_fillers) |
4988 |
> |
raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1))); |
4989 |
> |
else { |
4990 |
> |
/* Fill with NOPs --- makes debugging with gdb easier */ |
4991 |
> |
while ((uae_u32)target&(a-1)) |
4992 |
> |
*target++=0x90; |
4993 |
> |
} |
4994 |
|
} |
4995 |
|
|
4996 |
|
static __inline__ int isinrom(uintptr addr) |
5315 |
|
|
5316 |
|
|
5317 |
|
|
5318 |
< |
extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM; |
5318 |
> |
extern void op_illg_1 (uae_u32 opcode) REGPARAM; |
5319 |
|
|
5320 |
|
static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2) |
5321 |
|
{ |
5322 |
< |
uae_u32 k1=0; |
5323 |
< |
uae_u32 k2=0; |
5175 |
< |
uae_s32 len=bi->len; |
5176 |
< |
uae_u32 tmp=bi->min_pcp; |
5177 |
< |
uae_u32* pos; |
5322 |
> |
uae_u32 k1 = 0; |
5323 |
> |
uae_u32 k2 = 0; |
5324 |
|
|
5325 |
< |
len+=(tmp&3); |
5326 |
< |
tmp&=(~3); |
5327 |
< |
pos=(uae_u32*)tmp; |
5325 |
> |
#if USE_CHECKSUM_INFO |
5326 |
> |
checksum_info *csi = bi->csi; |
5327 |
> |
Dif(!csi) abort(); |
5328 |
> |
while (csi) { |
5329 |
> |
uae_s32 len = csi->length; |
5330 |
> |
uae_u32 tmp = (uae_u32)csi->start_p; |
5331 |
> |
#else |
5332 |
> |
uae_s32 len = bi->len; |
5333 |
> |
uae_u32 tmp = (uae_u32)bi->min_pcp; |
5334 |
> |
#endif |
5335 |
> |
uae_u32*pos; |
5336 |
|
|
5337 |
< |
if (len<0 || len>MAX_CHECKSUM_LEN) { |
5338 |
< |
*c1=0; |
5339 |
< |
*c2=0; |
5340 |
< |
} |
5341 |
< |
else { |
5342 |
< |
while (len>0) { |
5343 |
< |
k1+=*pos; |
5344 |
< |
k2^=*pos; |
5345 |
< |
pos++; |
5346 |
< |
len-=4; |
5337 |
> |
len += (tmp & 3); |
5338 |
> |
tmp &= ~3; |
5339 |
> |
pos = (uae_u32 *)tmp; |
5340 |
> |
|
5341 |
> |
if (len >= 0 && len <= MAX_CHECKSUM_LEN) { |
5342 |
> |
while (len > 0) { |
5343 |
> |
k1 += *pos; |
5344 |
> |
k2 ^= *pos; |
5345 |
> |
pos++; |
5346 |
> |
len -= 4; |
5347 |
> |
} |
5348 |
> |
} |
5349 |
> |
|
5350 |
> |
#if USE_CHECKSUM_INFO |
5351 |
> |
csi = csi->next; |
5352 |
|
} |
5353 |
< |
*c1=k1; |
5354 |
< |
*c2=k2; |
5355 |
< |
} |
5353 |
> |
#endif |
5354 |
> |
|
5355 |
> |
*c1 = k1; |
5356 |
> |
*c2 = k2; |
5357 |
|
} |
5358 |
|
|
5359 |
< |
static void show_checksum(blockinfo* bi) |
5359 |
> |
#if 0 |
5360 |
> |
static void show_checksum(CSI_TYPE* csi) |
5361 |
|
{ |
5362 |
|
uae_u32 k1=0; |
5363 |
|
uae_u32 k2=0; |
5364 |
< |
uae_s32 len=bi->len; |
5365 |
< |
uae_u32 tmp=(uae_u32)bi->pc_p; |
5364 |
> |
uae_s32 len=CSI_LENGTH(csi); |
5365 |
> |
uae_u32 tmp=(uae_u32)CSI_START_P(csi); |
5366 |
|
uae_u32* pos; |
5367 |
|
|
5368 |
|
len+=(tmp&3); |
5381 |
|
write_log(" bla\n"); |
5382 |
|
} |
5383 |
|
} |
5384 |
+ |
#endif |
5385 |
|
|
5386 |
|
|
5387 |
|
int check_for_cache_miss(void) |
5435 |
|
static inline int block_check_checksum(blockinfo* bi) |
5436 |
|
{ |
5437 |
|
uae_u32 c1,c2; |
5438 |
< |
int isgood; |
5438 |
> |
bool isgood; |
5439 |
|
|
5440 |
|
if (bi->status!=BI_NEED_CHECK) |
5441 |
|
return 1; /* This block is in a checked state */ |
5442 |
|
|
5443 |
|
checksum_count++; |
5444 |
+ |
|
5445 |
|
if (bi->c1 || bi->c2) |
5446 |
|
calc_checksum(bi,&c1,&c2); |
5447 |
|
else { |
5448 |
|
c1=c2=1; /* Make sure it doesn't match */ |
5449 |
< |
} |
5449 |
> |
} |
5450 |
|
|
5451 |
|
isgood=(c1==bi->c1 && c2==bi->c2); |
5452 |
+ |
|
5453 |
|
if (isgood) { |
5454 |
|
/* This block is still OK. So we reactivate. Of course, that |
5455 |
|
means we have to move it into the needs-to-be-flushed list */ |
5567 |
|
registers before jumping back to the various get-out routines. |
5568 |
|
This generates the code for it. |
5569 |
|
*/ |
5570 |
< |
popall_do_nothing=current_compile_p; |
5570 |
> |
align_target(align_jumps); |
5571 |
> |
popall_do_nothing=get_target(); |
5572 |
|
for (i=0;i<N_REGS;i++) { |
5573 |
|
if (need_to_preserve[i]) |
5574 |
|
raw_pop_l_r(i); |
5575 |
|
} |
5576 |
|
raw_jmp((uae_u32)do_nothing); |
5412 |
– |
align_target(32); |
5577 |
|
|
5578 |
+ |
align_target(align_jumps); |
5579 |
|
popall_execute_normal=get_target(); |
5580 |
|
for (i=0;i<N_REGS;i++) { |
5581 |
|
if (need_to_preserve[i]) |
5582 |
|
raw_pop_l_r(i); |
5583 |
|
} |
5584 |
|
raw_jmp((uae_u32)execute_normal); |
5420 |
– |
align_target(32); |
5585 |
|
|
5586 |
+ |
align_target(align_jumps); |
5587 |
|
popall_cache_miss=get_target(); |
5588 |
|
for (i=0;i<N_REGS;i++) { |
5589 |
|
if (need_to_preserve[i]) |
5590 |
|
raw_pop_l_r(i); |
5591 |
|
} |
5592 |
|
raw_jmp((uae_u32)cache_miss); |
5428 |
– |
align_target(32); |
5593 |
|
|
5594 |
+ |
align_target(align_jumps); |
5595 |
|
popall_recompile_block=get_target(); |
5596 |
|
for (i=0;i<N_REGS;i++) { |
5597 |
|
if (need_to_preserve[i]) |
5598 |
|
raw_pop_l_r(i); |
5599 |
|
} |
5600 |
|
raw_jmp((uae_u32)recompile_block); |
5601 |
< |
align_target(32); |
5602 |
< |
|
5601 |
> |
|
5602 |
> |
align_target(align_jumps); |
5603 |
|
popall_exec_nostats=get_target(); |
5604 |
|
for (i=0;i<N_REGS;i++) { |
5605 |
|
if (need_to_preserve[i]) |
5606 |
|
raw_pop_l_r(i); |
5607 |
|
} |
5608 |
|
raw_jmp((uae_u32)exec_nostats); |
5609 |
< |
align_target(32); |
5610 |
< |
|
5609 |
> |
|
5610 |
> |
align_target(align_jumps); |
5611 |
|
popall_check_checksum=get_target(); |
5612 |
|
for (i=0;i<N_REGS;i++) { |
5613 |
|
if (need_to_preserve[i]) |
5614 |
|
raw_pop_l_r(i); |
5615 |
|
} |
5616 |
|
raw_jmp((uae_u32)check_checksum); |
5617 |
< |
align_target(32); |
5618 |
< |
|
5617 |
> |
|
5618 |
> |
align_target(align_jumps); |
5619 |
|
current_compile_p=get_target(); |
5620 |
|
#else |
5621 |
|
popall_exec_nostats=(void *)exec_nostats; |
5624 |
|
popall_recompile_block=(void *)recompile_block; |
5625 |
|
popall_do_nothing=(void *)do_nothing; |
5626 |
|
popall_check_checksum=(void *)check_checksum; |
5462 |
– |
pushall_call_handler=get_target(); |
5627 |
|
#endif |
5628 |
|
|
5629 |
|
/* And now, the code to do the matching pushes and then jump |
5639 |
|
raw_mov_l_rm(r,(uae_u32)®s.pc_p); |
5640 |
|
raw_and_l_ri(r,TAGMASK); |
5641 |
|
raw_jmp_m_indexed((uae_u32)cache_tags,r,4); |
5642 |
+ |
|
5643 |
+ |
#ifdef X86_ASSEMBLY |
5644 |
+ |
align_target(align_jumps); |
5645 |
+ |
m68k_compile_execute = (void (*)(void))get_target(); |
5646 |
+ |
for (i=N_REGS;i--;) { |
5647 |
+ |
if (need_to_preserve[i]) |
5648 |
+ |
raw_push_l_r(i); |
5649 |
+ |
} |
5650 |
+ |
align_target(align_loops); |
5651 |
+ |
uae_u32 dispatch_loop = (uae_u32)get_target(); |
5652 |
+ |
r=REG_PC_TMP; |
5653 |
+ |
raw_mov_l_rm(r,(uae_u32)®s.pc_p); |
5654 |
+ |
raw_and_l_ri(r,TAGMASK); |
5655 |
+ |
raw_call_m_indexed((uae_u32)cache_tags,r,4); |
5656 |
+ |
raw_cmp_l_mi((uae_u32)®s.spcflags,0); |
5657 |
+ |
raw_jcc_b_oponly(NATIVE_CC_EQ); |
5658 |
+ |
emit_byte(dispatch_loop-((uae_u32)get_target()+1)); |
5659 |
+ |
raw_call((uae_u32)m68k_do_specialties); |
5660 |
+ |
raw_test_l_rr(REG_RESULT,REG_RESULT); |
5661 |
+ |
raw_jcc_b_oponly(NATIVE_CC_EQ); |
5662 |
+ |
emit_byte(dispatch_loop-((uae_u32)get_target()+1)); |
5663 |
+ |
raw_cmp_b_mi((uae_u32)&quit_program,0); |
5664 |
+ |
raw_jcc_b_oponly(NATIVE_CC_EQ); |
5665 |
+ |
emit_byte(dispatch_loop-((uae_u32)get_target()+1)); |
5666 |
+ |
for (i=0;i<N_REGS;i++) { |
5667 |
+ |
if (need_to_preserve[i]) |
5668 |
+ |
raw_pop_l_r(i); |
5669 |
+ |
} |
5670 |
+ |
raw_ret(); |
5671 |
+ |
#endif |
5672 |
|
} |
5673 |
|
|
5674 |
|
static __inline__ void reset_lists(void) |
5686 |
|
int i; |
5687 |
|
|
5688 |
|
set_target(current_compile_p); |
5689 |
< |
align_target(32); |
5689 |
> |
align_target(align_jumps); |
5690 |
|
bi->direct_pen=(cpuop_func *)get_target(); |
5691 |
|
raw_mov_l_rm(0,(uae_u32)&(bi->pc_p)); |
5692 |
|
raw_mov_l_mr((uae_u32)®s.pc_p,0); |
5693 |
|
raw_jmp((uae_u32)popall_execute_normal); |
5694 |
|
|
5695 |
< |
align_target(32); |
5695 |
> |
align_target(align_jumps); |
5696 |
|
bi->direct_pcc=(cpuop_func *)get_target(); |
5697 |
|
raw_mov_l_rm(0,(uae_u32)&(bi->pc_p)); |
5698 |
|
raw_mov_l_mr((uae_u32)®s.pc_p,0); |
5699 |
|
raw_jmp((uae_u32)popall_check_checksum); |
5506 |
– |
|
5507 |
– |
align_target(32); |
5700 |
|
current_compile_p=get_target(); |
5701 |
|
|
5702 |
|
bi->deplist=NULL; |
5749 |
|
|
5750 |
|
for (i = 0; tbl[i].opcode < 65536; i++) { |
5751 |
|
int cflow = table68k[tbl[i].opcode].cflow; |
5752 |
+ |
if (USE_INLINING && ((cflow & fl_const_jump) != 0)) |
5753 |
+ |
cflow = fl_const_jump; |
5754 |
+ |
else |
5755 |
+ |
cflow &= ~fl_const_jump; |
5756 |
|
prop[cft_map(tbl[i].opcode)].cflow = cflow; |
5757 |
|
|
5758 |
|
int uses_fpu = tbl[i].specific & 32; |
5759 |
< |
if (uses_fpu && avoid_fpu) |
5759 |
> |
int uses_setzflg = tbl[i].specific & 64; |
5760 |
> |
if ((uses_fpu && avoid_fpu) || (uses_setzflg && !setzflg_uses_bsf)) |
5761 |
|
compfunctbl[cft_map(tbl[i].opcode)] = NULL; |
5762 |
|
else |
5763 |
|
compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler; |
5765 |
|
|
5766 |
|
for (i = 0; nftbl[i].opcode < 65536; i++) { |
5767 |
|
int uses_fpu = tbl[i].specific & 32; |
5768 |
< |
if (uses_fpu && avoid_fpu) |
5768 |
> |
int uses_setzflg = tbl[i].specific & 64; |
5769 |
> |
if ((uses_fpu && avoid_fpu) || (uses_setzflg && !setzflg_uses_bsf)) |
5770 |
|
nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL; |
5771 |
|
else |
5772 |
|
nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler; |
6048 |
|
int r; |
6049 |
|
int was_comp=0; |
6050 |
|
uae_u8 liveflags[MAXRUN+1]; |
6051 |
+ |
#if USE_CHECKSUM_INFO |
6052 |
+ |
bool trace_in_rom = isinrom((uintptr)pc_hist[0].location); |
6053 |
+ |
uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location; |
6054 |
+ |
uae_u32 min_pcp=max_pcp; |
6055 |
+ |
#else |
6056 |
|
uae_u32 max_pcp=(uae_u32)pc_hist[0].location; |
6057 |
|
uae_u32 min_pcp=max_pcp; |
6058 |
+ |
#endif |
6059 |
|
uae_u32 cl=cacheline(pc_hist[0].location); |
6060 |
|
void* specflags=(void*)®s.spcflags; |
6061 |
|
blockinfo* bi=NULL; |
6099 |
|
remove_deps(bi); /* We are about to create new code */ |
6100 |
|
bi->optlevel=optlev; |
6101 |
|
bi->pc_p=(uae_u8*)pc_hist[0].location; |
6102 |
+ |
#if USE_CHECKSUM_INFO |
6103 |
+ |
free_checksum_info_chain(bi->csi); |
6104 |
+ |
bi->csi = NULL; |
6105 |
+ |
#endif |
6106 |
|
|
6107 |
|
liveflags[blocklen]=0x1f; /* All flags needed afterwards */ |
6108 |
|
i=blocklen; |
6110 |
|
uae_u16* currpcp=pc_hist[i].location; |
6111 |
|
uae_u32 op=DO_GET_OPCODE(currpcp); |
6112 |
|
|
6113 |
+ |
#if USE_CHECKSUM_INFO |
6114 |
+ |
trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp); |
6115 |
+ |
#if USE_INLINING |
6116 |
+ |
if (is_const_jump(op)) { |
6117 |
+ |
checksum_info *csi = alloc_checksum_info(); |
6118 |
+ |
csi->start_p = (uae_u8 *)min_pcp; |
6119 |
+ |
csi->length = max_pcp - min_pcp + LONGEST_68K_INST; |
6120 |
+ |
csi->next = bi->csi; |
6121 |
+ |
bi->csi = csi; |
6122 |
+ |
max_pcp = (uae_u32)currpcp; |
6123 |
+ |
} |
6124 |
+ |
#endif |
6125 |
+ |
min_pcp = (uae_u32)currpcp; |
6126 |
+ |
#else |
6127 |
|
if ((uae_u32)currpcp<min_pcp) |
6128 |
|
min_pcp=(uae_u32)currpcp; |
6129 |
|
if ((uae_u32)currpcp>max_pcp) |
6130 |
|
max_pcp=(uae_u32)currpcp; |
6131 |
+ |
#endif |
6132 |
|
|
6133 |
|
liveflags[i]=((liveflags[i+1]& |
6134 |
|
(~prop[op].set_flags))| |
6137 |
|
liveflags[i]&= ~FLAG_Z; |
6138 |
|
} |
6139 |
|
|
6140 |
+ |
#if USE_CHECKSUM_INFO |
6141 |
+ |
checksum_info *csi = alloc_checksum_info(); |
6142 |
+ |
csi->start_p = (uae_u8 *)min_pcp; |
6143 |
+ |
csi->length = max_pcp - min_pcp + LONGEST_68K_INST; |
6144 |
+ |
csi->next = bi->csi; |
6145 |
+ |
bi->csi = csi; |
6146 |
+ |
#endif |
6147 |
+ |
|
6148 |
|
bi->needed_flags=liveflags[0]; |
6149 |
|
|
6150 |
< |
align_target(32); |
6150 |
> |
align_target(align_loops); |
6151 |
|
was_comp=0; |
6152 |
|
|
6153 |
|
bi->direct_handler=(cpuop_func *)get_target(); |
6234 |
|
raw_mov_l_mi((uae_u32)®s.pc_p, |
6235 |
|
(uae_u32)pc_hist[i].location); |
6236 |
|
raw_call((uae_u32)cputbl[opcode]); |
6237 |
+ |
#if PROFILE_UNTRANSLATED_INSNS |
6238 |
+ |
// raw_cputbl_count[] is indexed with plain opcode (in m68k order) |
6239 |
+ |
raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1); |
6240 |
+ |
#endif |
6241 |
|
//raw_add_l_mi((uae_u32)&oink,1); // FIXME |
6242 |
|
#if USE_NORMAL_CALLING_CONVENTION |
6243 |
|
raw_inc_sp(4); |
6326 |
|
raw_jmp((uae_u32)popall_do_nothing); |
6327 |
|
create_jmpdep(bi,0,tba,t1); |
6328 |
|
|
6329 |
< |
align_target(16); |
6329 |
> |
align_target(align_jumps); |
6330 |
|
/* not-predicted outcome */ |
6331 |
|
*branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4); |
6332 |
|
live=tmp; /* Ouch again */ |
6395 |
|
big_to_small_state(&live,&(bi->env)); |
6396 |
|
#endif |
6397 |
|
|
6398 |
+ |
#if USE_CHECKSUM_INFO |
6399 |
+ |
remove_from_list(bi); |
6400 |
+ |
if (trace_in_rom) { |
6401 |
+ |
// No need to checksum that block trace on cache invalidation |
6402 |
+ |
free_checksum_info_chain(bi->csi); |
6403 |
+ |
bi->csi = NULL; |
6404 |
+ |
add_to_dormant(bi); |
6405 |
+ |
} |
6406 |
+ |
else { |
6407 |
+ |
calc_checksum(bi,&(bi->c1),&(bi->c2)); |
6408 |
+ |
add_to_active(bi); |
6409 |
+ |
} |
6410 |
+ |
#else |
6411 |
|
if (next_pc_p+extra_len>=max_pcp && |
6412 |
|
next_pc_p+extra_len<max_pcp+LONGEST_68K_INST) |
6413 |
|
max_pcp=next_pc_p+extra_len; /* extra_len covers flags magic */ |
6414 |
|
else |
6415 |
|
max_pcp+=LONGEST_68K_INST; |
6416 |
+ |
|
6417 |
|
bi->len=max_pcp-min_pcp; |
6418 |
|
bi->min_pcp=min_pcp; |
6419 |
< |
|
6419 |
> |
|
6420 |
|
remove_from_list(bi); |
6421 |
|
if (isinrom(min_pcp) && isinrom(max_pcp)) { |
6422 |
|
add_to_dormant(bi); /* No need to checksum it on cache flush. |
6427 |
|
calc_checksum(bi,&(bi->c1),&(bi->c2)); |
6428 |
|
add_to_active(bi); |
6429 |
|
} |
6430 |
+ |
#endif |
6431 |
|
|
6432 |
|
current_cache_size += get_target() - (uae_u8 *)current_compile_p; |
6433 |
|
|
6447 |
|
#endif |
6448 |
|
|
6449 |
|
log_dump(); |
6450 |
< |
align_target(32); |
6450 |
> |
align_target(align_jumps); |
6451 |
|
|
6452 |
|
/* This is the non-direct handler */ |
6453 |
|
bi->handler= |
6463 |
|
|
6464 |
|
raw_jmp((uae_u32)bi->direct_handler); |
6465 |
|
|
6216 |
– |
align_target(32); |
6466 |
|
current_compile_p=get_target(); |
6218 |
– |
|
6467 |
|
raise_in_cl_list(bi); |
6468 |
|
|
6469 |
|
/* We will flush soon, anyway, so let's do it now */ |
6489 |
|
{ |
6490 |
|
for (;;) { |
6491 |
|
uae_u32 opcode = GET_OPCODE; |
6244 |
– |
#ifdef X86_ASSEMBLY__disable |
6245 |
– |
__asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */ |
6246 |
– |
: : "b" (cpufunctbl[opcode]), "a" (opcode) |
6247 |
– |
: "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc"); |
6248 |
– |
#else |
6492 |
|
(*cpufunctbl[opcode])(opcode); |
6250 |
– |
#endif |
6493 |
|
if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) { |
6494 |
|
return; /* We will deal with the spcflags in the caller */ |
6495 |
|
} |
6514 |
|
#if FLIGHT_RECORDER |
6515 |
|
m68k_record_step(m68k_getpc()); |
6516 |
|
#endif |
6275 |
– |
#ifdef X86_ASSEMBLY__disable |
6276 |
– |
__asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */ |
6277 |
– |
: : "b" (cpufunctbl[opcode]), "a" (opcode) |
6278 |
– |
: "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc"); |
6279 |
– |
#else |
6517 |
|
(*cpufunctbl[opcode])(opcode); |
6281 |
– |
#endif |
6518 |
|
if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) { |
6519 |
|
compile_block(pc_hist, blocklen); |
6520 |
|
return; /* We will deal with the spcflags in the caller */ |
6527 |
|
|
6528 |
|
typedef void (*compiled_handler)(void); |
6529 |
|
|
6530 |
+ |
#ifdef X86_ASSEMBLY |
6531 |
+ |
void (*m68k_compile_execute)(void) = NULL; |
6532 |
+ |
#else |
6533 |
|
void m68k_do_compile_execute(void) |
6534 |
|
{ |
6535 |
|
for (;;) { |
6297 |
– |
#ifdef X86_ASSEMBLY |
6298 |
– |
__asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */ |
6299 |
– |
: : "b" (cache_tags[cacheline(regs.pc_p)].handler) |
6300 |
– |
: "%edx", "%ecx", "%eax", "%esi", "%edi", "%ebp", "memory", "cc"); |
6301 |
– |
#else |
6536 |
|
((compiled_handler)(pushall_call_handler))(); |
6303 |
– |
#endif |
6537 |
|
/* Whenever we return from that, we should check spcflags */ |
6538 |
|
if (SPCFLAGS_TEST(SPCFLAG_ALL)) { |
6539 |
|
if (m68k_do_specialties ()) |
6541 |
|
} |
6542 |
|
} |
6543 |
|
} |
6544 |
+ |
#endif |