
Comparing BasiliskII/src/uae_cpu/compiler/compemu_support.cpp (file contents):
Revision 1.4 by gbeauche, 2002-09-18T11:41:56Z vs.
Revision 1.13 by gbeauche, 2002-11-02T18:13:29Z

# Line 1 | Line 1
1 + /*
2 + *  compiler/compemu_support.cpp - Core dynamic translation engine
3 + *
4 + *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 + *
6 + *  Adaptation for Basilisk II and improvements, copyright 2000-2002
7 + *    Gwenole Beauchesne
8 + *
9 + *  Basilisk II (C) 1997-2002 Christian Bauer
10 + *  
11 + *  This program is free software; you can redistribute it and/or modify
12 + *  it under the terms of the GNU General Public License as published by
13 + *  the Free Software Foundation; either version 2 of the License, or
14 + *  (at your option) any later version.
15 + *
16 + *  This program is distributed in the hope that it will be useful,
17 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 + *  GNU General Public License for more details.
20 + *
21 + *  You should have received a copy of the GNU General Public License
22 + *  along with this program; if not, write to the Free Software
23 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 + */
25 +
26   #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27   #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28   #endif
# Line 44 | Line 69
69   #endif
70  
71   #ifndef WIN32
72 < #define PROFILE_COMPILE_TIME    1
72 > #define PROFILE_COMPILE_TIME            1
73 > #define PROFILE_UNTRANSLATED_INSNS      1
74   #endif
75  
76   #ifdef WIN32
# Line 69 | Line 95 | static clock_t emul_start_time = 0;
95   static clock_t emul_end_time    = 0;
96   #endif
97  
98 + #if PROFILE_UNTRANSLATED_INSNS
99 + const int untranslated_top_ten = 20;
100 + static uae_u32 raw_cputbl_count[65536] = { 0, };
101 + static uae_u16 opcode_nums[65536];
102 +
103 + static int untranslated_compfn(const void *e1, const void *e2)
104 + {
105 +        return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
106 + }
107 + #endif
108 +
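A note on untranslated_compfn() above: returning the bare result of `<` yields only 0 or 1, which does not meet the negative/zero/positive contract qsort() requires, though glibc still produces a roughly descending order in practice. A strictly conforming descending comparator would look like the following sketch (the _fixed name is illustrative, not part of either revision):

static int untranslated_compfn_fixed(const void *e1, const void *e2)
{
	uae_u32 n1 = raw_cputbl_count[*(const uae_u16 *)e1];
	uae_u32 n2 = raw_cputbl_count[*(const uae_u16 *)e2];
	/* descending: report "less than" when n1 is the larger count */
	return (n1 < n2) - (n1 > n2);
}
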
109   compop_func *compfunctbl[65536];
110   compop_func *nfcompfunctbl[65536];
111   cpuop_func *nfcpufunctbl[65536];
112   uae_u8* comp_pc_p;
113  
114 + // From newcpu.cpp
115 + extern bool quit_program;
116 +
117   // gb-- Extra data for Basilisk II/JIT
118   #if JIT_DEBUG
119   static bool             JITDebug                        = false;        // Enable runtime disassemblers through mon?
# Line 88 | Line 128 | static bool            lazy_flush                      = true;         // Fl
128   static bool             avoid_fpu                       = true;         // Flag: compile FPU instructions ?
129   static bool             have_cmov                       = false;        // target has CMOV instructions ?
130   static bool             have_rat_stall          = true;         // target has partial register stalls ?
131 + const bool              tune_alignment          = true;         // Tune code alignments for running CPU ?
132 + const bool              tune_nop_fillers        = true;         // Tune no-op fillers for architecture
133 + static int              align_loops                     = 32;           // Align the start of loops
134 + static int              align_jumps                     = 32;           // Align the start of jumps
135   static int              zero_fd                         = -1;
136   static int              optcount[10]            = {
137          10,             // How often a block has to be executed before it is translated
# Line 104 | Line 148 | struct op_properties {
148   };
149   static op_properties prop[65536];
150  
107 // gb-- Control Flow Predicates
108
151   static inline int end_block(uae_u32 opcode)
152   {
153          return (prop[opcode].cflow & fl_end_block);
154   }
155  
156 < static inline bool may_trap(uae_u32 opcode)
156 > static inline bool is_const_jump(uae_u32 opcode)
157   {
158 <        return (prop[opcode].cflow & fl_trap);
158 >        return (prop[opcode].cflow == fl_const_jump);
159   }
160  
161   uae_u8* start_pc_p;
# Line 491 | Line 533 | static void prepare_block(blockinfo* bi)
533     compiled. If the list of free blockinfos is empty, we allocate a new
534     pool of blockinfos and link the newly created blockinfos together
535     into the list of free blockinfos. Otherwise, we simply pop a structure
536 <   of the free list.
536 >   off the free list.
537  
538     Blockinfos are lazily deallocated, i.e. chained together in the
539     list of free blockinfos whenever a translation cache flush (hard or
540     soft) request occurs.
541   */
542  
543 < #if USE_SEPARATE_BIA
544 < const int BLOCKINFO_POOL_SIZE = 128;
545 < struct blockinfo_pool {
546 <        blockinfo bi[BLOCKINFO_POOL_SIZE];
547 <        blockinfo_pool *next;
543 > template< class T >
544 > class LazyBlockAllocator
545 > {
546 >        enum {
547 >                kPoolSize = 1 + 4096 / sizeof(T)
548 >        };
549 >        struct Pool {
550 >                T chunk[kPoolSize];
551 >                Pool * next;
552 >        };
553 >        Pool * mPools;
554 >        T * mChunks;
555 > public:
556 >        LazyBlockAllocator() : mPools(0), mChunks(0) { }
557 >        ~LazyBlockAllocator();
558 >        T * acquire();
559 >        void release(T * const);
560   };
507 static blockinfo_pool * blockinfo_pools = 0;
508 static blockinfo *              free_blockinfos = 0;
509 #endif
561  
562 < static __inline__ blockinfo *alloc_blockinfo(void)
562 > template< class T >
563 > LazyBlockAllocator<T>::~LazyBlockAllocator()
564   {
565 < #if USE_SEPARATE_BIA
566 <        if (!free_blockinfos) {
567 <                // There is no blockinfo struct left, allocate a new
568 <                // pool and link the chunks into the free list
569 <                blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool));
570 <                for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) {
571 <                        bi->next = free_blockinfos;
572 <                        free_blockinfos = bi;
565 >        Pool * currentPool = mPools;
566 >        while (currentPool) {
567 >                Pool * deadPool = currentPool;
568 >                currentPool = currentPool->next;
569 >                free(deadPool);
570 >        }
571 > }
572 >
573 > template< class T >
574 > T * LazyBlockAllocator<T>::acquire()
575 > {
576 >        if (!mChunks) {
577 >                // There is no chunk left, allocate a new pool and link the
578 >                // chunks into the free list
579 >                Pool * newPool = (Pool *)malloc(sizeof(Pool));
580 >                for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
581 >                        chunk->next = mChunks;
582 >                        mChunks = chunk;
583                  }
584 <                bi_pool->next = blockinfo_pools;
585 <                blockinfo_pools = bi_pool;
584 >                newPool->next = mPools;
585 >                mPools = newPool;
586          }
587 <        blockinfo *bi = free_blockinfos;
588 <        free_blockinfos = bi->next;
589 < #else
528 <        blockinfo *bi = (blockinfo*)current_compile_p;
529 <        current_compile_p += sizeof(blockinfo);
530 < #endif
531 <        return bi;
587 >        T * chunk = mChunks;
588 >        mChunks = chunk->next;
589 >        return chunk;
590   }
591  
592 < static __inline__ void free_blockinfo(blockinfo *bi)
592 > template< class T >
593 > void LazyBlockAllocator<T>::release(T * const chunk)
594   {
595 +        chunk->next = mChunks;
596 +        mChunks = chunk;
597 + }
598 +
599 + template< class T >
600 + class HardBlockAllocator
601 + {
602 + public:
603 +        T * acquire() {
604 +                T * data = (T *)current_compile_p;
605 +                current_compile_p += sizeof(T);
606 +                return data;
607 +        }
608 +
609 +        void release(T * const chunk) {
610 +                // Deallocated on invalidation
611 +        }
612 + };
613 +
614   #if USE_SEPARATE_BIA
615 <        bi->next = free_blockinfos;
616 <        free_blockinfos = bi;
615 > static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
616 > static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
617 > #else
618 > static HardBlockAllocator<blockinfo> BlockInfoAllocator;
619 > static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
620   #endif
621 +
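Both allocator flavours expose the same acquire()/release() pair: LazyBlockAllocator recycles chunks through an intrusive free list backed by malloc'd pools of roughly one page (kPoolSize is 1 + 4096/sizeof(T)), while HardBlockAllocator bump-allocates out of the translation cache and reclaims nothing until the cache is invalidated. A minimal usage sketch, assuming only a type with an intrusive next pointer (demo_node and demo_usage are hypothetical names):

struct demo_node { demo_node *next; int payload; };
static LazyBlockAllocator<demo_node> DemoAllocator;

static void demo_usage(void)
{
	demo_node *n = DemoAllocator.acquire();	// pop the free list, refilling
						// it from a new pool if empty
	n->payload = 42;
	DemoAllocator.release(n);	// push back on the free list; pools
					// are only freed by the destructor
}
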
622 + static __inline__ checksum_info *alloc_checksum_info(void)
623 + {
624 +        checksum_info *csi = ChecksumInfoAllocator.acquire();
625 +        csi->next = NULL;
626 +        return csi;
627   }
628  
629 < static void free_blockinfo_pools(void)
629 > static __inline__ void free_checksum_info(checksum_info *csi)
630   {
631 < #if USE_SEPARATE_BIA
632 <        int blockinfo_pool_count = 0;
633 <        blockinfo_pool *curr_pool = blockinfo_pools;
634 <        while (curr_pool) {
635 <                blockinfo_pool_count++;
636 <                blockinfo_pool *dead_pool = curr_pool;
637 <                curr_pool = curr_pool->next;
638 <                free(dead_pool);
631 >        csi->next = NULL;
632 >        ChecksumInfoAllocator.release(csi);
633 > }
634 >
635 > static __inline__ void free_checksum_info_chain(checksum_info *csi)
636 > {
637 >        while (csi != NULL) {
638 >                checksum_info *csi2 = csi->next;
639 >                free_checksum_info(csi);
640 >                csi = csi2;
641          }
642 <        
643 <        uae_u32 blockinfo_pools_size = blockinfo_pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo);
644 <        write_log("### Blockinfo allocation statistics\n");
645 <        write_log("Number of blockinfo pools  : %d\n", blockinfo_pool_count);
646 <        write_log("Total number of blockinfos : %d (%d KB)\n",
647 <                          blockinfo_pool_count * BLOCKINFO_POOL_SIZE,
648 <                          blockinfo_pools_size / 1024);
649 <        write_log("\n");
642 > }
643 >
644 > static __inline__ blockinfo *alloc_blockinfo(void)
645 > {
646 >        blockinfo *bi = BlockInfoAllocator.acquire();
647 > #if USE_CHECKSUM_INFO
648 >        bi->csi = NULL;
649 > #endif
650 >        return bi;
651 > }
652 >
653 > static __inline__ void free_blockinfo(blockinfo *bi)
654 > {
655 > #if USE_CHECKSUM_INFO
656 >        free_checksum_info_chain(bi->csi);
657 >        bi->csi = NULL;
658   #endif
659 +        BlockInfoAllocator.release(bi);
660   }
661  
662   static __inline__ void alloc_blockinfos(void)
# Line 601 | Line 699 | static __inline__ void emit_long(uae_u32
699      target+=4;
700   }
701  
702 + static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
703 + {
704 +        memcpy((uae_u8 *)target,block,blocklen);
705 +        target+=blocklen;
706 + }
707 +
708   static __inline__ uae_u32 reverse32(uae_u32 v)
709   {
710   #if 1
# Line 4562 | Line 4666 | void compiler_init(void)
4666          raw_init_cpu();
4667          write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4668          write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4669 +        write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4670          
4671          // Translation cache flush mechanism
4672          lazy_flush = PrefsFindBool("jitlazyflush");
# Line 4572 | Line 4677 | void compiler_init(void)
4677          write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4678          write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4679          write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4680 +        write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
4681          write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4682          
4683          // Build compiler tables
# Line 4579 | Line 4685 | void compiler_init(void)
4685          
4686          initialized = true;
4687          
4688 + #if PROFILE_UNTRANSLATED_INSNS
4689 +        write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
4690 + #endif
4691 +
4692   #if PROFILE_COMPILE_TIME
4693          write_log("<JIT compiler> : gather statistics on translation time\n");
4694          emul_start_time = clock();
# Line 4597 | Line 4707 | void compiler_exit(void)
4707                  compiled_code = 0;
4708          }
4709          
4600        // Deallocate blockinfo pools
4601        free_blockinfo_pools();
4602        
4710   #ifndef WIN32
4711          // Close /dev/zero
4712          if (zero_fd > 0)
# Line 4615 | Line 4722 | void compiler_exit(void)
4722                  100.0*double(compile_time)/double(emul_time));
4723          write_log("\n");
4724   #endif
4725 +
4726 + #if PROFILE_UNTRANSLATED_INSNS
4727 +        uae_u64 untranslated_count = 0;
4728 +        for (int i = 0; i < 65536; i++) {
4729 +                opcode_nums[i] = i;
4730 +                untranslated_count += raw_cputbl_count[i];
4731 +        }
4732 +        write_log("Sorting out untranslated instructions count...\n");
4733 +        qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
4734 +        write_log("\nRank  Opc      Count Name\n");
4735 +        for (int i = 0; i < untranslated_top_ten; i++) {
4736 +                uae_u32 count = raw_cputbl_count[opcode_nums[i]];
4737 +                struct instr *dp;
4738 +                struct mnemolookup *lookup;
4739 +                if (!count)
4740 +                        break;
4741 +                dp = table68k + opcode_nums[i];
4742 +                for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
4743 +                        ;
4744 +                write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
4745 +        }
4746 + #endif
4747   }
4748  
4749   bool compiler_use_jit(void)
# Line 4843 | Line 4972 | void freescratch(void)
4972  
4973   static void align_target(uae_u32 a)
4974   {
4975 <    /* Fill with NOPs --- makes debugging with gdb easier */
4976 <    while ((uae_u32)target&(a-1))
4977 <        *target++=0x90;
4975 >        if (tune_nop_fillers)
4976 >                raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
4977 >        else {
4978 >                /* Fill with NOPs --- makes debugging with gdb easier */
4979 >                while ((uae_u32)target&(a-1))
4980 >                        *target++=0x90;
4981 >        }
4982   }
4983  
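raw_emit_nop_filler() itself lives in the native code generator and is not part of this diff. As a sketch of the technique, tuned fillers replace a run of single-byte 0x90s with the longest applicable multi-byte no-op encodings, so the padding decodes as fewer instructions (the function name and encoding table below are illustrative, using the classic gas-style x86 fillers):

static void emit_nop_filler_sketch(uae_u32 nbytes)
{
	static const uae_u8 f1[] = { 0x90 };			// nop
	static const uae_u8 f2[] = { 0x89, 0xf6 };		// mov %esi,%esi
	static const uae_u8 f3[] = { 0x8d, 0x76, 0x00 };	// lea 0(%esi),%esi
	static const uae_u8 *const fillers[] = { f1, f2, f3 };
	while (nbytes > 0) {
		uae_u32 n = nbytes > 3 ? 3 : nbytes;
		emit_block(fillers[n - 1], n);	// emit_block() is added above
		nbytes -= n;
	}
}
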
4984   static __inline__ int isinrom(uintptr addr)
# Line 5170 | Line 5303 | void alloc_cache(void)
5303  
5304  
5305  
5306 < extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5306 > extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5307  
5308   static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5309   {
5310 <    uae_u32 k1=0;
5311 <    uae_u32 k2=0;
5179 <    uae_s32 len=bi->len;
5180 <    uae_u32 tmp=bi->min_pcp;
5181 <    uae_u32* pos;
5310 >    uae_u32 k1 = 0;
5311 >    uae_u32 k2 = 0;
5312  
5313 <    len+=(tmp&3);
5314 <    tmp&=(~3);
5315 <    pos=(uae_u32*)tmp;
5313 > #if USE_CHECKSUM_INFO
5314 >    checksum_info *csi = bi->csi;
5315 >        Dif(!csi) abort();
5316 >        while (csi) {
5317 >                uae_s32 len = csi->length;
5318 >                uae_u32 tmp = (uae_u32)csi->start_p;
5319 > #else
5320 >                uae_s32 len = bi->len;
5321 >                uae_u32 tmp = (uae_u32)bi->min_pcp;
5322 > #endif
5323 >                uae_u32*pos;
5324  
5325 <    if (len<0 || len>MAX_CHECKSUM_LEN) {
5326 <        *c1=0;
5327 <        *c2=0;
5328 <    }
5329 <    else {
5330 <        while (len>0) {
5331 <            k1+=*pos;
5332 <            k2^=*pos;
5333 <            pos++;
5334 <            len-=4;
5325 >                len += (tmp & 3);
5326 >                tmp &= ~3;
5327 >                pos = (uae_u32 *)tmp;
5328 >
5329 >                if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5330 >                        while (len > 0) {
5331 >                                k1 += *pos;
5332 >                                k2 ^= *pos;
5333 >                                pos++;
5334 >                                len -= 4;
5335 >                        }
5336 >                }
5337 >
5338 > #if USE_CHECKSUM_INFO
5339 >                csi = csi->next;
5340          }
5341 <        *c1=k1;
5342 <        *c2=k2;
5343 <    }
5341 > #endif
5342 >
5343 >        *c1 = k1;
5344 >        *c2 = k2;
5345   }
5346  
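Stripped of the USE_CHECKSUM_INFO plumbing, the checksum itself is a (sum, xor) pair over the 32-bit words covering a source range: the start is rounded down to a word boundary and the length widened to match, and the two halves complement each other since edits that cancel in the sum rarely cancel in the xor. A standalone restatement (checksum_range is an illustrative name; the MAX_CHECKSUM_LEN guard is omitted):

static void checksum_range(const uae_u8 *start, uae_s32 len,
			   uae_u32 *c1, uae_u32 *c2)
{
	uae_u32 addr = (uae_u32)start;
	len += (addr & 3);	// widen to cover the unaligned prefix
	addr &= ~3;		// round down to a 4-byte boundary
	const uae_u32 *pos = (const uae_u32 *)addr;
	uae_u32 k1 = 0, k2 = 0;
	while (len > 0) {
		k1 += *pos;	// additive half
		k2 ^= *pos;	// xor half
		pos++;
		len -= 4;
	}
	*c1 = k1;
	*c2 = k2;
}
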
5347 < static void show_checksum(blockinfo* bi)
5347 > #if 0
5348 > static void show_checksum(CSI_TYPE* csi)
5349   {
5350      uae_u32 k1=0;
5351      uae_u32 k2=0;
5352 <    uae_s32 len=bi->len;
5353 <    uae_u32 tmp=(uae_u32)bi->pc_p;
5352 >    uae_s32 len=CSI_LENGTH(csi);
5353 >    uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5354      uae_u32* pos;
5355  
5356      len+=(tmp&3);
# Line 5224 | Line 5369 | static void show_checksum(blockinfo* bi)
5369          write_log(" bla\n");
5370      }
5371   }
5372 + #endif
5373  
5374  
5375   int check_for_cache_miss(void)
# Line 5277 | Line 5423 | static int called_check_checksum(blockin
5423   static inline int block_check_checksum(blockinfo* bi)
5424   {
5425      uae_u32     c1,c2;
5426 <    int         isgood;
5426 >    bool        isgood;
5427      
5428      if (bi->status!=BI_NEED_CHECK)
5429          return 1;  /* This block is in a checked state */
5430      
5431      checksum_count++;
5432 +
5433      if (bi->c1 || bi->c2)
5434          calc_checksum(bi,&c1,&c2);
5435      else {
5436          c1=c2=1;  /* Make sure it doesn't match */
5437 <    }
5437 >        }
5438      
5439      isgood=(c1==bi->c1 && c2==bi->c2);
5440 +
5441      if (isgood) {
5442          /* This block is still OK. So we reactivate. Of course, that
5443             means we have to move it into the needs-to-be-flushed list */
# Line 5407 | Line 5555 | static __inline__ void create_popalls(vo
5555       registers before jumping back to the various get-out routines.
5556       This generates the code for it.
5557    */
5558 <  popall_do_nothing=current_compile_p;
5558 >  align_target(align_jumps);
5559 >  popall_do_nothing=get_target();
5560    for (i=0;i<N_REGS;i++) {
5561        if (need_to_preserve[i])
5562            raw_pop_l_r(i);
5563    }
5564    raw_jmp((uae_u32)do_nothing);
5416  align_target(32);
5565    
5566 +  align_target(align_jumps);
5567    popall_execute_normal=get_target();
5568    for (i=0;i<N_REGS;i++) {
5569        if (need_to_preserve[i])
5570            raw_pop_l_r(i);
5571    }
5572    raw_jmp((uae_u32)execute_normal);
5424  align_target(32);
5573  
5574 +  align_target(align_jumps);
5575    popall_cache_miss=get_target();
5576    for (i=0;i<N_REGS;i++) {
5577        if (need_to_preserve[i])
5578            raw_pop_l_r(i);
5579    }
5580    raw_jmp((uae_u32)cache_miss);
5432  align_target(32);
5581  
5582 +  align_target(align_jumps);
5583    popall_recompile_block=get_target();
5584    for (i=0;i<N_REGS;i++) {
5585        if (need_to_preserve[i])
5586            raw_pop_l_r(i);
5587    }
5588    raw_jmp((uae_u32)recompile_block);
5589 <  align_target(32);
5590 <  
5589 >
5590 >  align_target(align_jumps);
5591    popall_exec_nostats=get_target();
5592    for (i=0;i<N_REGS;i++) {
5593        if (need_to_preserve[i])
5594            raw_pop_l_r(i);
5595    }
5596    raw_jmp((uae_u32)exec_nostats);
5597 <  align_target(32);
5598 <  
5597 >
5598 >  align_target(align_jumps);
5599    popall_check_checksum=get_target();
5600    for (i=0;i<N_REGS;i++) {
5601        if (need_to_preserve[i])
5602            raw_pop_l_r(i);
5603    }
5604    raw_jmp((uae_u32)check_checksum);
5605 <  align_target(32);
5606 <  
5605 >
5606 >  align_target(align_jumps);
5607    current_compile_p=get_target();
5608   #else
5609    popall_exec_nostats=(void *)exec_nostats;
# Line 5463 | Line 5612 | static __inline__ void create_popalls(vo
5612    popall_recompile_block=(void *)recompile_block;
5613    popall_do_nothing=(void *)do_nothing;
5614    popall_check_checksum=(void *)check_checksum;
5466  pushall_call_handler=get_target();  
5615   #endif
5616  
5617    /* And now, the code to do the matching pushes and then jump
# Line 5479 | Line 5627 | static __inline__ void create_popalls(vo
5627    raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5628    raw_and_l_ri(r,TAGMASK);
5629    raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5630 +
5631 + #ifdef X86_ASSEMBLY
5632 +  align_target(align_jumps);
5633 +  m68k_compile_execute = (void (*)(void))get_target();
5634 +  for (i=N_REGS;i--;) {
5635 +          if (need_to_preserve[i])
5636 +                  raw_push_l_r(i);
5637 +  }
5638 +  align_target(align_loops);
5639 +  uae_u32 dispatch_loop = (uae_u32)get_target();
5640 +  r=REG_PC_TMP;
5641 +  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5642 +  raw_and_l_ri(r,TAGMASK);
5643 +  raw_call_m_indexed((uae_u32)cache_tags,r,4);
5644 +  raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5645 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5646 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5647 +  raw_call((uae_u32)m68k_do_specialties);
5648 +  raw_test_l_rr(REG_RESULT,REG_RESULT);
5649 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5650 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5651 +  raw_cmp_b_mi((uae_u32)&quit_program,0);
5652 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5653 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5654 +  for (i=0;i<N_REGS;i++) {
5655 +          if (need_to_preserve[i])
5656 +                  raw_pop_l_r(i);
5657 +  }
5658 +  raw_ret();
5659 + #endif
5660   }
5661  
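In C terms, the m68k_compile_execute body emitted above amounts to the loop below; generating it as native code keeps dispatch inside compiled code and gives the loop head the tuned align_loops alignment. This is a sketch only, with the handler's function-pointer type glossed over:

static void m68k_compile_execute_sketch(void)
{
	typedef void (*block_handler)(void);
	for (;;) {
		// the same indexed call raw_call_m_indexed() emits
		((block_handler)cache_tags[cacheline(regs.pc_p)].handler)();
		if (!SPCFLAGS_TEST(SPCFLAG_ALL))
			continue;	// fast path: straight back around
		if (!m68k_do_specialties())
			continue;
		if (quit_program)
			return;		// emitted epilogue pops regs and rets
	}
}
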
5662   static __inline__ void reset_lists(void)
# Line 5496 | Line 5674 | static void prepare_block(blockinfo* bi)
5674      int i;
5675  
5676      set_target(current_compile_p);
5677 <    align_target(32);
5677 >    align_target(align_jumps);
5678      bi->direct_pen=(cpuop_func *)get_target();
5679      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5680      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5681      raw_jmp((uae_u32)popall_execute_normal);
5682  
5683 <    align_target(32);
5683 >    align_target(align_jumps);
5684      bi->direct_pcc=(cpuop_func *)get_target();
5685      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5686      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5687      raw_jmp((uae_u32)popall_check_checksum);
5510
5511    align_target(32);
5688      current_compile_p=get_target();
5689  
5690      bi->deplist=NULL;
# Line 5561 | Line 5737 | void build_comp(void)
5737          
5738          for (i = 0; tbl[i].opcode < 65536; i++) {
5739                  int cflow = table68k[tbl[i].opcode].cflow;
5740 +                if (USE_INLINING && ((cflow & fl_const_jump) != 0))
5741 +                        cflow = fl_const_jump;
5742 +                else
5743 +                        cflow &= ~fl_const_jump;
5744                  prop[cft_map(tbl[i].opcode)].cflow = cflow;
5745  
5746                  int uses_fpu = tbl[i].specific & 32;
# Line 5854 | Line 6034 | static void compile_block(cpu_history* p
6034          int r;
6035          int was_comp=0;
6036          uae_u8 liveflags[MAXRUN+1];
6037 + #if USE_CHECKSUM_INFO
6038 +        bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6039 +        uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
6040 +        uae_u32 min_pcp=max_pcp;
6041 + #else
6042          uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
6043          uae_u32 min_pcp=max_pcp;
6044 + #endif
6045          uae_u32 cl=cacheline(pc_hist[0].location);
6046          void* specflags=(void*)&regs.spcflags;
6047          blockinfo* bi=NULL;
# Line 5899 | Line 6085 | static void compile_block(cpu_history* p
6085          remove_deps(bi); /* We are about to create new code */
6086          bi->optlevel=optlev;
6087          bi->pc_p=(uae_u8*)pc_hist[0].location;
6088 + #if USE_CHECKSUM_INFO
6089 +        free_checksum_info_chain(bi->csi);
6090 +        bi->csi = NULL;
6091 + #endif
6092          
6093          liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6094          i=blocklen;
# Line 5906 | Line 6096 | static void compile_block(cpu_history* p
6096              uae_u16* currpcp=pc_hist[i].location;
6097              uae_u32 op=DO_GET_OPCODE(currpcp);
6098  
6099 + #if USE_CHECKSUM_INFO
6100 +                trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6101 + #if USE_INLINING
6102 +                if (is_const_jump(op)) {
6103 +                        checksum_info *csi = alloc_checksum_info();
6104 +                        csi->start_p = (uae_u8 *)min_pcp;
6105 +                        csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6106 +                        csi->next = bi->csi;
6107 +                        bi->csi = csi;
6108 +                        max_pcp = (uae_u32)currpcp;
6109 +                }
6110 + #endif
6111 +                min_pcp = (uae_u32)currpcp;
6112 + #else
6113              if ((uae_u32)currpcp<min_pcp)
6114                  min_pcp=(uae_u32)currpcp;
6115              if ((uae_u32)currpcp>max_pcp)
6116                  max_pcp=(uae_u32)currpcp;
6117 + #endif
6118  
6119                  liveflags[i]=((liveflags[i+1]&
6120                                 (~prop[op].set_flags))|
# Line 5918 | Line 6123 | static void compile_block(cpu_history* p
6123                      liveflags[i]&= ~FLAG_Z;
6124          }
6125  
6126 + #if USE_CHECKSUM_INFO
6127 +        checksum_info *csi = alloc_checksum_info();
6128 +        csi->start_p = (uae_u8 *)min_pcp;
6129 +        csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6130 +        csi->next = bi->csi;
6131 +        bi->csi = csi;
6132 + #endif
6133 +
6134          bi->needed_flags=liveflags[0];
6135  
6136 <        align_target(32);
6136 >        align_target(align_loops);
6137          was_comp=0;
6138  
6139          bi->direct_handler=(cpuop_func *)get_target();
# Line 6007 | Line 6220 | static void compile_block(cpu_history* p
6220                      raw_mov_l_mi((uae_u32)&regs.pc_p,
6221                                   (uae_u32)pc_hist[i].location);
6222                      raw_call((uae_u32)cputbl[opcode]);
6223 + #if PROFILE_UNTRANSLATED_INSNS
6224 +                        // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6225 +                        raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
6226 + #endif
6227                      //raw_add_l_mi((uae_u32)&oink,1); // FIXME
6228   #if USE_NORMAL_CALLING_CONVENTION
6229                      raw_inc_sp(4);
# Line 6095 | Line 6312 | static void compile_block(cpu_history* p
6312                  raw_jmp((uae_u32)popall_do_nothing);
6313                  create_jmpdep(bi,0,tba,t1);
6314  
6315 <                align_target(16);
6315 >                align_target(align_jumps);
6316                  /* not-predicted outcome */
6317                  *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6318                  live=tmp; /* Ouch again */
# Line 6164 | Line 6381 | static void compile_block(cpu_history* p
6381          big_to_small_state(&live,&(bi->env));
6382   #endif
6383  
6384 + #if USE_CHECKSUM_INFO
6385 +        remove_from_list(bi);
6386 +        if (trace_in_rom) {
6387 +                // No need to checksum that block trace on cache invalidation
6388 +                free_checksum_info_chain(bi->csi);
6389 +                bi->csi = NULL;
6390 +                add_to_dormant(bi);
6391 +        }
6392 +        else {
6393 +            calc_checksum(bi,&(bi->c1),&(bi->c2));
6394 +                add_to_active(bi);
6395 +        }
6396 + #else
6397          if (next_pc_p+extra_len>=max_pcp &&
6398              next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6399              max_pcp=next_pc_p+extra_len;  /* extra_len covers flags magic */
6400          else
6401              max_pcp+=LONGEST_68K_INST;
6402 +
6403          bi->len=max_pcp-min_pcp;
6404          bi->min_pcp=min_pcp;
6405 <                    
6405 >        
6406          remove_from_list(bi);
6407          if (isinrom(min_pcp) && isinrom(max_pcp)) {
6408              add_to_dormant(bi); /* No need to checksum it on cache flush.
# Line 6182 | Line 6413 | static void compile_block(cpu_history* p
6413              calc_checksum(bi,&(bi->c1),&(bi->c2));
6414              add_to_active(bi);
6415          }
6416 + #endif
6417          
6418          current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6419          
# Line 6201 | Line 6433 | static void compile_block(cpu_history* p
6433   #endif
6434          
6435          log_dump();
6436 <        align_target(32);
6436 >        align_target(align_jumps);
6437  
6438          /* This is the non-direct handler */
6439          bi->handler=
# Line 6217 | Line 6449 | static void compile_block(cpu_history* p
6449  
6450          raw_jmp((uae_u32)bi->direct_handler);
6451  
6220        align_target(32);
6452          current_compile_p=get_target();
6222
6453          raise_in_cl_list(bi);
6454          
6455          /* We will flush soon, anyway, so let's do it now */
# Line 6245 | Line 6475 | void exec_nostats(void)
6475   {
6476          for (;;)  {
6477                  uae_u32 opcode = GET_OPCODE;
6248 #ifdef X86_ASSEMBLY__disable
6249                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6250                                                         : : "b" (cpufunctbl[opcode]), "a" (opcode)
6251                                                         : "%edx", "%ecx", "%esi", "%edi",  "%ebp", "memory", "cc");
6252 #else
6478                  (*cpufunctbl[opcode])(opcode);
6254 #endif
6479                  if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6480                          return; /* We will deal with the spcflags in the caller */
6481                  }
# Line 6276 | Line 6500 | void execute_normal(void)
6500   #if FLIGHT_RECORDER
6501                          m68k_record_step(m68k_getpc());
6502   #endif
6279 #ifdef X86_ASSEMBLY__disable
6280                        __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6281                                                                 : : "b" (cpufunctbl[opcode]), "a" (opcode)
6282                                                                 : "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc");
6283 #else
6503                          (*cpufunctbl[opcode])(opcode);
6285 #endif
6504                          if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6505                                  compile_block(pc_hist, blocklen);
6506                                  return; /* We will deal with the spcflags in the caller */
# Line 6295 | Line 6513 | void execute_normal(void)
6513  
6514   typedef void (*compiled_handler)(void);
6515  
6516 + #ifdef X86_ASSEMBLY
6517 + void (*m68k_compile_execute)(void) = NULL;
6518 + #else
6519   void m68k_do_compile_execute(void)
6520   {
6521          for (;;) {
6301 #ifdef X86_ASSEMBLY
6302                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6303                                                         : : "b" (cache_tags[cacheline(regs.pc_p)].handler)
6304                                                         : "%edx", "%ecx", "%eax", "%esi", "%edi", "%ebp", "memory", "cc");
6305 #else
6522                  ((compiled_handler)(pushall_call_handler))();
6307 #endif
6523                  /* Whenever we return from that, we should check spcflags */
6524                  if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6525                          if (m68k_do_specialties ())
# Line 6312 | Line 6527 | void m68k_do_compile_execute(void)
6527                  }
6528          }
6529   }
6530 + #endif

Diff Legend

Removed lines (no marker)
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)