Comparing BasiliskII/src/uae_cpu/compiler/compemu_support.cpp (file contents):
Revision 1.3 by gbeauche, 2002-09-18T09:55:37Z vs.
Revision 1.14 by gbeauche, 2003-03-13T09:51:31Z

# Line 1 | Line 1
1 + /*
2 + *  compiler/compemu_support.cpp - Core dynamic translation engine
3 + *
4 + *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 + *
6 + *  Adaptation for Basilisk II and improvements, copyright 2000-2002
7 + *    Gwenole Beauchesne
8 + *
9 + *  Basilisk II (C) 1997-2002 Christian Bauer
10 + *  
11 + *  This program is free software; you can redistribute it and/or modify
12 + *  it under the terms of the GNU General Public License as published by
13 + *  the Free Software Foundation; either version 2 of the License, or
14 + *  (at your option) any later version.
15 + *
16 + *  This program is distributed in the hope that it will be useful,
17 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 + *  GNU General Public License for more details.
20 + *
21 + *  You should have received a copy of the GNU General Public License
22 + *  along with this program; if not, write to the Free Software
23 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 + */
25 +
26   #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27   #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28   #endif
29  
30 + #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 + #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 + #endif
33 +
34   #define USE_MATCH 0
35  
36   /* kludge for Brian, so he can compile under MSVC++ */
# Line 40 | Line 69
69   #endif
70  
71   #ifndef WIN32
72 < #define PROFILE_COMPILE_TIME    1
72 > #define PROFILE_COMPILE_TIME            1
73 > #define PROFILE_UNTRANSLATED_INSNS      1
74   #endif
75  
76   #ifdef WIN32
# Line 65 | Line 95 | static clock_t emul_start_time = 0;
95   static clock_t emul_end_time    = 0;
96   #endif
97  
98 + #if PROFILE_UNTRANSLATED_INSNS
99 + const int untranslated_top_ten = 20;
100 + static uae_u32 raw_cputbl_count[65536] = { 0, };
101 + static uae_u16 opcode_nums[65536];
102 +
103 + static int untranslated_compfn(const void *e1, const void *e2)
104 + {
105 +        return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
106 + }
107 + #endif
108 +
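(Note: untranslated_compfn() above returns only 0 or 1, not the negative/zero/positive three-way result qsort() expects; it happens to produce the intended descending order on common implementations. A strictly conforming comparator would look like this sketch, which is not part of either revision:

    static int untranslated_compfn(const void *e1, const void *e2)
    {
        uae_u32 a = raw_cputbl_count[*(const uae_u16 *)e1];
        uae_u32 b = raw_cputbl_count[*(const uae_u16 *)e2];
        return (a < b) - (a > b);   /* negative when a > b: descending by count */
    }
)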
109   compop_func *compfunctbl[65536];
110   compop_func *nfcompfunctbl[65536];
111   cpuop_func *nfcpufunctbl[65536];
112   uae_u8* comp_pc_p;
113  
114 + // From newcpu.cpp
115 + extern bool quit_program;
116 +
117   // gb-- Extra data for Basilisk II/JIT
118   #if JIT_DEBUG
119   static bool             JITDebug                        = false;        // Enable runtime disassemblers through mon?
# Line 84 | Line 128 | static bool            lazy_flush                      = true;         // Fl
128   static bool             avoid_fpu                       = true;         // Flag: compile FPU instructions ?
129   static bool             have_cmov                       = false;        // target has CMOV instructions ?
130   static bool             have_rat_stall          = true;         // target has partial register stalls ?
131 + const bool              tune_alignment          = true;         // Tune code alignments for running CPU ?
132 + const bool              tune_nop_fillers        = true;         // Tune no-op fillers for architecture
133 + static int              align_loops                     = 32;           // Align the start of loops
134 + static int              align_jumps                     = 32;           // Align the start of jumps
135   static int              zero_fd                         = -1;
136   static int              optcount[10]            = {
137          10,             // How often a block has to be executed before it is translated
# Line 100 | Line 148 | struct op_properties {
148   };
149   static op_properties prop[65536];
150  
103 // gb-- Control Flow Predicates
104
151   static inline int end_block(uae_u32 opcode)
152   {
153          return (prop[opcode].cflow & fl_end_block);
154   }
155  
156 < static inline bool may_trap(uae_u32 opcode)
156 > static inline bool is_const_jump(uae_u32 opcode)
157   {
158 <        return (prop[opcode].cflow & fl_trap);
158 >        return (prop[opcode].cflow == fl_const_jump);
159   }
160  
161   uae_u8* start_pc_p;
# Line 487 | Line 533 | static void prepare_block(blockinfo* bi)
533     compiled. If the list of free blockinfos is empty, we allocate a new
534    pool of blockinfos and link the newly created blockinfos together
535     into the list of free blockinfos. Otherwise, we simply pop a structure
536 <   of the free list.
536 >   off the free list.
537  
538     Blockinfos are lazily deallocated, i.e. chained together in the
539     list of free blockinfos whenever a translation cache flush (hard or
540     soft) request occurs.
541   */
542  
543 < #if USE_SEPARATE_BIA
544 < const int BLOCKINFO_POOL_SIZE = 128;
545 < struct blockinfo_pool {
546 <        blockinfo bi[BLOCKINFO_POOL_SIZE];
547 <        blockinfo_pool *next;
543 > template< class T >
544 > class LazyBlockAllocator
545 > {
546 >        enum {
547 >                kPoolSize = 1 + 4096 / sizeof(T)
548 >        };
549 >        struct Pool {
550 >                T chunk[kPoolSize];
551 >                Pool * next;
552 >        };
553 >        Pool * mPools;
554 >        T * mChunks;
555 > public:
556 >        LazyBlockAllocator() : mPools(0), mChunks(0) { }
557 >        ~LazyBlockAllocator();
558 >        T * acquire();
559 >        void release(T * const);
560   };
503 static blockinfo_pool * blockinfo_pools = 0;
504 static blockinfo *              free_blockinfos = 0;
505 #endif
561  
562 < static __inline__ blockinfo *alloc_blockinfo(void)
562 > template< class T >
563 > LazyBlockAllocator<T>::~LazyBlockAllocator()
564   {
565 < #if USE_SEPARATE_BIA
566 <        if (!free_blockinfos) {
567 <                // There is no blockinfo struct left, allocate a new
568 <                // pool and link the chunks into the free list
569 <                blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool));
570 <                for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) {
571 <                        bi->next = free_blockinfos;
572 <                        free_blockinfos = bi;
565 >        Pool * currentPool = mPools;
566 >        while (currentPool) {
567 >                Pool * deadPool = currentPool;
568 >                currentPool = currentPool->next;
569 >                free(deadPool);
570 >        }
571 > }
572 >
573 > template< class T >
574 > T * LazyBlockAllocator<T>::acquire()
575 > {
576 >        if (!mChunks) {
577 >                // There are no chunks left; allocate a new pool and link the
578 >                // chunks into the free list
579 >                Pool * newPool = (Pool *)malloc(sizeof(Pool));
580 >                for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
581 >                        chunk->next = mChunks;
582 >                        mChunks = chunk;
583                  }
584 <                bi_pool->next = blockinfo_pools;
585 <                blockinfo_pools = bi_pool;
584 >                newPool->next = mPools;
585 >                mPools = newPool;
586          }
587 <        blockinfo *bi = free_blockinfos;
588 <        free_blockinfos = bi->next;
589 < #else
524 <        blockinfo *bi = (blockinfo*)current_compile_p;
525 <        current_compile_p += sizeof(blockinfo);
526 < #endif
527 <        return bi;
587 >        T * chunk = mChunks;
588 >        mChunks = chunk->next;
589 >        return chunk;
590   }
591  
592 < static __inline__ void free_blockinfo(blockinfo *bi)
592 > template< class T >
593 > void LazyBlockAllocator<T>::release(T * const chunk)
594   {
595 +        chunk->next = mChunks;
596 +        mChunks = chunk;
597 + }
598 +
599 + template< class T >
600 + class HardBlockAllocator
601 + {
602 + public:
603 +        T * acquire() {
604 +                T * data = (T *)current_compile_p;
605 +                current_compile_p += sizeof(T);
606 +                return data;
607 +        }
608 +
609 +        void release(T * const chunk) {
610 +                // Deallocated on invalidation
611 +        }
612 + };
613 +
614   #if USE_SEPARATE_BIA
615 <        bi->next = free_blockinfos;
616 <        free_blockinfos = bi;
615 > static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
616 > static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
617 > #else
618 > static HardBlockAllocator<blockinfo> BlockInfoAllocator;
619 > static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
620   #endif
621 +
622 + static __inline__ checksum_info *alloc_checksum_info(void)
623 + {
624 +        checksum_info *csi = ChecksumInfoAllocator.acquire();
625 +        csi->next = NULL;
626 +        return csi;
627   }
628  
629 < static void free_blockinfo_pools(void)
629 > static __inline__ void free_checksum_info(checksum_info *csi)
630   {
631 < #if USE_SEPARATE_BIA
632 <        int blockinfo_pool_count = 0;
633 <        blockinfo_pool *curr_pool = blockinfo_pools;
634 <        while (curr_pool) {
635 <                blockinfo_pool_count++;
636 <                blockinfo_pool *dead_pool = curr_pool;
637 <                curr_pool = curr_pool->next;
638 <                free(dead_pool);
631 >        csi->next = NULL;
632 >        ChecksumInfoAllocator.release(csi);
633 > }
634 >
635 > static __inline__ void free_checksum_info_chain(checksum_info *csi)
636 > {
637 >        while (csi != NULL) {
638 >                checksum_info *csi2 = csi->next;
639 >                free_checksum_info(csi);
640 >                csi = csi2;
641          }
642 <        
643 <        uae_u32 blockinfo_pools_size = blockinfo_pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo);
644 <        write_log("### Blockinfo allocation statistics\n");
645 <        write_log("Number of blockinfo pools  : %d\n", blockinfo_pool_count);
646 <        write_log("Total number of blockinfos : %d (%d KB)\n",
647 <                          blockinfo_pool_count * BLOCKINFO_POOL_SIZE,
648 <                          blockinfo_pools_size / 1024);
649 <        write_log("\n");
642 > }
643 >
644 > static __inline__ blockinfo *alloc_blockinfo(void)
645 > {
646 >        blockinfo *bi = BlockInfoAllocator.acquire();
647 > #if USE_CHECKSUM_INFO
648 >        bi->csi = NULL;
649 > #endif
650 >        return bi;
651 > }
652 >
653 > static __inline__ void free_blockinfo(blockinfo *bi)
654 > {
655 > #if USE_CHECKSUM_INFO
656 >        free_checksum_info_chain(bi->csi);
657 >        bi->csi = NULL;
658   #endif
659 +        BlockInfoAllocator.release(bi);
660   }
661  
662   static __inline__ void alloc_blockinfos(void)
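(The acquire/release pattern above replaces the hand-rolled blockinfo pool of revision 1.3. A condensed usage sketch, using only the interfaces defined in this hunk:

    /* Chunks come off a per-type free list; LazyBlockAllocator::acquire()
       malloc()s a fresh ~4 KB pool only when the list runs dry. */
    blockinfo *bi = BlockInfoAllocator.acquire();
    bi->csi = NULL;                  /* as done in alloc_blockinfo() */
    /* ... block is translated, later invalidated ... */
    BlockInfoAllocator.release(bi);  /* chunk returns to the free list;
                                        pools are freed only on destruction */
)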
# Line 597 | Line 699 | static __inline__ void emit_long(uae_u32
699      target+=4;
700   }
701  
702 + static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
703 + {
704 +        memcpy((uae_u8 *)target,block,blocklen);
705 +        target+=blocklen;
706 + }
707 +
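(emit_block() complements emit_byte()/emit_long() for splicing pre-assembled fragments into the output stream. A hypothetical use, illustrative only:

    /* Copy a canned byte sequence into the code buffer, e.g. the
       two-byte x86 "pause" (rep nop) instruction. */
    static const uae_u8 pause_seq[] = { 0xf3, 0x90 };
    emit_block(pause_seq, sizeof(pause_seq));
)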
708   static __inline__ uae_u32 reverse32(uae_u32 v)
709   {
710   #if 1
# Line 4558 | Line 4666 | void compiler_init(void)
4666          raw_init_cpu();
4667          write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4668          write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4669 +        write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4670          
4671          // Translation cache flush mechanism
4672          lazy_flush = PrefsFindBool("jitlazyflush");
# Line 4568 | Line 4677 | void compiler_init(void)
4677          write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4678          write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4679          write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4680 +        write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
4681          write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4682          
4683          // Build compiler tables
# Line 4575 | Line 4685 | void compiler_init(void)
4685          
4686          initialized = true;
4687          
4688 + #if PROFILE_UNTRANSLATED_INSNS
4689 +        write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
4690 + #endif
4691 +
4692   #if PROFILE_COMPILE_TIME
4693          write_log("<JIT compiler> : gather statistics on translation time\n");
4694          emul_start_time = clock();
# Line 4593 | Line 4707 | void compiler_exit(void)
4707                  compiled_code = 0;
4708          }
4709          
4596        // Deallocate blockinfo pools
4597        free_blockinfo_pools();
4598        
4710   #ifndef WIN32
4711          // Close /dev/zero
4712          if (zero_fd > 0)
# Line 4611 | Line 4722 | void compiler_exit(void)
4722                  100.0*double(compile_time)/double(emul_time));
4723          write_log("\n");
4724   #endif
4725 +
4726 + #if PROFILE_UNTRANSLATED_INSNS
4727 +        uae_u64 untranslated_count = 0;
4728 +        for (int i = 0; i < 65536; i++) {
4729 +                opcode_nums[i] = i;
4730 +                untranslated_count += raw_cputbl_count[i];
4731 +        }
4732 +        write_log("Sorting out untranslated instructions count...\n");
4733 +        qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
4734 +        write_log("\nRank  Opc      Count Name\n");
4735 +        for (int i = 0; i < untranslated_top_ten; i++) {
4736 +                uae_u32 count = raw_cputbl_count[opcode_nums[i]];
4737 +                struct instr *dp;
4738 +                struct mnemolookup *lookup;
4739 +                if (!count)
4740 +                        break;
4741 +                dp = table68k + opcode_nums[i];
4742 +                for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
4743 +                        ;
4744 +                write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
4745 +        }
4746 + #endif
4747   }
4748  
4749   bool compiler_use_jit(void)
# Line 4839 | Line 4972 | void freescratch(void)
4972  
4973   static void align_target(uae_u32 a)
4974   {
4975 <    /* Fill with NOPs --- makes debugging with gdb easier */
4976 <    while ((uae_u32)target&(a-1))
4977 <        *target++=0x90;
4975 >        if (!a)
4976 >                return;
4977 >
4978 >        if (tune_nop_fillers)
4979 >                raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
4980 >        else {
4981 >                /* Fill with NOPs --- makes debugging with gdb easier */
4982 >                while ((uae_u32)target&(a-1))
4983 >                        *target++=0x90;
4984 >        }
4985   }
4986  
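(The filler size passed to raw_emit_nop_filler() is the distance from target to the next a-byte boundary. A worked example of the arithmetic:

    /* With a = 32 and target == 0x1004c:
         (uae_u32)target & (a - 1)  ->  0x0c
         a - 0x0c                   ->  20 bytes of multi-byte NOP filler.
       When target is already aligned the expression yields a full `a'
       bytes, whereas the plain 0x90 loop above emits none in that case. */
)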
4987   static __inline__ int isinrom(uintptr addr)
# Line 5166 | Line 5306 | void alloc_cache(void)
5306  
5307  
5308  
5309 < extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5309 > extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5310  
5311   static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5312   {
5313 <    uae_u32 k1=0;
5314 <    uae_u32 k2=0;
5175 <    uae_s32 len=bi->len;
5176 <    uae_u32 tmp=bi->min_pcp;
5177 <    uae_u32* pos;
5313 >    uae_u32 k1 = 0;
5314 >    uae_u32 k2 = 0;
5315  
5316 <    len+=(tmp&3);
5317 <    tmp&=(~3);
5318 <    pos=(uae_u32*)tmp;
5316 > #if USE_CHECKSUM_INFO
5317 >    checksum_info *csi = bi->csi;
5318 >        Dif(!csi) abort();
5319 >        while (csi) {
5320 >                uae_s32 len = csi->length;
5321 >                uae_u32 tmp = (uae_u32)csi->start_p;
5322 > #else
5323 >                uae_s32 len = bi->len;
5324 >                uae_u32 tmp = (uae_u32)bi->min_pcp;
5325 > #endif
5326 >                uae_u32*pos;
5327  
5328 <    if (len<0 || len>MAX_CHECKSUM_LEN) {
5329 <        *c1=0;
5330 <        *c2=0;
5331 <    }
5332 <    else {
5333 <        while (len>0) {
5334 <            k1+=*pos;
5335 <            k2^=*pos;
5336 <            pos++;
5337 <            len-=4;
5328 >                len += (tmp & 3);
5329 >                tmp &= ~3;
5330 >                pos = (uae_u32 *)tmp;
5331 >
5332 >                if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5333 >                        while (len > 0) {
5334 >                                k1 += *pos;
5335 >                                k2 ^= *pos;
5336 >                                pos++;
5337 >                                len -= 4;
5338 >                        }
5339 >                }
5340 >
5341 > #if USE_CHECKSUM_INFO
5342 >                csi = csi->next;
5343          }
5344 <        *c1=k1;
5345 <        *c2=k2;
5346 <    }
5344 > #endif
5345 >
5346 >        *c1 = k1;
5347 >        *c2 = k2;
5348   }
5349  
5350 < static void show_checksum(blockinfo* bi)
5350 > #if 0
5351 > static void show_checksum(CSI_TYPE* csi)
5352   {
5353      uae_u32 k1=0;
5354      uae_u32 k2=0;
5355 <    uae_s32 len=bi->len;
5356 <    uae_u32 tmp=(uae_u32)bi->pc_p;
5355 >    uae_s32 len=CSI_LENGTH(csi);
5356 >    uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5357      uae_u32* pos;
5358  
5359      len+=(tmp&3);
# Line 5220 | Line 5372 | static void show_checksum(blockinfo* bi)
5372          write_log(" bla\n");
5373      }
5374   }
5375 + #endif
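(The checksum pair computed by calc_checksum() is simply a 32-bit sum and a 32-bit XOR over the word-aligned source of each range; a condensed restatement, not part of either revision:

    static void checksum_range(const uae_u32 *p, uae_s32 len,
                               uae_u32 *c1, uae_u32 *c2)
    {
        uae_u32 k1 = 0, k2 = 0;
        for (; len > 0; len -= 4, p++) {
            k1 += *p;   /* additive checksum */
            k2 ^= *p;   /* xor checksum */
        }
        *c1 = k1;
        *c2 = k2;
    }
)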
5376  
5377  
5378   int check_for_cache_miss(void)
# Line 5273 | Line 5426 | static int called_check_checksum(blockin
5426   static inline int block_check_checksum(blockinfo* bi)
5427   {
5428      uae_u32     c1,c2;
5429 <    int         isgood;
5429 >    bool        isgood;
5430      
5431      if (bi->status!=BI_NEED_CHECK)
5432          return 1;  /* This block is in a checked state */
5433      
5434      checksum_count++;
5435 +
5436      if (bi->c1 || bi->c2)
5437          calc_checksum(bi,&c1,&c2);
5438      else {
5439          c1=c2=1;  /* Make sure it doesn't match */
5440 <    }
5440 >        }
5441      
5442      isgood=(c1==bi->c1 && c2==bi->c2);
5443 +
5444      if (isgood) {
5445          /* This block is still OK. So we reactivate. Of course, that
5446             means we have to move it into the needs-to-be-flushed list */
# Line 5403 | Line 5558 | static __inline__ void create_popalls(vo
5558       registers before jumping back to the various get-out routines.
5559       This generates the code for it.
5560    */
5561 <  popall_do_nothing=current_compile_p;
5561 >  align_target(align_jumps);
5562 >  popall_do_nothing=get_target();
5563    for (i=0;i<N_REGS;i++) {
5564        if (need_to_preserve[i])
5565            raw_pop_l_r(i);
5566    }
5567    raw_jmp((uae_u32)do_nothing);
5412  align_target(32);
5568    
5569 +  align_target(align_jumps);
5570    popall_execute_normal=get_target();
5571    for (i=0;i<N_REGS;i++) {
5572        if (need_to_preserve[i])
5573            raw_pop_l_r(i);
5574    }
5575    raw_jmp((uae_u32)execute_normal);
5420  align_target(32);
5576  
5577 +  align_target(align_jumps);
5578    popall_cache_miss=get_target();
5579    for (i=0;i<N_REGS;i++) {
5580        if (need_to_preserve[i])
5581            raw_pop_l_r(i);
5582    }
5583    raw_jmp((uae_u32)cache_miss);
5428  align_target(32);
5584  
5585 +  align_target(align_jumps);
5586    popall_recompile_block=get_target();
5587    for (i=0;i<N_REGS;i++) {
5588        if (need_to_preserve[i])
5589            raw_pop_l_r(i);
5590    }
5591    raw_jmp((uae_u32)recompile_block);
5592 <  align_target(32);
5593 <  
5592 >
5593 >  align_target(align_jumps);
5594    popall_exec_nostats=get_target();
5595    for (i=0;i<N_REGS;i++) {
5596        if (need_to_preserve[i])
5597            raw_pop_l_r(i);
5598    }
5599    raw_jmp((uae_u32)exec_nostats);
5600 <  align_target(32);
5601 <  
5600 >
5601 >  align_target(align_jumps);
5602    popall_check_checksum=get_target();
5603    for (i=0;i<N_REGS;i++) {
5604        if (need_to_preserve[i])
5605            raw_pop_l_r(i);
5606    }
5607    raw_jmp((uae_u32)check_checksum);
5608 <  align_target(32);
5609 <  
5608 >
5609 >  align_target(align_jumps);
5610    current_compile_p=get_target();
5611   #else
5612    popall_exec_nostats=(void *)exec_nostats;
# Line 5459 | Line 5615 | static __inline__ void create_popalls(vo
5615    popall_recompile_block=(void *)recompile_block;
5616    popall_do_nothing=(void *)do_nothing;
5617    popall_check_checksum=(void *)check_checksum;
5462  pushall_call_handler=get_target();  
5618   #endif
5619  
5620    /* And now, the code to do the matching pushes and then jump
# Line 5475 | Line 5630 | static __inline__ void create_popalls(vo
5630    raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5631    raw_and_l_ri(r,TAGMASK);
5632    raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5633 +
5634 + #ifdef X86_ASSEMBLY
5635 +  align_target(align_jumps);
5636 +  m68k_compile_execute = (void (*)(void))get_target();
5637 +  for (i=N_REGS;i--;) {
5638 +          if (need_to_preserve[i])
5639 +                  raw_push_l_r(i);
5640 +  }
5641 +  align_target(align_loops);
5642 +  uae_u32 dispatch_loop = (uae_u32)get_target();
5643 +  r=REG_PC_TMP;
5644 +  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5645 +  raw_and_l_ri(r,TAGMASK);
5646 +  raw_call_m_indexed((uae_u32)cache_tags,r,4);
5647 +  raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5648 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5649 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5650 +  raw_call((uae_u32)m68k_do_specialties);
5651 +  raw_test_l_rr(REG_RESULT,REG_RESULT);
5652 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5653 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5654 +  raw_cmp_b_mi((uae_u32)&quit_program,0);
5655 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5656 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5657 +  for (i=0;i<N_REGS;i++) {
5658 +          if (need_to_preserve[i])
5659 +                  raw_pop_l_r(i);
5660 +  }
5661 +  raw_ret();
5662 + #endif
5663   }
5664  
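(For readability, the dispatch loop emitted just above corresponds roughly to this C rendering; a sketch only, since the generated code keeps the tag index in REG_PC_TMP and uses short backward branches to dispatch_loop:

    for (;;) {
        /* raw_call_m_indexed: call the handler for the current tag */
        ((compiled_handler)cache_tags[cacheline(regs.pc_p)].handler)();
        if (regs.spcflags == 0)
            continue;                  /* no special flags: keep dispatching */
        if (m68k_do_specialties() == 0)
            continue;                  /* handled: keep dispatching */
        if (quit_program == 0)
            continue;
        break;                         /* restore saved registers and ret */
    }
)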
5665   static __inline__ void reset_lists(void)
# Line 5492 | Line 5677 | static void prepare_block(blockinfo* bi)
5677      int i;
5678  
5679      set_target(current_compile_p);
5680 <    align_target(32);
5680 >    align_target(align_jumps);
5681      bi->direct_pen=(cpuop_func *)get_target();
5682      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5683      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5684      raw_jmp((uae_u32)popall_execute_normal);
5685  
5686 <    align_target(32);
5686 >    align_target(align_jumps);
5687      bi->direct_pcc=(cpuop_func *)get_target();
5688      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5689      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5690      raw_jmp((uae_u32)popall_check_checksum);
5506
5507    align_target(32);
5691      current_compile_p=get_target();
5692  
5693      bi->deplist=NULL;
# Line 5557 | Line 5740 | void build_comp(void)
5740          
5741          for (i = 0; tbl[i].opcode < 65536; i++) {
5742                  int cflow = table68k[tbl[i].opcode].cflow;
5743 +                if (USE_INLINING && ((cflow & fl_const_jump) != 0))
5744 +                        cflow = fl_const_jump;
5745 +                else
5746 +                        cflow &= ~fl_const_jump;
5747                  prop[cft_map(tbl[i].opcode)].cflow = cflow;
5748  
5749                  int uses_fpu = tbl[i].specific & 32;
# Line 5850 | Line 6037 | static void compile_block(cpu_history* p
6037          int r;
6038          int was_comp=0;
6039          uae_u8 liveflags[MAXRUN+1];
6040 + #if USE_CHECKSUM_INFO
6041 +        bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6042 +        uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
6043 +        uae_u32 min_pcp=max_pcp;
6044 + #else
6045          uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
6046          uae_u32 min_pcp=max_pcp;
6047 + #endif
6048          uae_u32 cl=cacheline(pc_hist[0].location);
6049          void* specflags=(void*)&regs.spcflags;
6050          blockinfo* bi=NULL;
# Line 5895 | Line 6088 | static void compile_block(cpu_history* p
6088          remove_deps(bi); /* We are about to create new code */
6089          bi->optlevel=optlev;
6090          bi->pc_p=(uae_u8*)pc_hist[0].location;
6091 + #if USE_CHECKSUM_INFO
6092 +        free_checksum_info_chain(bi->csi);
6093 +        bi->csi = NULL;
6094 + #endif
6095          
6096          liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6097          i=blocklen;
# Line 5902 | Line 6099 | static void compile_block(cpu_history* p
6099              uae_u16* currpcp=pc_hist[i].location;
6100              uae_u32 op=DO_GET_OPCODE(currpcp);
6101  
6102 + #if USE_CHECKSUM_INFO
6103 +                trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6104 + #if USE_INLINING
6105 +                if (is_const_jump(op)) {
6106 +                        checksum_info *csi = alloc_checksum_info();
6107 +                        csi->start_p = (uae_u8 *)min_pcp;
6108 +                        csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6109 +                        csi->next = bi->csi;
6110 +                        bi->csi = csi;
6111 +                        max_pcp = (uae_u32)currpcp;
6112 +                }
6113 + #endif
6114 +                min_pcp = (uae_u32)currpcp;
6115 + #else
6116              if ((uae_u32)currpcp<min_pcp)
6117                  min_pcp=(uae_u32)currpcp;
6118              if ((uae_u32)currpcp>max_pcp)
6119                  max_pcp=(uae_u32)currpcp;
6120 + #endif
6121  
6122                  liveflags[i]=((liveflags[i+1]&
6123                                 (~prop[op].set_flags))|
# Line 5914 | Line 6126 | static void compile_block(cpu_history* p
6126                      liveflags[i]&= ~FLAG_Z;
6127          }
6128  
6129 + #if USE_CHECKSUM_INFO
6130 +        checksum_info *csi = alloc_checksum_info();
6131 +        csi->start_p = (uae_u8 *)min_pcp;
6132 +        csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6133 +        csi->next = bi->csi;
6134 +        bi->csi = csi;
6135 + #endif
6136 +
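(Since the pc_hist walk above runs backwards, each inlined constant jump closes the address range accumulated so far and prepends it to bi->csi; the final range built here covers the start of the trace and ends up at the head of the list. Each range is padded by LONGEST_68K_INST so the checksum also covers the operand bytes of its last opcode.)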
6137          bi->needed_flags=liveflags[0];
6138  
6139 <        align_target(32);
6139 >        align_target(align_loops);
6140          was_comp=0;
6141  
6142          bi->direct_handler=(cpuop_func *)get_target();
# Line 6003 | Line 6223 | static void compile_block(cpu_history* p
6223                      raw_mov_l_mi((uae_u32)&regs.pc_p,
6224                                   (uae_u32)pc_hist[i].location);
6225                      raw_call((uae_u32)cputbl[opcode]);
6226 + #if PROFILE_UNTRANSLATED_INSNS
6227 +                        // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6228 +                        raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
6229 + #endif
6230                      //raw_add_l_mi((uae_u32)&oink,1); // FIXME
6231   #if USE_NORMAL_CALLING_CONVENTION
6232                      raw_inc_sp(4);
# Line 6091 | Line 6315 | static void compile_block(cpu_history* p
6315                  raw_jmp((uae_u32)popall_do_nothing);
6316                  create_jmpdep(bi,0,tba,t1);
6317  
6318 <                align_target(16);
6318 >                align_target(align_jumps);
6319                  /* not-predicted outcome */
6320                  *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6321                  live=tmp; /* Ouch again */
# Line 6160 | Line 6384 | static void compile_block(cpu_history* p
6384          big_to_small_state(&live,&(bi->env));
6385   #endif
6386  
6387 + #if USE_CHECKSUM_INFO
6388 +        remove_from_list(bi);
6389 +        if (trace_in_rom) {
6390 +                // No need to checksum that block trace on cache invalidation
6391 +                free_checksum_info_chain(bi->csi);
6392 +                bi->csi = NULL;
6393 +                add_to_dormant(bi);
6394 +        }
6395 +        else {
6396 +            calc_checksum(bi,&(bi->c1),&(bi->c2));
6397 +                add_to_active(bi);
6398 +        }
6399 + #else
6400          if (next_pc_p+extra_len>=max_pcp &&
6401              next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6402              max_pcp=next_pc_p+extra_len;  /* extra_len covers flags magic */
6403          else
6404              max_pcp+=LONGEST_68K_INST;
6405 +
6406          bi->len=max_pcp-min_pcp;
6407          bi->min_pcp=min_pcp;
6408 <                    
6408 >        
6409          remove_from_list(bi);
6410          if (isinrom(min_pcp) && isinrom(max_pcp)) {
6411              add_to_dormant(bi); /* No need to checksum it on cache flush.
# Line 6178 | Line 6416 | static void compile_block(cpu_history* p
6416              calc_checksum(bi,&(bi->c1),&(bi->c2));
6417              add_to_active(bi);
6418          }
6419 + #endif
6420          
6421          current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6422          
# Line 6197 | Line 6436 | static void compile_block(cpu_history* p
6436   #endif
6437          
6438          log_dump();
6439 <        align_target(32);
6439 >        align_target(align_jumps);
6440  
6441          /* This is the non-direct handler */
6442          bi->handler=
# Line 6213 | Line 6452 | static void compile_block(cpu_history* p
6452  
6453          raw_jmp((uae_u32)bi->direct_handler);
6454  
6216        align_target(32);
6455          current_compile_p=get_target();
6218
6456          raise_in_cl_list(bi);
6457          
6458          /* We will flush soon, anyway, so let's do it now */
# Line 6241 | Line 6478 | void exec_nostats(void)
6478   {
6479          for (;;)  {
6480                  uae_u32 opcode = GET_OPCODE;
6244 #ifdef X86_ASSEMBLY__disable
6245                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6246                                                         : : "b" (cpufunctbl[opcode]), "a" (opcode)
6247                                                         : "%edx", "%ecx", "%esi", "%edi",  "%ebp", "memory", "cc");
6248 #else
6481                  (*cpufunctbl[opcode])(opcode);
6250 #endif
6482                  if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6483                          return; /* We will deal with the spcflags in the caller */
6484                  }
# Line 6272 | Line 6503 | void execute_normal(void)
6503   #if FLIGHT_RECORDER
6504                          m68k_record_step(m68k_getpc());
6505   #endif
6275 #ifdef X86_ASSEMBLY__disable
6276                        __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6277                                                                 : : "b" (cpufunctbl[opcode]), "a" (opcode)
6278                                                                 : "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc");
6279 #else
6506                          (*cpufunctbl[opcode])(opcode);
6281 #endif
6507                          if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6508                                  compile_block(pc_hist, blocklen);
6509                                  return; /* We will deal with the spcflags in the caller */
# Line 6291 | Line 6516 | void execute_normal(void)
6516  
6517   typedef void (*compiled_handler)(void);
6518  
6519 + #ifdef X86_ASSEMBLY
6520 + void (*m68k_compile_execute)(void) = NULL;
6521 + #else
6522   void m68k_do_compile_execute(void)
6523   {
6524          for (;;) {
6297 #ifdef X86_ASSEMBLY
6298                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6299                                                         : : "b" (cache_tags[cacheline(regs.pc_p)].handler)
6300                                                         : "%edx", "%ecx", "%eax", "%esi", "%edi", "%ebp", "memory", "cc");
6301 #else
6525                  ((compiled_handler)(pushall_call_handler))();
6303 #endif
6526                  /* Whenever we return from that, we should check spcflags */
6527                  if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6528                          if (m68k_do_specialties ())
# Line 6308 | Line 6530 | void m68k_do_compile_execute(void)
6530                  }
6531          }
6532   }
6533 + #endif
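(With X86_ASSEMBLY defined, the portable C loop above is compiled out and the m68k_compile_execute function pointer is instead filled in by create_popalls() with the JIT-generated dispatcher. The call site — presumably in newcpu.cpp, shown here purely as an assumption — would select between the two:

    #ifdef X86_ASSEMBLY
        m68k_compile_execute();     /* jump into the generated dispatcher */
    #else
        m68k_do_compile_execute();  /* portable C dispatch loop above */
    #endif
)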

Diff Legend

(no marker)  Removed lines
+            Added lines
<            Changed lines (old revision)
>            Changed lines (new revision)