
Comparing BasiliskII/src/uae_cpu/compiler/compemu_support.cpp (file contents):
Revision 1.3 by gbeauche, 2002-09-18T09:55:37Z vs.
Revision 1.15 by gbeauche, 2003-03-13T15:57:01Z

# Line 1 | Line 1
1 + /*
2 + *  compiler/compemu_support.cpp - Core dynamic translation engine
3 + *
4 + *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 + *
6 + *  Adaptation for Basilisk II and improvements, copyright 2000-2002
7 + *    Gwenole Beauchesne
8 + *
9 + *  Basilisk II (C) 1997-2002 Christian Bauer
10 + *  
11 + *  This program is free software; you can redistribute it and/or modify
12 + *  it under the terms of the GNU General Public License as published by
13 + *  the Free Software Foundation; either version 2 of the License, or
14 + *  (at your option) any later version.
15 + *
16 + *  This program is distributed in the hope that it will be useful,
17 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 + *  GNU General Public License for more details.
20 + *
21 + *  You should have received a copy of the GNU General Public License
22 + *  along with this program; if not, write to the Free Software
23 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 + */
25 +
26   #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27   #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28   #endif
29  
30 + #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
31 + #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
32 + #endif
33 +
34   #define USE_MATCH 0
35  
36   /* kludge for Brian, so he can compile under MSVC++ */
# Line 40 | Line 69
69   #endif
70  
71   #ifndef WIN32
72 < #define PROFILE_COMPILE_TIME    1
72 > #define PROFILE_COMPILE_TIME            1
73 > #define PROFILE_UNTRANSLATED_INSNS      1
74   #endif
75  
76   #ifdef WIN32
# Line 65 | Line 95 | static clock_t emul_start_time = 0;
95   static clock_t emul_end_time    = 0;
96   #endif
97  
98 + #if PROFILE_UNTRANSLATED_INSNS
99 + const int untranslated_top_ten = 20;
100 + static uae_u32 raw_cputbl_count[65536] = { 0, };
101 + static uae_u16 opcode_nums[65536];
102 +
103 + static int untranslated_compfn(const void *e1, const void *e2)
104 + {
105 +        return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
106 + }
107 + #endif
108 +
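(A note on untranslated_compfn() above: it returns the boolean result of <, i.e. only 0 or 1 and never a negative value, whereas qsort() expects a negative/zero/positive three-way result. With common qsort implementations the order may still come out roughly descending by count, but the behaviour is formally undefined. A strictly conforming descending comparator would be, for instance:

     static int untranslated_compfn(const void *e1, const void *e2)
     {
         uae_u32 a = raw_cputbl_count[*(const uae_u16 *)e1];
         uae_u32 b = raw_cputbl_count[*(const uae_u16 *)e2];
         return (a < b) - (a > b);   /* negative when a > b: biggest counts first */
     }
)
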
109   compop_func *compfunctbl[65536];
110   compop_func *nfcompfunctbl[65536];
111   cpuop_func *nfcpufunctbl[65536];
112   uae_u8* comp_pc_p;
113  
114 + // From newcpu.cpp
115 + extern bool quit_program;
116 +
117   // gb-- Extra data for Basilisk II/JIT
118   #if JIT_DEBUG
119   static bool             JITDebug                        = false;        // Enable runtime disassemblers through mon?
# Line 84 | Line 128 | static bool            lazy_flush                      = true;         // Fl
128   static bool             avoid_fpu                       = true;         // Flag: avoid compiling FPU instructions ?
129   static bool             have_cmov                       = false;        // target has CMOV instructions ?
130   static bool             have_rat_stall          = true;         // target has partial register stalls ?
131 + const bool              tune_alignment          = true;         // Tune code alignments for running CPU ?
132 + const bool              tune_nop_fillers        = true;         // Tune no-op fillers for architecture
133 + static bool             setzflg_uses_bsf        = false;        // setzflg virtual instruction can use native BSF instruction correctly?
134 + static int              align_loops                     = 32;           // Align the start of loops
135 + static int              align_jumps                     = 32;           // Align the start of jumps
136   static int              zero_fd                         = -1;
137   static int              optcount[10]            = {
138          10,             // How often a block has to be executed before it is translated
# Line 100 | Line 149 | struct op_properties {
149   };
150   static op_properties prop[65536];
151  
103 // gb-- Control Flow Predicates
104
152   static inline int end_block(uae_u32 opcode)
153   {
154          return (prop[opcode].cflow & fl_end_block);
155   }
156  
157 < static inline bool may_trap(uae_u32 opcode)
157 > static inline bool is_const_jump(uae_u32 opcode)
158   {
159 <        return (prop[opcode].cflow & fl_trap);
159 >        return (prop[opcode].cflow == fl_const_jump);
160   }
161  
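(Note the asymmetry between the two predicates: end_block() tests one bit with &, while is_const_jump() compares the whole cflow field for equality with fl_const_jump. The equality test is exact by construction, because build_comp() further down normalizes the field before storing it:

     // from build_comp(), reproduced here for context:
     int cflow = table68k[tbl[i].opcode].cflow;
     if (USE_INLINING && ((cflow & fl_const_jump) != 0))
         cflow = fl_const_jump;      // inlinable: stored as exactly a constant jump
     else
         cflow &= ~fl_const_jump;    // otherwise the bit is stripped entirely
     prop[cft_map(tbl[i].opcode)].cflow = cflow;

so an opcode either satisfies is_const_jump() or carries no fl_const_jump bit at all.)
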
162   uae_u8* start_pc_p;
# Line 487 | Line 534 | static void prepare_block(blockinfo* bi)
534     compiled. If the list of free blockinfos is empty, we allocate a new
535     pool of blockinfos and link the newly created blockinfos together
536     into the list of free blockinfos. Otherwise, we simply pop a structure
537 <   of the free list.
537 >   off the free list.
538  
539     Blockinfos are lazily deallocated, i.e. chained together in the
540     list of free blockinfos whenever a translation cache flush (hard or
541     soft) request occurs.
542   */
543  
544 < #if USE_SEPARATE_BIA
545 < const int BLOCKINFO_POOL_SIZE = 128;
546 < struct blockinfo_pool {
547 <        blockinfo bi[BLOCKINFO_POOL_SIZE];
548 <        blockinfo_pool *next;
544 > template< class T >
545 > class LazyBlockAllocator
546 > {
547 >        enum {
548 >                kPoolSize = 1 + 4096 / sizeof(T)
549 >        };
550 >        struct Pool {
551 >                T chunk[kPoolSize];
552 >                Pool * next;
553 >        };
554 >        Pool * mPools;
555 >        T * mChunks;
556 > public:
557 >        LazyBlockAllocator() : mPools(0), mChunks(0) { }
558 >        ~LazyBlockAllocator();
559 >        T * acquire();
560 >        void release(T * const);
561   };
503 static blockinfo_pool * blockinfo_pools = 0;
504 static blockinfo *              free_blockinfos = 0;
505 #endif
562  
563 < static __inline__ blockinfo *alloc_blockinfo(void)
563 > template< class T >
564 > LazyBlockAllocator<T>::~LazyBlockAllocator()
565   {
566 < #if USE_SEPARATE_BIA
567 <        if (!free_blockinfos) {
568 <                // There is no blockinfo struct left, allocate a new
569 <                // pool and link the chunks into the free list
570 <                blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool));
571 <                for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) {
572 <                        bi->next = free_blockinfos;
573 <                        free_blockinfos = bi;
566 >        Pool * currentPool = mPools;
567 >        while (currentPool) {
568 >                Pool * deadPool = currentPool;
569 >                currentPool = currentPool->next;
570 >                free(deadPool);
571 >        }
572 > }
573 >
574 > template< class T >
575 > T * LazyBlockAllocator<T>::acquire()
576 > {
577 >        if (!mChunks) {
578 >                // There is no chunk left, allocate a new pool and link the
579 >                // chunks into the free list
580 >                Pool * newPool = (Pool *)malloc(sizeof(Pool));
581 >                for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
582 >                        chunk->next = mChunks;
583 >                        mChunks = chunk;
584                  }
585 <                bi_pool->next = blockinfo_pools;
586 <                blockinfo_pools = bi_pool;
585 >                newPool->next = mPools;
586 >                mPools = newPool;
587          }
588 <        blockinfo *bi = free_blockinfos;
589 <        free_blockinfos = bi->next;
590 < #else
524 <        blockinfo *bi = (blockinfo*)current_compile_p;
525 <        current_compile_p += sizeof(blockinfo);
526 < #endif
527 <        return bi;
588 >        T * chunk = mChunks;
589 >        mChunks = chunk->next;
590 >        return chunk;
591   }
592  
593 < static __inline__ void free_blockinfo(blockinfo *bi)
593 > template< class T >
594 > void LazyBlockAllocator<T>::release(T * const chunk)
595   {
596 +        chunk->next = mChunks;
597 +        mChunks = chunk;
598 + }
599 +
600 + template< class T >
601 + class HardBlockAllocator
602 + {
603 + public:
604 +        T * acquire() {
605 +                T * data = (T *)current_compile_p;
606 +                current_compile_p += sizeof(T);
607 +                return data;
608 +        }
609 +
610 +        void release(T * const chunk) {
611 +                // Deallocated on invalidation
612 +        }
613 + };
614 +
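(Both allocator classes expose the same acquire()/release() pair, so the USE_SEPARATE_BIA switch below only selects the backing policy. The sole requirement on T is a public `next` pointer member, since the free list is threaded through the chunks themselves. A minimal usage sketch, with a hypothetical node type that is not part of the source:

     struct node { node *next; int payload; };        // hypothetical T
     static LazyBlockAllocator<node> NodeAllocator;

     static void node_example(void)
     {
         node *n = NodeAllocator.acquire();   // pops a free chunk; if the list is
                                              // empty, mallocs a fresh ~4 KB Pool
                                              // and chains its chunks in first
         n->payload = 42;
         NodeAllocator.release(n);            // pushes the chunk back on the free list
     }
)
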
615   #if USE_SEPARATE_BIA
616 <        bi->next = free_blockinfos;
617 <        free_blockinfos = bi;
616 > static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
617 > static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
618 > #else
619 > static HardBlockAllocator<blockinfo> BlockInfoAllocator;
620 > static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
621   #endif
622 +
623 + static __inline__ checksum_info *alloc_checksum_info(void)
624 + {
625 +        checksum_info *csi = ChecksumInfoAllocator.acquire();
626 +        csi->next = NULL;
627 +        return csi;
628   }
629  
630 < static void free_blockinfo_pools(void)
630 > static __inline__ void free_checksum_info(checksum_info *csi)
631   {
632 < #if USE_SEPARATE_BIA
633 <        int blockinfo_pool_count = 0;
634 <        blockinfo_pool *curr_pool = blockinfo_pools;
635 <        while (curr_pool) {
636 <                blockinfo_pool_count++;
637 <                blockinfo_pool *dead_pool = curr_pool;
638 <                curr_pool = curr_pool->next;
639 <                free(dead_pool);
632 >        csi->next = NULL;
633 >        ChecksumInfoAllocator.release(csi);
634 > }
635 >
636 > static __inline__ void free_checksum_info_chain(checksum_info *csi)
637 > {
638 >        while (csi != NULL) {
639 >                checksum_info *csi2 = csi->next;
640 >                free_checksum_info(csi);
641 >                csi = csi2;
642          }
643 <        
644 <        uae_u32 blockinfo_pools_size = blockinfo_pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo);
645 <        write_log("### Blockinfo allocation statistics\n");
646 <        write_log("Number of blockinfo pools  : %d\n", blockinfo_pool_count);
647 <        write_log("Total number of blockinfos : %d (%d KB)\n",
648 <                          blockinfo_pool_count * BLOCKINFO_POOL_SIZE,
649 <                          blockinfo_pools_size / 1024);
556 <        write_log("\n");
643 > }
644 >
645 > static __inline__ blockinfo *alloc_blockinfo(void)
646 > {
647 >        blockinfo *bi = BlockInfoAllocator.acquire();
648 > #if USE_CHECKSUM_INFO
649 >        bi->csi = NULL;
650   #endif
651 +        return bi;
652 + }
653 +
654 + static __inline__ void free_blockinfo(blockinfo *bi)
655 + {
656 + #if USE_CHECKSUM_INFO
657 +        free_checksum_info_chain(bi->csi);
658 +        bi->csi = NULL;
659 + #endif
660 +        BlockInfoAllocator.release(bi);
661   }
662  
663   static __inline__ void alloc_blockinfos(void)
# Line 597 | Line 700 | static __inline__ void emit_long(uae_u32
700      target+=4;
701   }
702  
703 + static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
704 + {
705 +        memcpy((uae_u8 *)target,block,blocklen);
706 +        target+=blocklen;
707 + }
708 +
709   static __inline__ uae_u32 reverse32(uae_u32 v)
710   {
711   #if 1
# Line 2562 | Line 2671 | MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM c
2671   }
2672   MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2673  
2674 < MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2674 > MIDFUNC(1,setzflg_l,(RW4 r))
2675   {
2676 <    CLOBBER_BSF;
2677 <    s=readreg(s,4);
2678 <    d=writereg(d,4);
2679 <    raw_bsf_l_rr(d,s);
2680 <    unlock2(s);
2681 <    unlock2(d);
2676 >        if (setzflg_uses_bsf) {
2677 >                CLOBBER_BSF;
2678 >                r=rmw(r,4,4);
2679 >                raw_bsf_l_rr(r,r);
2680 >                unlock2(r);
2681 >        }
2682 >        else {
2683 >                /* Errr, not implemented yet in a generic way. And actually,
2684 >                   this branch should never be reached for now: opcodes that
2685 >                   need setzflg are not compiled at all when BSF is unusable.  */
2686 >                write_log("attempt to make unsupported setzflg()\n");
2687 >                abort();
2688 >        }
2689   }
2690 < MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2690 > MENDFUNC(1,setzflg_l,(RW4 r))
2691  
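(The BSF trick relies on documented x86 semantics: BSF sets ZF when its source operand is zero and clears it otherwise, so raw_bsf_l_rr(r,r) manufactures a 68k-style Z flag directly from the register value — at the price of overwriting r with a bit index when it is non-zero. The architecture manuals leave the remaining flags undefined, which is why compiler_init() probes the actual CPU with target_check_bsf() before enabling this path. Schematically:

     /* effect of raw_bsf_l_rr(r,r), restated as a sketch:
        ZF = (r == 0);                       // the one flag setzflg_l needs
        if (r != 0)
            r = index_of_lowest_set_bit(r);  // side effect on the register
        // other flags: undefined by the manuals; must survive on this CPU */
)
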
2692   MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
2693   {
# Line 4556 | Line 4672 | void compiler_init(void)
4672          
4673          // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4674          raw_init_cpu();
4675 +        setzflg_uses_bsf = target_check_bsf();
4676          write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4677          write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4678 +        write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4679          
4680          // Translation cache flush mechanism
4681          lazy_flush = PrefsFindBool("jitlazyflush");
# Line 4568 | Line 4686 | void compiler_init(void)
4686          write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4687          write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4688          write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4689 +        write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
4690          write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4691          
4692          // Build compiler tables
# Line 4575 | Line 4694 | void compiler_init(void)
4694          
4695          initialized = true;
4696          
4697 + #if PROFILE_UNTRANSLATED_INSNS
4698 +        write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
4699 + #endif
4700 +
4701   #if PROFILE_COMPILE_TIME
4702          write_log("<JIT compiler> : gather statistics on translation time\n");
4703          emul_start_time = clock();
# Line 4593 | Line 4716 | void compiler_exit(void)
4716                  compiled_code = 0;
4717          }
4718          
4596        // Deallocate blockinfo pools
4597        free_blockinfo_pools();
4598        
4719   #ifndef WIN32
4720          // Close /dev/zero
4721          if (zero_fd > 0)
# Line 4611 | Line 4731 | void compiler_exit(void)
4731                  100.0*double(compile_time)/double(emul_time));
4732          write_log("\n");
4733   #endif
4734 +
4735 + #if PROFILE_UNTRANSLATED_INSNS
4736 +        uae_u64 untranslated_count = 0;
4737 +        for (int i = 0; i < 65536; i++) {
4738 +                opcode_nums[i] = i;
4739 +                untranslated_count += raw_cputbl_count[i];
4740 +        }
4741 +        write_log("Sorting out untranslated instructions count...\n");
4742 +        qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
4743 +        write_log("\nRank  Opc      Count Name\n");
4744 +        for (int i = 0; i < untranslated_top_ten; i++) {
4745 +                uae_u32 count = raw_cputbl_count[opcode_nums[i]];
4746 +                struct instr *dp;
4747 +                struct mnemolookup *lookup;
4748 +                if (!count)
4749 +                        break;
4750 +                dp = table68k + opcode_nums[i];
4751 +                for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
4752 +                        ;
4753 +                write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
4754 +        }
4755 + #endif
4756   }
4757  
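(Despite its name, untranslated_top_ten caps the report at 20 entries, and the loop stops early at the first opcode with a zero count. With the "%03d: %04x %10lu %s" format above, the output takes this shape — the opcodes and counts below are invented purely for illustration:

     Rank  Opc      Count Name
     000: f228     141235 FPP
     001: f200      58100 FPP
     002: 4afc       1027 ILLEGAL
)
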
4758   bool compiler_use_jit(void)
# Line 4839 | Line 4981 | void freescratch(void)
4981  
4982   static void align_target(uae_u32 a)
4983   {
4984 <    /* Fill with NOPs --- makes debugging with gdb easier */
4985 <    while ((uae_u32)target&(a-1))
4986 <        *target++=0x90;
4984 >        if (!a)
4985 >                return;
4986 >
4987 >        if (tune_nop_fillers)
4988 >                raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
4989 >        else {
4990 >                /* Fill with NOPs --- makes debugging with gdb easier */
4991 >                while ((uae_u32)target&(a-1))
4992 >                        *target++=0x90;
4993 >        }
4994   }
4995  
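(raw_emit_nop_filler() itself lives in the x86 backend and is not shown in this diff; presumably it packs the padding into a few multi-byte NOPs, which decode faster than a run of single 0x90 bytes. Note also that when target is already aligned, the expression a - (target & (a-1)) hands the filler a full a bytes rather than zero. A hypothetical filler using the Intel-recommended long-NOP encodings, reusing emit_block() from above:

     static void emit_nop_filler_sketch(uae_u32 nbytes)
     {
         /* Intel-recommended NOPs of length 1..7 (rows zero-padded) */
         static const uae_u8 nops[7][7] = {
             { 0x90 },                                  /* nop */
             { 0x66,0x90 },                             /* o16 nop */
             { 0x0f,0x1f,0x00 },                        /* nopl (%eax) */
             { 0x0f,0x1f,0x40,0x00 },                   /* nopl 0(%eax) */
             { 0x0f,0x1f,0x44,0x00,0x00 },              /* nopl 0(%eax,%eax,1) */
             { 0x66,0x0f,0x1f,0x44,0x00,0x00 },         /* nopw 0(%eax,%eax,1) */
             { 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 },    /* nopl 0L(%eax) */
         };
         while (nbytes > 0) {
             uae_u32 c = nbytes > 7 ? 7 : nbytes;
             emit_block(nops[c - 1], c);
             nbytes -= c;
         }
     }
)
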
4996   static __inline__ int isinrom(uintptr addr)
# Line 5166 | Line 5315 | void alloc_cache(void)
5315  
5316  
5317  
5318 < extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5318 > extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5319  
5320   static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5321   {
5322 <    uae_u32 k1=0;
5323 <    uae_u32 k2=0;
5175 <    uae_s32 len=bi->len;
5176 <    uae_u32 tmp=bi->min_pcp;
5177 <    uae_u32* pos;
5322 >    uae_u32 k1 = 0;
5323 >    uae_u32 k2 = 0;
5324  
5325 <    len+=(tmp&3);
5326 <    tmp&=(~3);
5327 <    pos=(uae_u32*)tmp;
5325 > #if USE_CHECKSUM_INFO
5326 >    checksum_info *csi = bi->csi;
5327 >        Dif(!csi) abort();
5328 >        while (csi) {
5329 >                uae_s32 len = csi->length;
5330 >                uae_u32 tmp = (uae_u32)csi->start_p;
5331 > #else
5332 >                uae_s32 len = bi->len;
5333 >                uae_u32 tmp = (uae_u32)bi->min_pcp;
5334 > #endif
5335 >                uae_u32 *pos;
5336  
5337 <    if (len<0 || len>MAX_CHECKSUM_LEN) {
5338 <        *c1=0;
5339 <        *c2=0;
5340 <    }
5341 <    else {
5342 <        while (len>0) {
5343 <            k1+=*pos;
5344 <            k2^=*pos;
5345 <            pos++;
5346 <            len-=4;
5337 >                len += (tmp & 3);
5338 >                tmp &= ~3;
5339 >                pos = (uae_u32 *)tmp;
5340 >
5341 >                if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5342 >                        while (len > 0) {
5343 >                                k1 += *pos;
5344 >                                k2 ^= *pos;
5345 >                                pos++;
5346 >                                len -= 4;
5347 >                        }
5348 >                }
5349 >
5350 > #if USE_CHECKSUM_INFO
5351 >                csi = csi->next;
5352          }
5353 <        *c1=k1;
5354 <        *c2=k2;
5355 <    }
5353 > #endif
5354 >
5355 >        *c1 = k1;
5356 >        *c2 = k2;
5357   }
5358  
5359 < static void show_checksum(blockinfo* bi)
5359 > #if 0
5360 > static void show_checksum(CSI_TYPE* csi)
5361   {
5362      uae_u32 k1=0;
5363      uae_u32 k2=0;
5364 <    uae_s32 len=bi->len;
5365 <    uae_u32 tmp=(uae_u32)bi->pc_p;
5364 >    uae_s32 len=CSI_LENGTH(csi);
5365 >    uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5366      uae_u32* pos;
5367  
5368      len+=(tmp&3);
# Line 5220 | Line 5381 | static void show_checksum(blockinfo* bi)
5381          write_log(" bla\n");
5382      }
5383   }
5384 + #endif
5385  
5386  
5387   int check_for_cache_miss(void)
# Line 5273 | Line 5435 | static int called_check_checksum(blockin
5435   static inline int block_check_checksum(blockinfo* bi)
5436   {
5437      uae_u32     c1,c2;
5438 <    int         isgood;
5438 >    bool        isgood;
5439      
5440      if (bi->status!=BI_NEED_CHECK)
5441          return 1;  /* This block is in a checked state */
5442      
5443      checksum_count++;
5444 +
5445      if (bi->c1 || bi->c2)
5446          calc_checksum(bi,&c1,&c2);
5447      else {
5448          c1=c2=1;  /* Make sure it doesn't match */
5449 <    }
5449 >        }
5450      
5451      isgood=(c1==bi->c1 && c2==bi->c2);
5452 +
5453      if (isgood) {
5454          /* This block is still OK. So we reactivate. Of course, that
5455             means we have to move it into the needs-to-be-flushed list */
# Line 5403 | Line 5567 | static __inline__ void create_popalls(vo
5567       registers before jumping back to the various get-out routines.
5568       This generates the code for it.
5569    */
5570 <  popall_do_nothing=current_compile_p;
5570 >  align_target(align_jumps);
5571 >  popall_do_nothing=get_target();
5572    for (i=0;i<N_REGS;i++) {
5573        if (need_to_preserve[i])
5574            raw_pop_l_r(i);
5575    }
5576    raw_jmp((uae_u32)do_nothing);
5412  align_target(32);
5577    
5578 +  align_target(align_jumps);
5579    popall_execute_normal=get_target();
5580    for (i=0;i<N_REGS;i++) {
5581        if (need_to_preserve[i])
5582            raw_pop_l_r(i);
5583    }
5584    raw_jmp((uae_u32)execute_normal);
5420  align_target(32);
5585  
5586 +  align_target(align_jumps);
5587    popall_cache_miss=get_target();
5588    for (i=0;i<N_REGS;i++) {
5589        if (need_to_preserve[i])
5590            raw_pop_l_r(i);
5591    }
5592    raw_jmp((uae_u32)cache_miss);
5428  align_target(32);
5593  
5594 +  align_target(align_jumps);
5595    popall_recompile_block=get_target();
5596    for (i=0;i<N_REGS;i++) {
5597        if (need_to_preserve[i])
5598            raw_pop_l_r(i);
5599    }
5600    raw_jmp((uae_u32)recompile_block);
5601 <  align_target(32);
5602 <  
5601 >
5602 >  align_target(align_jumps);
5603    popall_exec_nostats=get_target();
5604    for (i=0;i<N_REGS;i++) {
5605        if (need_to_preserve[i])
5606            raw_pop_l_r(i);
5607    }
5608    raw_jmp((uae_u32)exec_nostats);
5609 <  align_target(32);
5610 <  
5609 >
5610 >  align_target(align_jumps);
5611    popall_check_checksum=get_target();
5612    for (i=0;i<N_REGS;i++) {
5613        if (need_to_preserve[i])
5614            raw_pop_l_r(i);
5615    }
5616    raw_jmp((uae_u32)check_checksum);
5617 <  align_target(32);
5618 <  
5617 >
5618 >  align_target(align_jumps);
5619    current_compile_p=get_target();
5620   #else
5621    popall_exec_nostats=(void *)exec_nostats;
# Line 5459 | Line 5624 | static __inline__ void create_popalls(vo
5624    popall_recompile_block=(void *)recompile_block;
5625    popall_do_nothing=(void *)do_nothing;
5626    popall_check_checksum=(void *)check_checksum;
5462  pushall_call_handler=get_target();  
5627   #endif
5628  
5629    /* And now, the code to do the matching pushes and then jump
# Line 5475 | Line 5639 | static __inline__ void create_popalls(vo
5639    raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5640    raw_and_l_ri(r,TAGMASK);
5641    raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5642 +
5643 + #ifdef X86_ASSEMBLY
5644 +  align_target(align_jumps);
5645 +  m68k_compile_execute = (void (*)(void))get_target();
5646 +  for (i=N_REGS;i--;) {
5647 +          if (need_to_preserve[i])
5648 +                  raw_push_l_r(i);
5649 +  }
5650 +  align_target(align_loops);
5651 +  uae_u32 dispatch_loop = (uae_u32)get_target();
5652 +  r=REG_PC_TMP;
5653 +  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5654 +  raw_and_l_ri(r,TAGMASK);
5655 +  raw_call_m_indexed((uae_u32)cache_tags,r,4);
5656 +  raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5657 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5658 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5659 +  raw_call((uae_u32)m68k_do_specialties);
5660 +  raw_test_l_rr(REG_RESULT,REG_RESULT);
5661 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5662 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5663 +  raw_cmp_b_mi((uae_u32)&quit_program,0);
5664 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5665 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5666 +  for (i=0;i<N_REGS;i++) {
5667 +          if (need_to_preserve[i])
5668 +                  raw_pop_l_r(i);
5669 +  }
5670 +  raw_ret();
5671 + #endif
5672   }
5673  
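(The dispatcher emitted above is the hand-rolled twin of m68k_do_compile_execute() near the end of this file. Restated in C, the generated loop behaves roughly like this sketch:

     void m68k_compile_execute_sketch(void)
     {
         /* prologue: push all callee-saved registers */
         for (;;) {
             cache_tags[cacheline(regs.pc_p)].handler();  /* run one block */
             if (regs.spcflags == 0)
                 continue;                 /* fast path: straight back around */
             if (m68k_do_specialties() == 0)
                 continue;
             if (quit_program)
                 break;                    /* pop registers and return */
         }
     }
)
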
5674   static __inline__ void reset_lists(void)
# Line 5492 | Line 5686 | static void prepare_block(blockinfo* bi)
5686      int i;
5687  
5688      set_target(current_compile_p);
5689 <    align_target(32);
5689 >    align_target(align_jumps);
5690      bi->direct_pen=(cpuop_func *)get_target();
5691      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5692      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5693      raw_jmp((uae_u32)popall_execute_normal);
5694  
5695 <    align_target(32);
5695 >    align_target(align_jumps);
5696      bi->direct_pcc=(cpuop_func *)get_target();
5697      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5698      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5699      raw_jmp((uae_u32)popall_check_checksum);
5506
5507    align_target(32);
5700      current_compile_p=get_target();
5701  
5702      bi->deplist=NULL;
# Line 5557 | Line 5749 | void build_comp(void)
5749          
5750          for (i = 0; tbl[i].opcode < 65536; i++) {
5751                  int cflow = table68k[tbl[i].opcode].cflow;
5752 +                if (USE_INLINING && ((cflow & fl_const_jump) != 0))
5753 +                        cflow = fl_const_jump;
5754 +                else
5755 +                        cflow &= ~fl_const_jump;
5756                  prop[cft_map(tbl[i].opcode)].cflow = cflow;
5757  
5758                  int uses_fpu = tbl[i].specific & 32;
5759 <                if (uses_fpu && avoid_fpu)
5759 >                int uses_setzflg = tbl[i].specific & 64;
5760 >                if ((uses_fpu && avoid_fpu) || (uses_setzflg && !setzflg_uses_bsf))
5761                          compfunctbl[cft_map(tbl[i].opcode)] = NULL;
5762                  else
5763                          compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler;
# Line 5568 | Line 5765 | void build_comp(void)
5765  
5766      for (i = 0; nftbl[i].opcode < 65536; i++) {
5767                  int uses_fpu = tbl[i].specific & 32;
5768 <                if (uses_fpu && avoid_fpu)
5768 >                int uses_setzflg = nftbl[i].specific & 64;
5769 >                if ((uses_fpu && avoid_fpu) || (uses_setzflg && !setzflg_uses_bsf))
5770                          nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL;
5771                  else
5772                          nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler;
# Line 5850 | Line 6048 | static void compile_block(cpu_history* p
6048          int r;
6049          int was_comp=0;
6050          uae_u8 liveflags[MAXRUN+1];
6051 + #if USE_CHECKSUM_INFO
6052 +        bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6053 +        uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
6054 +        uae_u32 min_pcp=max_pcp;
6055 + #else
6056          uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
6057          uae_u32 min_pcp=max_pcp;
6058 + #endif
6059          uae_u32 cl=cacheline(pc_hist[0].location);
6060          void* specflags=(void*)&regs.spcflags;
6061          blockinfo* bi=NULL;
# Line 5895 | Line 6099 | static void compile_block(cpu_history* p
6099          remove_deps(bi); /* We are about to create new code */
6100          bi->optlevel=optlev;
6101          bi->pc_p=(uae_u8*)pc_hist[0].location;
6102 + #if USE_CHECKSUM_INFO
6103 +        free_checksum_info_chain(bi->csi);
6104 +        bi->csi = NULL;
6105 + #endif
6106          
6107          liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6108          i=blocklen;
# Line 5902 | Line 6110 | static void compile_block(cpu_history* p
6110              uae_u16* currpcp=pc_hist[i].location;
6111              uae_u32 op=DO_GET_OPCODE(currpcp);
6112  
6113 + #if USE_CHECKSUM_INFO
6114 +                trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6115 + #if USE_INLINING
6116 +                if (is_const_jump(op)) {
6117 +                        checksum_info *csi = alloc_checksum_info();
6118 +                        csi->start_p = (uae_u8 *)min_pcp;
6119 +                        csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6120 +                        csi->next = bi->csi;
6121 +                        bi->csi = csi;
6122 +                        max_pcp = (uae_u32)currpcp;
6123 +                }
6124 + #endif
6125 +                min_pcp = (uae_u32)currpcp;
6126 + #else
6127              if ((uae_u32)currpcp<min_pcp)
6128                  min_pcp=(uae_u32)currpcp;
6129              if ((uae_u32)currpcp>max_pcp)
6130                  max_pcp=(uae_u32)currpcp;
6131 + #endif
6132  
6133                  liveflags[i]=((liveflags[i+1]&
6134                                 (~prop[op].set_flags))|
# Line 5914 | Line 6137 | static void compile_block(cpu_history* p
6137                      liveflags[i]&= ~FLAG_Z;
6138          }
6139  
6140 + #if USE_CHECKSUM_INFO
6141 +        checksum_info *csi = alloc_checksum_info();
6142 +        csi->start_p = (uae_u8 *)min_pcp;
6143 +        csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6144 +        csi->next = bi->csi;
6145 +        bi->csi = csi;
6146 + #endif
6147 +
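(With USE_INLINING, a single block trace may cover several discontiguous 68k ranges because it follows constant jumps. The backward scan above therefore closes off one checksum_info per contiguous run, and the code just after the loop adds the final run:

     /* resulting chain for a trace that crossed one constant jump:
        bi->csi -> { start_p = one run's lowest address,
                     length  = run span + LONGEST_68K_INST }
                -> { start_p = the other run's lowest address,
                     length  = run span + LONGEST_68K_INST }
                -> NULL
        each run is padded by LONGEST_68K_INST because only the start
        address of its last opcode is recorded, never that opcode's size */
)
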
6148          bi->needed_flags=liveflags[0];
6149  
6150 <        align_target(32);
6150 >        align_target(align_loops);
6151          was_comp=0;
6152  
6153          bi->direct_handler=(cpuop_func *)get_target();
# Line 6003 | Line 6234 | static void compile_block(cpu_history* p
6234                      raw_mov_l_mi((uae_u32)&regs.pc_p,
6235                                   (uae_u32)pc_hist[i].location);
6236                      raw_call((uae_u32)cputbl[opcode]);
6237 + #if PROFILE_UNTRANSLATED_INSNS
6238 +                        // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6239 +                        raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
6240 + #endif
6241                      //raw_add_l_mi((uae_u32)&oink,1); // FIXME
6242   #if USE_NORMAL_CALLING_CONVENTION
6243                      raw_inc_sp(4);
# Line 6091 | Line 6326 | static void compile_block(cpu_history* p
6326                  raw_jmp((uae_u32)popall_do_nothing);
6327                  create_jmpdep(bi,0,tba,t1);
6328  
6329 <                align_target(16);
6329 >                align_target(align_jumps);
6330                  /* not-predicted outcome */
6331                  *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6332                  live=tmp; /* Ouch again */
# Line 6160 | Line 6395 | static void compile_block(cpu_history* p
6395          big_to_small_state(&live,&(bi->env));
6396   #endif
6397  
6398 + #if USE_CHECKSUM_INFO
6399 +        remove_from_list(bi);
6400 +        if (trace_in_rom) {
6401 +                // No need to checksum that block trace on cache invalidation
6402 +                free_checksum_info_chain(bi->csi);
6403 +                bi->csi = NULL;
6404 +                add_to_dormant(bi);
6405 +        }
6406 +        else {
6407 +            calc_checksum(bi,&(bi->c1),&(bi->c2));
6408 +                add_to_active(bi);
6409 +        }
6410 + #else
6411          if (next_pc_p+extra_len>=max_pcp &&
6412              next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6413              max_pcp=next_pc_p+extra_len;  /* extra_len covers flags magic */
6414          else
6415              max_pcp+=LONGEST_68K_INST;
6416 +
6417          bi->len=max_pcp-min_pcp;
6418          bi->min_pcp=min_pcp;
6419 <                    
6419 >        
6420          remove_from_list(bi);
6421          if (isinrom(min_pcp) && isinrom(max_pcp)) {
6422              add_to_dormant(bi); /* No need to checksum it on cache flush.
# Line 6178 | Line 6427 | static void compile_block(cpu_history* p
6427              calc_checksum(bi,&(bi->c1),&(bi->c2));
6428              add_to_active(bi);
6429          }
6430 + #endif
6431          
6432          current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6433          
# Line 6197 | Line 6447 | static void compile_block(cpu_history* p
6447   #endif
6448          
6449          log_dump();
6450 <        align_target(32);
6450 >        align_target(align_jumps);
6451  
6452          /* This is the non-direct handler */
6453          bi->handler=
# Line 6213 | Line 6463 | static void compile_block(cpu_history* p
6463  
6464          raw_jmp((uae_u32)bi->direct_handler);
6465  
6216        align_target(32);
6466          current_compile_p=get_target();
6218
6467          raise_in_cl_list(bi);
6468          
6469          /* We will flush soon, anyway, so let's do it now */
# Line 6241 | Line 6489 | void exec_nostats(void)
6489   {
6490          for (;;)  {
6491                  uae_u32 opcode = GET_OPCODE;
6244 #ifdef X86_ASSEMBLY__disable
6245                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6246                                                         : : "b" (cpufunctbl[opcode]), "a" (opcode)
6247                                                         : "%edx", "%ecx", "%esi", "%edi",  "%ebp", "memory", "cc");
6248 #else
6492                  (*cpufunctbl[opcode])(opcode);
6250 #endif
6493                  if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6494                          return; /* We will deal with the spcflags in the caller */
6495                  }
# Line 6272 | Line 6514 | void execute_normal(void)
6514   #if FLIGHT_RECORDER
6515                          m68k_record_step(m68k_getpc());
6516   #endif
6275 #ifdef X86_ASSEMBLY__disable
6276                        __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6277                                                                 : : "b" (cpufunctbl[opcode]), "a" (opcode)
6278                                                                 : "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc");
6279 #else
6517                          (*cpufunctbl[opcode])(opcode);
6281 #endif
6518                          if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6519                                  compile_block(pc_hist, blocklen);
6520                                  return; /* We will deal with the spcflags in the caller */
# Line 6291 | Line 6527 | void execute_normal(void)
6527  
6528   typedef void (*compiled_handler)(void);
6529  
6530 + #ifdef X86_ASSEMBLY
6531 + void (*m68k_compile_execute)(void) = NULL;
6532 + #else
6533   void m68k_do_compile_execute(void)
6534   {
6535          for (;;) {
6297 #ifdef X86_ASSEMBLY
6298                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6299                                                         : : "b" (cache_tags[cacheline(regs.pc_p)].handler)
6300                                                         : "%edx", "%ecx", "%eax", "%esi", "%edi", "%ebp", "memory", "cc");
6301 #else
6536                  ((compiled_handler)(pushall_call_handler))();
6303 #endif
6537                  /* Whenever we return from that, we should check spcflags */
6538                  if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6539                          if (m68k_do_specialties ())
# Line 6308 | Line 6541 | void m68k_do_compile_execute(void)
6541                  }
6542          }
6543   }
6544 + #endif

Diff Legend

  (old number only)  Removed lines
  +                  Added lines
  <                  Changed lines (revision 1.3)
  >                  Changed lines (revision 1.15)