root/cebix/BasiliskII/src/uae_cpu/compiler/compemu_support.cpp

Comparing BasiliskII/src/uae_cpu/compiler/compemu_support.cpp (file contents):
Revision 1.4 by gbeauche, 2002-09-18T11:41:56Z vs.
Revision 1.16 by gbeauche, 2003-03-13T20:34:34Z

# Line 1 | Line 1
1 + /*
2 + *  compiler/compemu_support.cpp - Core dynamic translation engine
3 + *
4 + *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 + *
6 + *  Adaptation for Basilisk II and improvements, copyright 2000-2002
7 + *    Gwenole Beauchesne
8 + *
9 + *  Basilisk II (C) 1997-2002 Christian Bauer
10 + *  
11 + *  This program is free software; you can redistribute it and/or modify
12 + *  it under the terms of the GNU General Public License as published by
13 + *  the Free Software Foundation; either version 2 of the License, or
14 + *  (at your option) any later version.
15 + *
16 + *  This program is distributed in the hope that it will be useful,
17 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 + *  GNU General Public License for more details.
20 + *
21 + *  You should have received a copy of the GNU General Public License
22 + *  along with this program; if not, write to the Free Software
23 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 + */
25 +
26   #if !REAL_ADDRESSING && !DIRECT_ADDRESSING
27   #error "Only Real or Direct Addressing is supported with the JIT Compiler"
28   #endif
# Line 44 | Line 69
69   #endif
70  
71   #ifndef WIN32
72 < #define PROFILE_COMPILE_TIME    1
72 > #define PROFILE_COMPILE_TIME            1
73 > #define PROFILE_UNTRANSLATED_INSNS      1
74   #endif
75  
76   #ifdef WIN32
# Line 69 | Line 95 | static clock_t emul_start_time = 0;
95   static clock_t emul_end_time    = 0;
96   #endif
97  
98 + #if PROFILE_UNTRANSLATED_INSNS
99 + const int untranslated_top_ten = 20;
100 + static uae_u32 raw_cputbl_count[65536] = { 0, };
101 + static uae_u16 opcode_nums[65536];
102 +
103 + static int untranslated_compfn(const void *e1, const void *e2)
104 + {
105 +        return raw_cputbl_count[*(const uae_u16 *)e1] < raw_cputbl_count[*(const uae_u16 *)e2];
106 + }
107 + #endif
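
Note on the comparator above: qsort expects a negative/zero/positive return value, but untranslated_compfn returns the boolean result of <, i.e. only 0 or 1. Most qsort implementations tolerate this, but a strictly conformant descending-by-count comparator would look like the following standalone sketch (the sample_* names are illustrative stand-ins for raw_cputbl_count/opcode_nums):

    #include <cstdlib>

    static unsigned long  sample_count[65536];  // stand-in for raw_cputbl_count
    static unsigned short sample_nums[65536];   // stand-in for opcode_nums

    // Returns <0, 0 or >0 as qsort requires; larger counts sort first.
    static int by_count_desc(const void *e1, const void *e2)
    {
        unsigned long c1 = sample_count[*(const unsigned short *)e1];
        unsigned long c2 = sample_count[*(const unsigned short *)e2];
        return (c1 < c2) - (c1 > c2);
    }

    // usage:
    //   for (int i = 0; i < 65536; i++) sample_nums[i] = (unsigned short)i;
    //   qsort(sample_nums, 65536, sizeof(sample_nums[0]), by_count_desc);
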
108 +
109   compop_func *compfunctbl[65536];
110   compop_func *nfcompfunctbl[65536];
111   cpuop_func *nfcpufunctbl[65536];
112   uae_u8* comp_pc_p;
113  
114 + // From newcpu.cpp
115 + extern bool quit_program;
116 +
117   // gb-- Extra data for Basilisk II/JIT
118   #if JIT_DEBUG
119   static bool             JITDebug                        = false;        // Enable runtime disassemblers through mon?
# Line 88 | Line 128 | static bool            lazy_flush                      = true;         // Fl
128   static bool             avoid_fpu                       = true;         // Flag: compile FPU instructions ?
129   static bool             have_cmov                       = false;        // target has CMOV instructions ?
130   static bool             have_rat_stall          = true;         // target has partial register stalls ?
131 + const bool              tune_alignment          = true;         // Tune code alignments for running CPU ?
132 + const bool              tune_nop_fillers        = true;         // Tune no-op fillers for architecture
133 + static bool             setzflg_uses_bsf        = false;        // setzflg virtual instruction can use native BSF instruction correctly?
134 + static int              align_loops                     = 32;           // Align the start of loops
135 + static int              align_jumps                     = 32;           // Align the start of jumps
136   static int              zero_fd                         = -1;
137   static int              optcount[10]            = {
138          10,             // How often a block has to be executed before it is translated
# Line 104 | Line 149 | struct op_properties {
149   };
150   static op_properties prop[65536];
151  
107 // gb-- Control Flow Predicates
108
152   static inline int end_block(uae_u32 opcode)
153   {
154          return (prop[opcode].cflow & fl_end_block);
155   }
156  
157 < static inline bool may_trap(uae_u32 opcode)
157 > static inline bool is_const_jump(uae_u32 opcode)
158   {
159 <        return (prop[opcode].cflow & fl_trap);
159 >        return (prop[opcode].cflow == fl_const_jump);
160   }
161  
162   uae_u8* start_pc_p;
# Line 491 | Line 534 | static void prepare_block(blockinfo* bi)
534     compiled. If the list of free blockinfos is empty, we allocate a new
535     pool of blockinfos and link the newly created blockinfos together
536     into the list of free blockinfos. Otherwise, we simply pop a structure
537 <   of the free list.
537 >   off the free list.
538  
539     Blockinfos are lazily deallocated, i.e. chained together in the
540     list of free blockinfos whenever a translation cache flush (hard or
541     soft) request occurs.
542   */
543  
544 < #if USE_SEPARATE_BIA
545 < const int BLOCKINFO_POOL_SIZE = 128;
546 < struct blockinfo_pool {
547 <        blockinfo bi[BLOCKINFO_POOL_SIZE];
548 <        blockinfo_pool *next;
544 > template< class T >
545 > class LazyBlockAllocator
546 > {
547 >        enum {
548 >                kPoolSize = 1 + 4096 / sizeof(T)
549 >        };
550 >        struct Pool {
551 >                T chunk[kPoolSize];
552 >                Pool * next;
553 >        };
554 >        Pool * mPools;
555 >        T * mChunks;
556 > public:
557 >        LazyBlockAllocator() : mPools(0), mChunks(0) { }
558 >        ~LazyBlockAllocator();
559 >        T * acquire();
560 >        void release(T * const);
561   };
507 static blockinfo_pool * blockinfo_pools = 0;
508 static blockinfo *              free_blockinfos = 0;
509 #endif
562  
563 < static __inline__ blockinfo *alloc_blockinfo(void)
563 > template< class T >
564 > LazyBlockAllocator<T>::~LazyBlockAllocator()
565   {
566 < #if USE_SEPARATE_BIA
567 <        if (!free_blockinfos) {
568 <                // There is no blockinfo struct left, allocate a new
569 <                // pool and link the chunks into the free list
570 <                blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool));
571 <                for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) {
572 <                        bi->next = free_blockinfos;
573 <                        free_blockinfos = bi;
566 >        Pool * currentPool = mPools;
567 >        while (currentPool) {
568 >                Pool * deadPool = currentPool;
569 >                currentPool = currentPool->next;
570 >                free(deadPool);
571 >        }
572 > }
573 >
574 > template< class T >
575 > T * LazyBlockAllocator<T>::acquire()
576 > {
577 >        if (!mChunks) {
578 >                // There is no chunk left, allocate a new pool and link the
579 >                // chunks into the free list
580 >                Pool * newPool = (Pool *)malloc(sizeof(Pool));
581 >                for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
582 >                        chunk->next = mChunks;
583 >                        mChunks = chunk;
584                  }
585 <                bi_pool->next = blockinfo_pools;
586 <                blockinfo_pools = bi_pool;
585 >                newPool->next = mPools;
586 >                mPools = newPool;
587          }
588 <        blockinfo *bi = free_blockinfos;
589 <        free_blockinfos = bi->next;
590 < #else
528 <        blockinfo *bi = (blockinfo*)current_compile_p;
529 <        current_compile_p += sizeof(blockinfo);
530 < #endif
531 <        return bi;
588 >        T * chunk = mChunks;
589 >        mChunks = chunk->next;
590 >        return chunk;
591   }
592  
593 < static __inline__ void free_blockinfo(blockinfo *bi)
593 > template< class T >
594 > void LazyBlockAllocator<T>::release(T * const chunk)
595 > {
596 >        chunk->next = mChunks;
597 >        mChunks = chunk;
598 > }
599 >
600 > template< class T >
601 > class HardBlockAllocator
602   {
603 + public:
604 +        T * acquire() {
605 +                T * data = (T *)current_compile_p;
606 +                current_compile_p += sizeof(T);
607 +                return data;
608 +        }
609 +
610 +        void release(T * const chunk) {
611 +                // Deallocated on invalidation
612 +        }
613 + };
614 +
615   #if USE_SEPARATE_BIA
616 <        bi->next = free_blockinfos;
617 <        free_blockinfos = bi;
616 > static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
617 > static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
618 > #else
619 > static HardBlockAllocator<blockinfo> BlockInfoAllocator;
620 > static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
621   #endif
622 +
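
A usage sketch for the allocators introduced above (node and its fields are illustrative): the pooled type must carry an intrusive next pointer, which acquire() and release() thread through as the free-list link, exactly as blockinfo and checksum_info do.

    struct node {
        int   payload;
        node *next;       // required: used as the free-list link
    };

    static LazyBlockAllocator<node> NodePool;

    static void allocator_demo(void)
    {
        node *n = NodePool.acquire();   // mallocs a ~4 KB pool of chunks if the free list is empty
        n->payload = 42;
        NodePool.release(n);            // back onto the free list; whole pools are
                                        // only freed in the allocator's destructor
    }
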
623 + static __inline__ checksum_info *alloc_checksum_info(void)
624 + {
625 +        checksum_info *csi = ChecksumInfoAllocator.acquire();
626 +        csi->next = NULL;
627 +        return csi;
628   }
629  
630 < static void free_blockinfo_pools(void)
630 > static __inline__ void free_checksum_info(checksum_info *csi)
631   {
632 < #if USE_SEPARATE_BIA
633 <        int blockinfo_pool_count = 0;
634 <        blockinfo_pool *curr_pool = blockinfo_pools;
635 <        while (curr_pool) {
636 <                blockinfo_pool_count++;
637 <                blockinfo_pool *dead_pool = curr_pool;
638 <                curr_pool = curr_pool->next;
639 <                free(dead_pool);
632 >        csi->next = NULL;
633 >        ChecksumInfoAllocator.release(csi);
634 > }
635 >
636 > static __inline__ void free_checksum_info_chain(checksum_info *csi)
637 > {
638 >        while (csi != NULL) {
639 >                checksum_info *csi2 = csi->next;
640 >                free_checksum_info(csi);
641 >                csi = csi2;
642          }
643 <        
644 <        uae_u32 blockinfo_pools_size = blockinfo_pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo);
645 <        write_log("### Blockinfo allocation statistics\n");
646 <        write_log("Number of blockinfo pools  : %d\n", blockinfo_pool_count);
647 <        write_log("Total number of blockinfos : %d (%d KB)\n",
648 <                          blockinfo_pool_count * BLOCKINFO_POOL_SIZE,
649 <                          blockinfo_pools_size / 1024);
560 <        write_log("\n");
643 > }
644 >
645 > static __inline__ blockinfo *alloc_blockinfo(void)
646 > {
647 >        blockinfo *bi = BlockInfoAllocator.acquire();
648 > #if USE_CHECKSUM_INFO
649 >        bi->csi = NULL;
650   #endif
651 +        return bi;
652 + }
653 +
654 + static __inline__ void free_blockinfo(blockinfo *bi)
655 + {
656 + #if USE_CHECKSUM_INFO
657 +        free_checksum_info_chain(bi->csi);
658 +        bi->csi = NULL;
659 + #endif
660 +        BlockInfoAllocator.release(bi);
661   }
662  
663   static __inline__ void alloc_blockinfos(void)
# Line 601 | Line 700 | static __inline__ void emit_long(uae_u32
700      target+=4;
701   }
702  
703 + static __inline__ void emit_block(const uae_u8 *block, uae_u32 blocklen)
704 + {
705 +        memcpy((uae_u8 *)target,block,blocklen);
706 +        target+=blocklen;
707 + }
708 +
709   static __inline__ uae_u32 reverse32(uae_u32 v)
710   {
711   #if 1
# Line 2566 | Line 2671 | MIDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM c
2671   }
2672   MENDFUNC(3,cmov_l_rm,(RW4 d, IMM s, IMM cc))
2673  
2674 < MIDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2674 > MIDFUNC(1,setzflg_l,(RW4 r))
2675   {
2676 <    CLOBBER_BSF;
2677 <    s=readreg(s,4);
2678 <    d=writereg(d,4);
2679 <    raw_bsf_l_rr(d,s);
2680 <    unlock2(s);
2681 <    unlock2(d);
2676 >        if (setzflg_uses_bsf) {
2677 >                CLOBBER_BSF;
2678 >                r=rmw(r,4,4);
2679 >                raw_bsf_l_rr(r,r);
2680 >                unlock2(r);
2681 >        }
2682 >        else {
2683 >                Dif (live.flags_in_flags!=VALID) {
2684 >                        write_log("setzflg() wanted flags in native flags, they are %d\n",
2685 >                                          live.flags_in_flags);
2686 >                        abort();
2687 >                }
2688 >                r=readreg(r,4);
2689 >                int f=writereg(S11,4);
2690 >                int t=writereg(S12,4);
2691 >                raw_flags_set_zero(f,r,t);
2692 >                unlock2(f);
2693 >                unlock2(r);
2694 >                unlock2(t);
2695 >        }
2696   }
2697 < MENDFUNC(2,bsf_l_rr,(W4 d, R4 s))
2697 > MENDFUNC(1,setzflg_l,(RW4 r))
2698  
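
The BSF fast path above leans on an x86 detail: BSF sets ZF when its source operand is zero and clears it otherwise, so raw_bsf_l_rr(r,r) deposits exactly the emulated Z flag into the native flags register. Because BSF officially leaves the remaining arithmetic flags undefined, setzflg_uses_bsf is only enabled once target_check_bsf() has confirmed the running processor preserves them (see compiler_init below). In C terms the virtual instruction computes no more than this (behavioural sketch, not the emitted code):

    // What the BSF path leaves in the native Z flag:
    static inline bool z_from_register(unsigned int r)
    {
        return r == 0;   // ZF := 1 iff no bit is set, i.e. BSF found nothing
    }
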
2699   MIDFUNC(2,imul_32_32,(RW4 d, R4 s))
2700   {
# Line 4560 | Line 4679 | void compiler_init(void)
4679          
4680          // Initialize target CPU (check for features, e.g. CMOV, rat stalls)
4681          raw_init_cpu();
4682 +        setzflg_uses_bsf = target_check_bsf();
4683          write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4684          write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4685 +        write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4686          
4687          // Translation cache flush mechanism
4688          lazy_flush = PrefsFindBool("jitlazyflush");
# Line 4572 | Line 4693 | void compiler_init(void)
4693          write_log("<JIT compiler> : register aliasing : %s\n", str_on_off(1));
4694          write_log("<JIT compiler> : FP register aliasing : %s\n", str_on_off(USE_F_ALIAS));
4695          write_log("<JIT compiler> : lazy constant offsetting : %s\n", str_on_off(USE_OFFSET));
4696 +        write_log("<JIT compiler> : block inlining : %s\n", str_on_off(USE_INLINING));
4697          write_log("<JIT compiler> : separate blockinfo allocation : %s\n", str_on_off(USE_SEPARATE_BIA));
4698          
4699          // Build compiler tables
# Line 4579 | Line 4701 | void compiler_init(void)
4701          
4702          initialized = true;
4703          
4704 + #if PROFILE_UNTRANSLATED_INSNS
4705 +        write_log("<JIT compiler> : gather statistics on untranslated insns count\n");
4706 + #endif
4707 +
4708   #if PROFILE_COMPILE_TIME
4709          write_log("<JIT compiler> : gather statistics on translation time\n");
4710          emul_start_time = clock();
# Line 4597 | Line 4723 | void compiler_exit(void)
4723                  compiled_code = 0;
4724          }
4725          
4600        // Deallocate blockinfo pools
4601        free_blockinfo_pools();
4602        
4726   #ifndef WIN32
4727          // Close /dev/zero
4728          if (zero_fd > 0)
# Line 4615 | Line 4738 | void compiler_exit(void)
4738                  100.0*double(compile_time)/double(emul_time));
4739          write_log("\n");
4740   #endif
4741 +
4742 + #if PROFILE_UNTRANSLATED_INSNS
4743 +        uae_u64 untranslated_count = 0;
4744 +        for (int i = 0; i < 65536; i++) {
4745 +                opcode_nums[i] = i;
4746 +                untranslated_count += raw_cputbl_count[i];
4747 +        }
4748 +        write_log("Sorting out untranslated instructions count...\n");
4749 +        qsort(opcode_nums, 65536, sizeof(uae_u16), untranslated_compfn);
4750 +        write_log("\nRank  Opc      Count Name\n");
4751 +        for (int i = 0; i < untranslated_top_ten; i++) {
4752 +                uae_u32 count = raw_cputbl_count[opcode_nums[i]];
4753 +                struct instr *dp;
4754 +                struct mnemolookup *lookup;
4755 +                if (!count)
4756 +                        break;
4757 +                dp = table68k + opcode_nums[i];
4758 +                for (lookup = lookuptab; lookup->mnemo != dp->mnemo; lookup++)
4759 +                        ;
4760 +                write_log("%03d: %04x %10lu %s\n", i, opcode_nums[i], count, lookup->name);
4761 +        }
4762 + #endif
4763   }
4764  
4765   bool compiler_use_jit(void)
# Line 4843 | Line 4988 | void freescratch(void)
4988  
4989   static void align_target(uae_u32 a)
4990   {
4991 <    /* Fill with NOPs --- makes debugging with gdb easier */
4992 <    while ((uae_u32)target&(a-1))
4993 <        *target++=0x90;
4991 >        if (!a)
4992 >                return;
4993 >
4994 >        if (tune_nop_fillers)
4995 >                raw_emit_nop_filler(a - (((uae_u32)target) & (a - 1)));
4996 >        else {
4997 >                /* Fill with NOPs --- makes debugging with gdb easier */
4998 >                while ((uae_u32)target&(a-1))
4999 >                        *target++=0x90;
5000 >        }
5001   }
5002  
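
Minor observation on the filler count: a - (target & (a-1)) comes to a full a bytes when target is already aligned, whereas the NOP loop in the else branch emits nothing in that case; presumably raw_emit_nop_filler tolerates the surplus. The conventional branch-free form that yields zero at an aligned address is (standalone sketch):

    #include <cstdint>

    // Padding needed to round addr up to a power-of-two 'align'; 0 if aligned.
    static inline std::uintptr_t pad_to(std::uintptr_t addr, std::uintptr_t align)
    {
        return (0 - addr) & (align - 1);
    }
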
5003   static __inline__ int isinrom(uintptr addr)
# Line 5170 | Line 5322 | void alloc_cache(void)
5322  
5323  
5324  
5325 < extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5325 > extern void op_illg_1 (uae_u32 opcode) REGPARAM;
5326  
5327   static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5328   {
5329 <    uae_u32 k1=0;
5330 <    uae_u32 k2=0;
5179 <    uae_s32 len=bi->len;
5180 <    uae_u32 tmp=bi->min_pcp;
5181 <    uae_u32* pos;
5329 >    uae_u32 k1 = 0;
5330 >    uae_u32 k2 = 0;
5331  
5332 <    len+=(tmp&3);
5333 <    tmp&=(~3);
5334 <    pos=(uae_u32*)tmp;
5332 > #if USE_CHECKSUM_INFO
5333 >    checksum_info *csi = bi->csi;
5334 >        Dif(!csi) abort();
5335 >        while (csi) {
5336 >                uae_s32 len = csi->length;
5337 >                uae_u32 tmp = (uae_u32)csi->start_p;
5338 > #else
5339 >                uae_s32 len = bi->len;
5340 >                uae_u32 tmp = (uae_u32)bi->min_pcp;
5341 > #endif
5342 >                uae_u32*pos;
5343  
5344 <    if (len<0 || len>MAX_CHECKSUM_LEN) {
5345 <        *c1=0;
5346 <        *c2=0;
5347 <    }
5348 <    else {
5349 <        while (len>0) {
5350 <            k1+=*pos;
5351 <            k2^=*pos;
5352 <            pos++;
5353 <            len-=4;
5344 >                len += (tmp & 3);
5345 >                tmp &= ~3;
5346 >                pos = (uae_u32 *)tmp;
5347 >
5348 >                if (len >= 0 && len <= MAX_CHECKSUM_LEN) {
5349 >                        while (len > 0) {
5350 >                                k1 += *pos;
5351 >                                k2 ^= *pos;
5352 >                                pos++;
5353 >                                len -= 4;
5354 >                        }
5355 >                }
5356 >
5357 > #if USE_CHECKSUM_INFO
5358 >                csi = csi->next;
5359          }
5360 <        *c1=k1;
5361 <        *c2=k2;
5362 <    }
5360 > #endif
5361 >
5362 >        *c1 = k1;
5363 >        *c2 = k2;
5364   }
5365  
5366 < static void show_checksum(blockinfo* bi)
5366 > #if 0
5367 > static void show_checksum(CSI_TYPE* csi)
5368   {
5369      uae_u32 k1=0;
5370      uae_u32 k2=0;
5371 <    uae_s32 len=bi->len;
5372 <    uae_u32 tmp=(uae_u32)bi->pc_p;
5371 >    uae_s32 len=CSI_LENGTH(csi);
5372 >    uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5373      uae_u32* pos;
5374  
5375      len+=(tmp&3);
# Line 5224 | Line 5388 | static void show_checksum(blockinfo* bi)
5388          write_log(" bla\n");
5389      }
5390   }
5391 + #endif
5392  
5393  
5394   int check_for_cache_miss(void)
# Line 5277 | Line 5442 | static int called_check_checksum(blockin
5442   static inline int block_check_checksum(blockinfo* bi)
5443   {
5444      uae_u32     c1,c2;
5445 <    int         isgood;
5445 >    bool        isgood;
5446      
5447      if (bi->status!=BI_NEED_CHECK)
5448          return 1;  /* This block is in a checked state */
5449      
5450      checksum_count++;
5451 +
5452      if (bi->c1 || bi->c2)
5453          calc_checksum(bi,&c1,&c2);
5454      else {
5455          c1=c2=1;  /* Make sure it doesn't match */
5456 <    }
5456 >        }
5457      
5458      isgood=(c1==bi->c1 && c2==bi->c2);
5459 +
5460      if (isgood) {
5461          /* This block is still OK. So we reactivate. Of course, that
5462             means we have to move it into the needs-to-be-flushed list */
# Line 5407 | Line 5574 | static __inline__ void create_popalls(vo
5574       registers before jumping back to the various get-out routines.
5575       This generates the code for it.
5576    */
5577 <  popall_do_nothing=current_compile_p;
5577 >  align_target(align_jumps);
5578 >  popall_do_nothing=get_target();
5579    for (i=0;i<N_REGS;i++) {
5580        if (need_to_preserve[i])
5581            raw_pop_l_r(i);
5582    }
5583    raw_jmp((uae_u32)do_nothing);
5416  align_target(32);
5584    
5585 +  align_target(align_jumps);
5586    popall_execute_normal=get_target();
5587    for (i=0;i<N_REGS;i++) {
5588        if (need_to_preserve[i])
5589            raw_pop_l_r(i);
5590    }
5591    raw_jmp((uae_u32)execute_normal);
5424  align_target(32);
5592  
5593 +  align_target(align_jumps);
5594    popall_cache_miss=get_target();
5595    for (i=0;i<N_REGS;i++) {
5596        if (need_to_preserve[i])
5597            raw_pop_l_r(i);
5598    }
5599    raw_jmp((uae_u32)cache_miss);
5432  align_target(32);
5600  
5601 +  align_target(align_jumps);
5602    popall_recompile_block=get_target();
5603    for (i=0;i<N_REGS;i++) {
5604        if (need_to_preserve[i])
5605            raw_pop_l_r(i);
5606    }
5607    raw_jmp((uae_u32)recompile_block);
5608 <  align_target(32);
5609 <  
5608 >
5609 >  align_target(align_jumps);
5610    popall_exec_nostats=get_target();
5611    for (i=0;i<N_REGS;i++) {
5612        if (need_to_preserve[i])
5613            raw_pop_l_r(i);
5614    }
5615    raw_jmp((uae_u32)exec_nostats);
5616 <  align_target(32);
5617 <  
5616 >
5617 >  align_target(align_jumps);
5618    popall_check_checksum=get_target();
5619    for (i=0;i<N_REGS;i++) {
5620        if (need_to_preserve[i])
5621            raw_pop_l_r(i);
5622    }
5623    raw_jmp((uae_u32)check_checksum);
5624 <  align_target(32);
5625 <  
5624 >
5625 >  align_target(align_jumps);
5626    current_compile_p=get_target();
5627   #else
5628    popall_exec_nostats=(void *)exec_nostats;
# Line 5463 | Line 5631 | static __inline__ void create_popalls(vo
5631    popall_recompile_block=(void *)recompile_block;
5632    popall_do_nothing=(void *)do_nothing;
5633    popall_check_checksum=(void *)check_checksum;
5466  pushall_call_handler=get_target();  
5634   #endif
5635  
5636    /* And now, the code to do the matching pushes and then jump
# Line 5479 | Line 5646 | static __inline__ void create_popalls(vo
5646    raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5647    raw_and_l_ri(r,TAGMASK);
5648    raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5649 +
5650 + #ifdef X86_ASSEMBLY
5651 +  align_target(align_jumps);
5652 +  m68k_compile_execute = (void (*)(void))get_target();
5653 +  for (i=N_REGS;i--;) {
5654 +          if (need_to_preserve[i])
5655 +                  raw_push_l_r(i);
5656 +  }
5657 +  align_target(align_loops);
5658 +  uae_u32 dispatch_loop = (uae_u32)get_target();
5659 +  r=REG_PC_TMP;
5660 +  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5661 +  raw_and_l_ri(r,TAGMASK);
5662 +  raw_call_m_indexed((uae_u32)cache_tags,r,4);
5663 +  raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5664 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5665 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5666 +  raw_call((uae_u32)m68k_do_specialties);
5667 +  raw_test_l_rr(REG_RESULT,REG_RESULT);
5668 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5669 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5670 +  raw_cmp_b_mi((uae_u32)&quit_program,0);
5671 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5672 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5673 +  for (i=0;i<N_REGS;i++) {
5674 +          if (need_to_preserve[i])
5675 +                  raw_pop_l_r(i);
5676 +  }
5677 +  raw_ret();
5678 + #endif
5679   }
5680  
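
The X86_ASSEMBLY block above hand-emits the top-level dispatch loop that the plain-C m68k_do_compile_execute (end of this file) otherwise provides, saving one call/return round trip per executed block. A rough C rendition of what the generated m68k_compile_execute does (behavioural sketch, not the actual encoding):

    static void dispatch_loop_sketch(void)
    {
        for (;;) {
            // call the compiled (or default) handler for the current 68k PC
            ((compiled_handler)cache_tags[cacheline(regs.pc_p)].handler)();
            if (regs.spcflags == 0)
                continue;                  // hot path: straight back to dispatch
            if (!m68k_do_specialties())
                continue;                  // special flags handled, keep running
            if (!quit_program)
                continue;                  // loop until a quit is requested
            return;                        // real epilogue restores saved registers
        }
    }
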
5681   static __inline__ void reset_lists(void)
# Line 5496 | Line 5693 | static void prepare_block(blockinfo* bi)
5693      int i;
5694  
5695      set_target(current_compile_p);
5696 <    align_target(32);
5696 >    align_target(align_jumps);
5697      bi->direct_pen=(cpuop_func *)get_target();
5698      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5699      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5700      raw_jmp((uae_u32)popall_execute_normal);
5701  
5702 <    align_target(32);
5702 >    align_target(align_jumps);
5703      bi->direct_pcc=(cpuop_func *)get_target();
5704      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5705      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5706      raw_jmp((uae_u32)popall_check_checksum);
5510
5511    align_target(32);
5707      current_compile_p=get_target();
5708  
5709      bi->deplist=NULL;
# Line 5561 | Line 5756 | void build_comp(void)
5756          
5757          for (i = 0; tbl[i].opcode < 65536; i++) {
5758                  int cflow = table68k[tbl[i].opcode].cflow;
5759 +                if (USE_INLINING && ((cflow & fl_const_jump) != 0))
5760 +                        cflow = fl_const_jump;
5761 +                else
5762 +                        cflow &= ~fl_const_jump;
5763                  prop[cft_map(tbl[i].opcode)].cflow = cflow;
5764  
5765                  int uses_fpu = tbl[i].specific & 32;
# Line 5854 | Line 6053 | static void compile_block(cpu_history* p
6053          int r;
6054          int was_comp=0;
6055          uae_u8 liveflags[MAXRUN+1];
6056 + #if USE_CHECKSUM_INFO
6057 +        bool trace_in_rom = isinrom((uintptr)pc_hist[0].location);
6058 +        uae_u32 max_pcp=(uae_u32)pc_hist[blocklen - 1].location;
6059 +        uae_u32 min_pcp=max_pcp;
6060 + #else
6061          uae_u32 max_pcp=(uae_u32)pc_hist[0].location;
6062          uae_u32 min_pcp=max_pcp;
6063 + #endif
6064          uae_u32 cl=cacheline(pc_hist[0].location);
6065          void* specflags=(void*)&regs.spcflags;
6066          blockinfo* bi=NULL;
# Line 5899 | Line 6104 | static void compile_block(cpu_history* p
6104          remove_deps(bi); /* We are about to create new code */
6105          bi->optlevel=optlev;
6106          bi->pc_p=(uae_u8*)pc_hist[0].location;
6107 + #if USE_CHECKSUM_INFO
6108 +        free_checksum_info_chain(bi->csi);
6109 +        bi->csi = NULL;
6110 + #endif
6111          
6112          liveflags[blocklen]=0x1f; /* All flags needed afterwards */
6113          i=blocklen;
# Line 5906 | Line 6115 | static void compile_block(cpu_history* p
6115              uae_u16* currpcp=pc_hist[i].location;
6116              uae_u32 op=DO_GET_OPCODE(currpcp);
6117  
6118 + #if USE_CHECKSUM_INFO
6119 +                trace_in_rom = trace_in_rom && isinrom((uintptr)currpcp);
6120 + #if USE_INLINING
6121 +                if (is_const_jump(op)) {
6122 +                        checksum_info *csi = alloc_checksum_info();
6123 +                        csi->start_p = (uae_u8 *)min_pcp;
6124 +                        csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6125 +                        csi->next = bi->csi;
6126 +                        bi->csi = csi;
6127 +                        max_pcp = (uae_u32)currpcp;
6128 +                }
6129 + #endif
6130 +                min_pcp = (uae_u32)currpcp;
6131 + #else
6132              if ((uae_u32)currpcp<min_pcp)
6133                  min_pcp=(uae_u32)currpcp;
6134              if ((uae_u32)currpcp>max_pcp)
6135                  max_pcp=(uae_u32)currpcp;
6136 + #endif
6137  
6138                  liveflags[i]=((liveflags[i+1]&
6139                                 (~prop[op].set_flags))|
# Line 5918 | Line 6142 | static void compile_block(cpu_history* p
6142                      liveflags[i]&= ~FLAG_Z;
6143          }
6144  
6145 + #if USE_CHECKSUM_INFO
6146 +        checksum_info *csi = alloc_checksum_info();
6147 +        csi->start_p = (uae_u8 *)min_pcp;
6148 +        csi->length = max_pcp - min_pcp + LONGEST_68K_INST;
6149 +        csi->next = bi->csi;
6150 +        bi->csi = csi;
6151 + #endif
6152 +
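
With USE_CHECKSUM_INFO, the backward scan above carves the recorded trace into one checksum span per contiguous run of 68k code: whenever the scan crosses a constant jump it closes the current span (whose start is still the successor instruction's address, i.e. the jump target) and begins a new one at the jump itself. The same logic as a standalone helper (sketch; Span and the parameters are illustrative):

    struct Span { const uae_u8 *start; uae_u32 length; };

    static int split_spans(uae_u16 **hist, int n, Span *out,
                           bool (*is_cjump)(uae_u16 *))
    {
        int nspans = 0;
        uae_u32 max_pc = (uae_u32)hist[n - 1];    // scan runs last-to-first
        uae_u32 min_pc = max_pc;
        for (int i = n; i--; ) {
            if (is_cjump(hist[i])) {              // close the run that follows this jump
                out[nspans].start  = (const uae_u8 *)min_pc;
                out[nspans].length = max_pc - min_pc + LONGEST_68K_INST;
                nspans++;
                max_pc = (uae_u32)hist[i];
            }
            min_pc = (uae_u32)hist[i];
        }
        out[nspans].start  = (const uae_u8 *)min_pc; // earliest run, closed after the loop
        out[nspans].length = max_pc - min_pc + LONGEST_68K_INST;
        return nspans + 1;
    }
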
6153          bi->needed_flags=liveflags[0];
6154  
6155 <        align_target(32);
6155 >        align_target(align_loops);
6156          was_comp=0;
6157  
6158          bi->direct_handler=(cpuop_func *)get_target();
# Line 6007 | Line 6239 | static void compile_block(cpu_history* p
6239                      raw_mov_l_mi((uae_u32)&regs.pc_p,
6240                                   (uae_u32)pc_hist[i].location);
6241                      raw_call((uae_u32)cputbl[opcode]);
6242 + #if PROFILE_UNTRANSLATED_INSNS
6243 +                        // raw_cputbl_count[] is indexed with plain opcode (in m68k order)
6244 +                        raw_add_l_mi((uae_u32)&raw_cputbl_count[cft_map(opcode)],1);
6245 + #endif
6246                      //raw_add_l_mi((uae_u32)&oink,1); // FIXME
6247   #if USE_NORMAL_CALLING_CONVENTION
6248                      raw_inc_sp(4);
# Line 6095 | Line 6331 | static void compile_block(cpu_history* p
6331                  raw_jmp((uae_u32)popall_do_nothing);
6332                  create_jmpdep(bi,0,tba,t1);
6333  
6334 <                align_target(16);
6334 >                align_target(align_jumps);
6335                  /* not-predicted outcome */
6336                  *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6337                  live=tmp; /* Ouch again */
# Line 6164 | Line 6400 | static void compile_block(cpu_history* p
6400          big_to_small_state(&live,&(bi->env));
6401   #endif
6402  
6403 + #if USE_CHECKSUM_INFO
6404 +        remove_from_list(bi);
6405 +        if (trace_in_rom) {
6406 +                // No need to checksum that block trace on cache invalidation
6407 +                free_checksum_info_chain(bi->csi);
6408 +                bi->csi = NULL;
6409 +                add_to_dormant(bi);
6410 +        }
6411 +        else {
6412 +            calc_checksum(bi,&(bi->c1),&(bi->c2));
6413 +                add_to_active(bi);
6414 +        }
6415 + #else
6416          if (next_pc_p+extra_len>=max_pcp &&
6417              next_pc_p+extra_len<max_pcp+LONGEST_68K_INST)
6418              max_pcp=next_pc_p+extra_len;  /* extra_len covers flags magic */
6419          else
6420              max_pcp+=LONGEST_68K_INST;
6421 +
6422          bi->len=max_pcp-min_pcp;
6423          bi->min_pcp=min_pcp;
6424 <                    
6424 >        
6425          remove_from_list(bi);
6426          if (isinrom(min_pcp) && isinrom(max_pcp)) {
6427              add_to_dormant(bi); /* No need to checksum it on cache flush.
# Line 6182 | Line 6432 | static void compile_block(cpu_history* p
6432              calc_checksum(bi,&(bi->c1),&(bi->c2));
6433              add_to_active(bi);
6434          }
6435 + #endif
6436          
6437          current_cache_size += get_target() - (uae_u8 *)current_compile_p;
6438          
# Line 6201 | Line 6452 | static void compile_block(cpu_history* p
6452   #endif
6453          
6454          log_dump();
6455 <        align_target(32);
6455 >        align_target(align_jumps);
6456  
6457          /* This is the non-direct handler */
6458          bi->handler=
# Line 6217 | Line 6468 | static void compile_block(cpu_history* p
6468  
6469          raw_jmp((uae_u32)bi->direct_handler);
6470  
6220        align_target(32);
6471          current_compile_p=get_target();
6222
6472          raise_in_cl_list(bi);
6473          
6474          /* We will flush soon, anyway, so let's do it now */
# Line 6245 | Line 6494 | void exec_nostats(void)
6494   {
6495          for (;;)  {
6496                  uae_u32 opcode = GET_OPCODE;
6248 #ifdef X86_ASSEMBLY__disable
6249                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6250                                                         : : "b" (cpufunctbl[opcode]), "a" (opcode)
6251                                                         : "%edx", "%ecx", "%esi", "%edi",  "%ebp", "memory", "cc");
6252 #else
6497                  (*cpufunctbl[opcode])(opcode);
6254 #endif
6498                  if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6499                          return; /* We will deal with the spcflags in the caller */
6500                  }
# Line 6276 | Line 6519 | void execute_normal(void)
6519   #if FLIGHT_RECORDER
6520                          m68k_record_step(m68k_getpc());
6521   #endif
6279 #ifdef X86_ASSEMBLY__disable
6280                        __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6281                                                                 : : "b" (cpufunctbl[opcode]), "a" (opcode)
6282                                                                 : "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc");
6283 #else
6522                          (*cpufunctbl[opcode])(opcode);
6285 #endif
6523                          if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6524                                  compile_block(pc_hist, blocklen);
6525                                  return; /* We will deal with the spcflags in the caller */
# Line 6295 | Line 6532 | void execute_normal(void)
6532  
6533   typedef void (*compiled_handler)(void);
6534  
6535 + #ifdef X86_ASSEMBLY
6536 + void (*m68k_compile_execute)(void) = NULL;
6537 + #else
6538   void m68k_do_compile_execute(void)
6539   {
6540          for (;;) {
6301 #ifdef X86_ASSEMBLY
6302                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6303                                                         : : "b" (cache_tags[cacheline(regs.pc_p)].handler)
6304                                                         : "%edx", "%ecx", "%eax", "%esi", "%edi", "%ebp", "memory", "cc");
6305 #else
6541                  ((compiled_handler)(pushall_call_handler))();
6307 #endif
6542                  /* Whenever we return from that, we should check spcflags */
6543                  if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6544                          if (m68k_do_specialties ())
# Line 6312 | Line 6546 | void m68k_do_compile_execute(void)
6546                  }
6547          }
6548   }
6549 + #endif

Diff Legend

Removed lines (shown with old line numbers only)
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)