
Comparing BasiliskII/src/uae_cpu/compiler/compemu_support.cpp (file contents):
Revision 1.3 by gbeauche, 2002-09-18T09:55:37Z vs.
Revision 1.7 by gbeauche, 2002-10-01T16:22:36Z

# Line 2 | Line 2
2   #error "Only Real or Direct Addressing is supported with the JIT Compiler"
3   #endif
4  
5 + #if X86_ASSEMBLY && !SAHF_SETO_PROFITABLE
6 + #error "Only [LS]AHF scheme to [gs]et flags is supported with the JIT Compiler"
7 + #endif
8 +
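Editor's note: the new guard pins the flag-handling scheme. On x86 the JIT keeps the emulated 68k condition codes in the host EFLAGS, saving them with LAHF (which copies SF/ZF/AF/PF/CF into AH) plus SETO (which captures OF, the one flag LAHF misses), and restoring them with an ADD/SAHF pair. A minimal sketch with hypothetical helper names follows; in reality the JIT emits these opcode bytes into translated code rather than calling C functions, since a compiler is free to clobber EFLAGS between C statements.

    /* Hypothetical illustration of the [LS]AHF scheme; not code from this file. */
    static inline uae_u32 sketch_get_flags(void)
    {
        uae_u32 eax;
        /* AH <- SF:ZF:0:AF:0:PF:1:CF, AL <- OF */
        __asm__ __volatile__("lahf\n\tseto %%al" : "=a" (eax));
        return eax;
    }

    static inline void sketch_set_flags(uae_u32 eax)
    {
        /* addb $0x7f,%al sets OF exactly when AL was 1 (0x01 + 0x7f overflows
           a signed byte); sahf then restores SF/ZF/AF/PF/CF from AH without
           touching OF. */
        __asm__ __volatile__("addb $0x7f, %%al\n\tsahf" : : "a" (eax) : "cc");
    }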
9   #define USE_MATCH 0
10  
11   /* kludge for Brian, so he can compile under MSVC++ */
# Line 70 | Line 74 | compop_func *nfcompfunctbl[65536];
74   cpuop_func *nfcpufunctbl[65536];
75   uae_u8* comp_pc_p;
76  
77 + // From newcpu.cpp
78 + extern bool quit_program;
79 +
80   // gb-- Extra data for Basilisk II/JIT
81   #if JIT_DEBUG
82   static bool             JITDebug                        = false;        // Enable runtime disassemblers through mon?
# Line 84 | Line 91 | static bool            lazy_flush                      = true;         // Fl
91   static bool             avoid_fpu                       = true;         // Flag: compile FPU instructions ?
92   static bool             have_cmov                       = false;        // target has CMOV instructions ?
93   static bool             have_rat_stall          = true;         // target has partial register stalls ?
94 + static bool             tune_alignment          = false;        // Tune code alignments for running CPU ?
95 + static int              align_loops                     = 32;           // Align the start of loops
96 + static int              align_jumps                     = 32;           // Align the start of jumps
97   static int              zero_fd                         = -1;
98   static int              optcount[10]            = {
99          10,             // How often a block has to be executed before it is translated
# Line 100 | Line 110 | struct op_properties {
110   };
111   static op_properties prop[65536];
112  
103 // gb-- Control Flow Predicates
104
113   static inline int end_block(uae_u32 opcode)
114   {
115          return (prop[opcode].cflow & fl_end_block);
116   }
117  
110 static inline bool may_trap(uae_u32 opcode)
111 {
112        return (prop[opcode].cflow & fl_trap);
113 }
114
118   uae_u8* start_pc_p;
119   uae_u32 start_pc;
120   uae_u32 current_block_pc_p;
# Line 487 | Line 490 | static void prepare_block(blockinfo* bi)
490     compiled. If the list of free blockinfos is empty, we allocate a new
491     pool of blockinfos and link the newly created blockinfos together
492     into the list of free blockinfos. Otherwise, we simply pop a structure
493 <   of the free list.
493 >   off the free list.
494  
495     Blockinfos are lazily deallocated, i.e. chained together in the
496     list of free blockinfos whenever a translation cache flush (hard or
497     soft) request occurs.
498   */
499  
500 < #if USE_SEPARATE_BIA
501 < const int BLOCKINFO_POOL_SIZE = 128;
502 < struct blockinfo_pool {
503 <        blockinfo bi[BLOCKINFO_POOL_SIZE];
504 <        blockinfo_pool *next;
500 > template< class T >
501 > class LazyBlockAllocator
502 > {
503 >        enum {
504 >                kPoolSize = 1 + 4096 / sizeof(T)
505 >        };
506 >        struct Pool {
507 >                T chunk[kPoolSize];
508 >                Pool * next;
509 >        };
510 >        Pool * mPools;
511 >        T * mChunks;
512 > public:
513 >        LazyBlockAllocator() : mPools(0), mChunks(0) { }
514 >        ~LazyBlockAllocator();
515 >        T * acquire();
516 >        void release(T * const);
517   };
503 static blockinfo_pool * blockinfo_pools = 0;
504 static blockinfo *              free_blockinfos = 0;
505 #endif
518  
519 < static __inline__ blockinfo *alloc_blockinfo(void)
519 > template< class T >
520 > LazyBlockAllocator<T>::~LazyBlockAllocator()
521   {
522 < #if USE_SEPARATE_BIA
523 <        if (!free_blockinfos) {
524 <                // There is no blockinfo struct left, allocate a new
525 <                // pool and link the chunks into the free list
526 <                blockinfo_pool *bi_pool = (blockinfo_pool *)malloc(sizeof(blockinfo_pool));
527 <                for (blockinfo *bi = &bi_pool->bi[0]; bi < &bi_pool->bi[BLOCKINFO_POOL_SIZE]; bi++) {
528 <                        bi->next = free_blockinfos;
529 <                        free_blockinfos = bi;
522 >        Pool * currentPool = mPools;
523 >        while (currentPool) {
524 >                Pool * deadPool = currentPool;
525 >                currentPool = currentPool->next;
526 >                free(deadPool);
527 >        }
528 > }
529 >
530 > template< class T >
531 > T * LazyBlockAllocator<T>::acquire()
532 > {
533 >        if (!mChunks) {
534 >                // There is no chunk left, allocate a new pool and link the
535 >                // chunks into the free list
536 >                Pool * newPool = (Pool *)malloc(sizeof(Pool));
537 >                for (T * chunk = &newPool->chunk[0]; chunk < &newPool->chunk[kPoolSize]; chunk++) {
538 >                        chunk->next = mChunks;
539 >                        mChunks = chunk;
540                  }
541 <                bi_pool->next = blockinfo_pools;
542 <                blockinfo_pools = bi_pool;
541 >                newPool->next = mPools;
542 >                mPools = newPool;
543          }
544 <        blockinfo *bi = free_blockinfos;
545 <        free_blockinfos = bi->next;
546 < #else
524 <        blockinfo *bi = (blockinfo*)current_compile_p;
525 <        current_compile_p += sizeof(blockinfo);
526 < #endif
527 <        return bi;
544 >        T * chunk = mChunks;
545 >        mChunks = chunk->next;
546 >        return chunk;
547   }
548  
549 < static __inline__ void free_blockinfo(blockinfo *bi)
549 > template< class T >
550 > void LazyBlockAllocator<T>::release(T * const chunk)
551   {
552 +        chunk->next = mChunks;
553 +        mChunks = chunk;
554 + }
555 +
556 + template< class T >
557 + class HardBlockAllocator
558 + {
559 + public:
560 +        T * acquire() {
561 +                T * data = (T *)current_compile_p;
562 +                current_compile_p += sizeof(T);
563 +                return data;
564 +        }
565 +
566 +        void release(T * const chunk) {
567 +                // Deallocated on invalidation
568 +        }
569 + };
570 +
571   #if USE_SEPARATE_BIA
572 <        bi->next = free_blockinfos;
573 <        free_blockinfos = bi;
572 > static LazyBlockAllocator<blockinfo> BlockInfoAllocator;
573 > static LazyBlockAllocator<checksum_info> ChecksumInfoAllocator;
574 > #else
575 > static HardBlockAllocator<blockinfo> BlockInfoAllocator;
576 > static HardBlockAllocator<checksum_info> ChecksumInfoAllocator;
577 > #endif
578 >
579 >
580 > static __inline__ blockinfo *alloc_blockinfo(void)
581 > {
582 >        blockinfo *bi = BlockInfoAllocator.acquire();
583 > #if USE_CHECKSUM_INFO
584 >        bi->csi = NULL;
585   #endif
586 +        return bi;
587   }
588  
589 < static void free_blockinfo_pools(void)
589 > static __inline__ void free_blockinfo(blockinfo *bi)
590   {
591 < #if USE_SEPARATE_BIA
592 <        int blockinfo_pool_count = 0;
593 <        blockinfo_pool *curr_pool = blockinfo_pools;
594 <        while (curr_pool) {
595 <                blockinfo_pool_count++;
596 <                blockinfo_pool *dead_pool = curr_pool;
546 <                curr_pool = curr_pool->next;
547 <                free(dead_pool);
591 > #if USE_CHECKSUM_INFO
592 >        checksum_info *csi = bi->csi;
593 >        while (csi != NULL) {
594 >                checksum_info *csi2 = csi->next;
595 >                ChecksumInfoAllocator.release(csi);
596 >                csi = csi2;
597          }
549        
550        uae_u32 blockinfo_pools_size = blockinfo_pool_count * BLOCKINFO_POOL_SIZE * sizeof(blockinfo);
551        write_log("### Blockinfo allocation statistics\n");
552        write_log("Number of blockinfo pools  : %d\n", blockinfo_pool_count);
553        write_log("Total number of blockinfos : %d (%d KB)\n",
554                          blockinfo_pool_count * BLOCKINFO_POOL_SIZE,
555                          blockinfo_pools_size / 1024);
556        write_log("\n");
598   #endif
599 +        BlockInfoAllocator.release(bi);
600   }
601  
602   static __inline__ void alloc_blockinfos(void)
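Editor's note: both allocator templates expose the same acquire()/release() pair, so the #if USE_SEPARATE_BIA above is the only place the policy choice shows. A usage illustration follows; the node type is hypothetical, while the real instantiations are blockinfo and checksum_info, both of which carry the required next link.

    struct node {            /* hypothetical element type */
        int   payload;
        node *next;          /* LazyBlockAllocator threads its free list through this */
    };

    static LazyBlockAllocator<node> NodeAllocator;

    static void allocator_example(void)
    {
        node *n = NodeAllocator.acquire();  /* pops a chunk; mallocs a fresh Pool
                                               (~4KB worth of chunks) if empty */
        n->payload = 42;
        NodeAllocator.release(n);           /* back onto the free list; the Pools
                                               themselves are freed only in the
                                               ~LazyBlockAllocator destructor */
    }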
# Line 4558 | Line 4600 | void compiler_init(void)
4600          raw_init_cpu();
4601          write_log("<JIT compiler> : target processor has CMOV instructions : %s\n", have_cmov ? "yes" : "no");
4602          write_log("<JIT compiler> : target processor can suffer from partial register stalls : %s\n", have_rat_stall ? "yes" : "no");
4603 +        write_log("<JIT compiler> : alignment for loops, jumps are %d, %d\n", align_loops, align_jumps);
4604          
4605          // Translation cache flush mechanism
4606          lazy_flush = PrefsFindBool("jitlazyflush");
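Editor's note: align_loops and align_jumps feed align_target(), which pads the code buffer up to the requested boundary. Its definition is outside this diff; the following is only a plausible sketch in terms of the emitter primitives used throughout this file, assuming the alignment is a power of two.

    /* Sketch only; the real align_target() lives elsewhere in the JIT. */
    static void sketch_align_target(uae_u32 a)
    {
        /* pad with 1-byte NOPs until the emit pointer reaches the boundary */
        while (((uae_u32)get_target()) & (a - 1))
            emit_byte(0x90);                 /* x86 NOP */
    }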
# Line 4593 | Line 4636 | void compiler_exit(void)
4636                  compiled_code = 0;
4637          }
4638          
4596        // Deallocate blockinfo pools
4597        free_blockinfo_pools();
4598        
4639   #ifndef WIN32
4640          // Close /dev/zero
4641          if (zero_fd > 0)
# Line 5168 | Line 5208 | void alloc_cache(void)
5208  
5209   extern cpuop_rettype op_illg_1 (uae_u32 opcode) REGPARAM;
5210  
5211 < static void calc_checksum(blockinfo* bi, uae_u32* c1, uae_u32* c2)
5211 > static void calc_checksum(CSI_TYPE* csi, uae_u32* c1, uae_u32* c2)
5212   {
5213      uae_u32 k1=0;
5214      uae_u32 k2=0;
5215 <    uae_s32 len=bi->len;
5216 <    uae_u32 tmp=bi->min_pcp;
5215 >    uae_s32 len=CSI_LENGTH(csi);
5216 >    uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5217      uae_u32* pos;
5218  
5219      len+=(tmp&3);
# Line 5196 | Line 5236 | static void calc_checksum(blockinfo* bi,
5236      }
5237   }
5238  
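Editor's note: the loop body elided between the two hunk markers accumulates the pair (c1,c2) over the 32-bit words covering the block's 68k source bytes; len += (tmp & 3) widens the range downwards so the scan can run word-aligned. A standalone sketch of the idea follows (a hypothetical helper using the file's uae_* typedefs; the exact accumulation in the real body is not shown in this diff).

    /* Dual-checksum sketch: fold every word into two independent accumulators
       so that two different corruptions are unlikely to cancel in both. */
    static void sketch_checksum_span(const uae_u8 *start, uae_s32 len,
                                     uae_u32 *c1, uae_u32 *c2)
    {
        uae_u32 addr = (uae_u32)start;
        len += addr & 3;               /* extend down to the aligned word */
        const uae_u32 *pos = (const uae_u32 *)(addr & ~3);
        uae_u32 k1 = 0, k2 = 0;
        while (len > 0) {
            k1 += *pos;                /* checksum 1: running sum */
            k2 ^= *pos;                /* checksum 2: running XOR */
            pos++;
            len -= 4;
        }
        *c1 = k1;
        *c2 = k2;
    }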
5239 < static void show_checksum(blockinfo* bi)
5239 > static void show_checksum(CSI_TYPE* csi)
5240   {
5241      uae_u32 k1=0;
5242      uae_u32 k2=0;
5243 <    uae_s32 len=bi->len;
5244 <    uae_u32 tmp=(uae_u32)bi->pc_p;
5243 >    uae_s32 len=CSI_LENGTH(csi);
5244 >    uae_u32 tmp=(uae_u32)CSI_START_P(csi);
5245      uae_u32* pos;
5246  
5247      len+=(tmp&3);
# Line 5273 | Line 5313 | static int called_check_checksum(blockin
5313   static inline int block_check_checksum(blockinfo* bi)
5314   {
5315      uae_u32     c1,c2;
5316 <    int         isgood;
5316 >    bool        isgood;
5317      
5318      if (bi->status!=BI_NEED_CHECK)
5319          return 1;  /* This block is in a checked state */
5320      
5321      checksum_count++;
5322 +
5323 + #if USE_CHECKSUM_INFO
5324 +    checksum_info *csi = bi->csi;
5325 +        Dif(!csi) abort();
5326 +        isgood = true;
5327 +        while (csi && isgood) {
5328 +                if (csi->c1 || csi->c2)
5329 +                        calc_checksum(csi,&c1,&c2);
5330 +                else
5331 +                        c1 = c2 = 1; /* Make sure it doesn't match */
5332 +                isgood = isgood && (c1 == csi->c1 && c2 == csi->c2);
5333 +                csi = csi->next;
5334 +        }
5335 + #else
5336      if (bi->c1 || bi->c2)
5337          calc_checksum(bi,&c1,&c2);
5338      else {
5339          c1=c2=1;  /* Make sure it doesn't match */
5340 <    }
5340 >        }
5341      
5342      isgood=(c1==bi->c1 && c2==bi->c2);
5343 + #endif
5344 +
5345      if (isgood) {
5346          /* This block is still OK. So we reactivate. Of course, that
5347             means we have to move it into the needs-to-be-flushed list */
# Line 5403 | Line 5459 | static __inline__ void create_popalls(vo
5459       registers before jumping back to the various get-out routines.
5460       This generates the code for it.
5461    */
5462 <  popall_do_nothing=current_compile_p;
5462 >  align_target(align_jumps);
5463 >  popall_do_nothing=get_target();
5464    for (i=0;i<N_REGS;i++) {
5465        if (need_to_preserve[i])
5466            raw_pop_l_r(i);
5467    }
5468    raw_jmp((uae_u32)do_nothing);
5412  align_target(32);
5469    
5470 +  align_target(align_jumps);
5471    popall_execute_normal=get_target();
5472    for (i=0;i<N_REGS;i++) {
5473        if (need_to_preserve[i])
5474            raw_pop_l_r(i);
5475    }
5476    raw_jmp((uae_u32)execute_normal);
5420  align_target(32);
5477  
5478 +  align_target(align_jumps);
5479    popall_cache_miss=get_target();
5480    for (i=0;i<N_REGS;i++) {
5481        if (need_to_preserve[i])
5482            raw_pop_l_r(i);
5483    }
5484    raw_jmp((uae_u32)cache_miss);
5428  align_target(32);
5485  
5486 +  align_target(align_jumps);
5487    popall_recompile_block=get_target();
5488    for (i=0;i<N_REGS;i++) {
5489        if (need_to_preserve[i])
5490            raw_pop_l_r(i);
5491    }
5492    raw_jmp((uae_u32)recompile_block);
5493 <  align_target(32);
5494 <  
5493 >
5494 >  align_target(align_jumps);
5495    popall_exec_nostats=get_target();
5496    for (i=0;i<N_REGS;i++) {
5497        if (need_to_preserve[i])
5498            raw_pop_l_r(i);
5499    }
5500    raw_jmp((uae_u32)exec_nostats);
5501 <  align_target(32);
5502 <  
5501 >
5502 >  align_target(align_jumps);
5503    popall_check_checksum=get_target();
5504    for (i=0;i<N_REGS;i++) {
5505        if (need_to_preserve[i])
5506            raw_pop_l_r(i);
5507    }
5508    raw_jmp((uae_u32)check_checksum);
5509 <  align_target(32);
5510 <  
5509 >
5510 >  align_target(align_jumps);
5511    current_compile_p=get_target();
5512   #else
5513    popall_exec_nostats=(void *)exec_nostats;
# Line 5459 | Line 5516 | static __inline__ void create_popalls(vo
5516    popall_recompile_block=(void *)recompile_block;
5517    popall_do_nothing=(void *)do_nothing;
5518    popall_check_checksum=(void *)check_checksum;
5462  pushall_call_handler=get_target();  
5519   #endif
5520  
5521    /* And now, the code to do the matching pushes and then jump
# Line 5475 | Line 5531 | static __inline__ void create_popalls(vo
5531    raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5532    raw_and_l_ri(r,TAGMASK);
5533    raw_jmp_m_indexed((uae_u32)cache_tags,r,4);
5534 +
5535 + #ifdef X86_ASSEMBLY
5536 +  align_target(align_jumps);
5537 +  m68k_compile_execute = (void (*)(void))get_target();
5538 +  for (i=N_REGS;i--;) {
5539 +          if (need_to_preserve[i])
5540 +                  raw_push_l_r(i);
5541 +  }
5542 +  align_target(align_loops);
5543 +  uae_u32 dispatch_loop = (uae_u32)get_target();
5544 +  r=REG_PC_TMP;
5545 +  raw_mov_l_rm(r,(uae_u32)&regs.pc_p);
5546 +  raw_and_l_ri(r,TAGMASK);
5547 +  raw_call_m_indexed((uae_u32)cache_tags,r,4);
5548 +  raw_cmp_l_mi((uae_u32)&regs.spcflags,0);
5549 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5550 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5551 +  raw_call((uae_u32)m68k_do_specialties);
5552 +  raw_test_l_rr(REG_RESULT,REG_RESULT);
5553 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5554 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5555 +  raw_cmp_b_mi((uae_u32)&quit_program,0);
5556 +  raw_jcc_b_oponly(NATIVE_CC_EQ);
5557 +  emit_byte(dispatch_loop-((uae_u32)get_target()+1));
5558 +  for (i=0;i<N_REGS;i++) {
5559 +          if (need_to_preserve[i])
5560 +                  raw_pop_l_r(i);
5561 +  }
5562 +  raw_ret();
5563 + #endif
5564   }
5565  
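Editor's note: in the generated dispatch loop above, each emit_byte(dispatch_loop - ((uae_u32)get_target() + 1)) resolves a Jcc rel8. x86 measures short-branch displacements from the end of the instruction, which is one byte past the displacement byte being written, hence the +1. The arithmetic in isolation, as a hypothetical helper:

    /* Hypothetical helper showing the rel8 computation used above. */
    static uae_u8 sketch_branch_disp(uae_u32 loop_head, uae_u32 disp_byte_addr)
    {
        uae_s32 disp = (uae_s32)(loop_head - (disp_byte_addr + 1));
        /* a backward branch to the loop head must fit in a signed byte,
           i.e. -128 <= disp <= 127 */
        return (uae_u8)disp;
    }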
5566   static __inline__ void reset_lists(void)
# Line 5492 | Line 5578 | static void prepare_block(blockinfo* bi)
5578      int i;
5579  
5580      set_target(current_compile_p);
5581 <    align_target(32);
5581 >    align_target(align_jumps);
5582      bi->direct_pen=(cpuop_func *)get_target();
5583      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5584      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5585      raw_jmp((uae_u32)popall_execute_normal);
5586  
5587 <    align_target(32);
5587 >    align_target(align_jumps);
5588      bi->direct_pcc=(cpuop_func *)get_target();
5589      raw_mov_l_rm(0,(uae_u32)&(bi->pc_p));
5590      raw_mov_l_mr((uae_u32)&regs.pc_p,0);
5591      raw_jmp((uae_u32)popall_check_checksum);
5506
5507    align_target(32);
5592      current_compile_p=get_target();
5593  
5594      bi->deplist=NULL;
# Line 5916 | Line 6000 | static void compile_block(cpu_history* p
6000  
6001          bi->needed_flags=liveflags[0];
6002  
6003 <        align_target(32);
6003 >        align_target(align_loops);
6004          was_comp=0;
6005  
6006          bi->direct_handler=(cpuop_func *)get_target();
# Line 6091 | Line 6175 | static void compile_block(cpu_history* p
6175                  raw_jmp((uae_u32)popall_do_nothing);
6176                  create_jmpdep(bi,0,tba,t1);
6177  
6178 <                align_target(16);
6178 >                align_target(align_jumps);
6179                  /* not-predicted outcome */
6180                  *branchadd=(uae_u32)get_target()-((uae_u32)branchadd+4);
6181                  live=tmp; /* Ouch again */
# Line 6165 | Line 6249 | static void compile_block(cpu_history* p
6249              max_pcp=next_pc_p+extra_len;  /* extra_len covers flags magic */
6250          else
6251              max_pcp+=LONGEST_68K_INST;
6252 +
6253 + #if USE_CHECKSUM_INFO
6254 +        checksum_info *csi = (bi->csi = ChecksumInfoAllocator.acquire());
6255 +        csi->next = NULL;
6256 +        csi->length = max_pcp - min_pcp;
6257 +        csi->start_p = (uae_u8 *)min_pcp;
6258 + #else
6259          bi->len=max_pcp-min_pcp;
6260          bi->min_pcp=min_pcp;
6261 <                    
6261 > #endif
6262 >        
6263          remove_from_list(bi);
6264          if (isinrom(min_pcp) && isinrom(max_pcp)) {
6265              add_to_dormant(bi); /* No need to checksum it on cache flush.
# Line 6175 | Line 6267 | static void compile_block(cpu_history* p
6267                                     flight! */
6268          }
6269          else {
6270 + #if USE_CHECKSUM_INFO
6271 +                calc_checksum(csi,&csi->c1,&csi->c2);
6272 + #else
6273              calc_checksum(bi,&(bi->c1),&(bi->c2));
6274 + #endif
6275              add_to_active(bi);
6276          }
6277          
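Editor's note: the checksum_info definition itself is in a header outside this diff. From its uses here (next, length, start_p, c1, c2, and the CSI_LENGTH/CSI_START_P accessors) it is roughly the following; this is a hedged reconstruction, not the actual header.

    /* Reconstruction inferred from usage in this diff. */
    struct checksum_info {
        checksum_info *next;     /* per-block list; also reused as the
                                    LazyBlockAllocator free-list link */
        uae_u32        length;   /* byte length of the covered 68k region */
        uae_u8        *start_p;  /* start of the covered 68k region */
        uae_u32        c1, c2;   /* the two checksums over that region */
    };

    /* Presumably: */
    #define CSI_LENGTH(csi)  ((csi)->length)
    #define CSI_START_P(csi) ((csi)->start_p)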
# Line 6197 | Line 6293 | static void compile_block(cpu_history* p
6293   #endif
6294          
6295          log_dump();
6296 <        align_target(32);
6296 >        align_target(align_jumps);
6297  
6298          /* This is the non-direct handler */
6299          bi->handler=
# Line 6213 | Line 6309 | static void compile_block(cpu_history* p
6309  
6310          raw_jmp((uae_u32)bi->direct_handler);
6311  
6216        align_target(32);
6312          current_compile_p=get_target();
6218
6313          raise_in_cl_list(bi);
6314          
6315          /* We will flush soon, anyway, so let's do it now */
# Line 6241 | Line 6335 | void exec_nostats(void)
6335   {
6336          for (;;)  {
6337                  uae_u32 opcode = GET_OPCODE;
6244 #ifdef X86_ASSEMBLY__disable
6245                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6246                                                         : : "b" (cpufunctbl[opcode]), "a" (opcode)
6247                                                         : "%edx", "%ecx", "%esi", "%edi",  "%ebp", "memory", "cc");
6248 #else
6338                  (*cpufunctbl[opcode])(opcode);
6250 #endif
6339                  if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL)) {
6340                          return; /* We will deal with the spcflags in the caller */
6341                  }
# Line 6272 | Line 6360 | void execute_normal(void)
6360   #if FLIGHT_RECORDER
6361                          m68k_record_step(m68k_getpc());
6362   #endif
6275 #ifdef X86_ASSEMBLY__disable
6276                        __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6277                                                                 : : "b" (cpufunctbl[opcode]), "a" (opcode)
6278                                                                 : "%edx", "%ecx", "%esi", "%edi", "%ebp", "memory", "cc");
6279 #else
6363                          (*cpufunctbl[opcode])(opcode);
6281 #endif
6364                          if (end_block(opcode) || SPCFLAGS_TEST(SPCFLAG_ALL) || blocklen>=MAXRUN) {
6365                                  compile_block(pc_hist, blocklen);
6366                                  return; /* We will deal with the spcflags in the caller */
# Line 6291 | Line 6373 | void execute_normal(void)
6373  
6374   typedef void (*compiled_handler)(void);
6375  
6376 + #ifdef X86_ASSEMBLY
6377 + void (*m68k_compile_execute)(void) = NULL;
6378 + #else
6379   void m68k_do_compile_execute(void)
6380   {
6381          for (;;) {
6297 #ifdef X86_ASSEMBLY
6298                __asm__ __volatile__("\tpushl %%ebp\n\tcall *%%ebx\n\tpopl %%ebp" /* FIXME */
6299                                                         : : "b" (cache_tags[cacheline(regs.pc_p)].handler)
6300                                                         : "%edx", "%ecx", "%eax", "%esi", "%edi", "%ebp", "memory", "cc");
6301 #else
6382                  ((compiled_handler)(pushall_call_handler))();
6303 #endif
6383                  /* Whenever we return from that, we should check spcflags */
6384                  if (SPCFLAGS_TEST(SPCFLAG_ALL)) {
6385                          if (m68k_do_specialties ())
# Line 6308 | Line 6387 | void m68k_do_compile_execute(void)
6387                  }
6388          }
6389   }
6390 + #endif
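Editor's note: the net effect of this hunk is that with X86_ASSEMBLY the outer fetch-execute loop is itself JIT-generated (built at the end of create_popalls() and reached through the m68k_compile_execute pointer), while other targets keep the portable C loop above. A hypothetical call site:

    /* Hypothetical caller; the real entry point lives in the CPU main loop. */
    static inline void sketch_run_compiled(void)
    {
    #ifdef X86_ASSEMBLY
        m68k_compile_execute();     /* generated: push regs, dispatch through
                                       cache_tags, poll spcflags/quit_program,
                                       pop regs, ret */
    #else
        m68k_do_compile_execute();
    #endif
    }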

Diff Legend

(no marker)  Removed lines
+            Added lines
<            Changed lines (old revision)
>            Changed lines (new revision)