ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp
(Generate patch)

Comparing BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp (file contents):
Revision 1.2 by gbeauche, 2002-09-18T15:56:17Z vs.
Revision 1.6 by gbeauche, 2002-10-03T16:13:46Z

# Line 1 | Line 1
1 + /*
2 + *  compiler/codegen_x86.cpp - IA-32 code generator
3 + *
4 + *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 + *
6 + *  Adaptation for Basilisk II and improvements, copyright 2000-2002
7 + *    Gwenole Beauchesne
8 + *
9 + *  Basilisk II (C) 1997-2002 Christian Bauer
10 + *  
11 + *  This program is free software; you can redistribute it and/or modify
12 + *  it under the terms of the GNU General Public License as published by
13 + *  the Free Software Foundation; either version 2 of the License, or
14 + *  (at your option) any later version.
15 + *
16 + *  This program is distributed in the hope that it will be useful,
17 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 + *  GNU General Public License for more details.
20 + *
21 + *  You should have received a copy of the GNU General Public License
22 + *  along with this program; if not, write to the Free Software
23 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 + */
25 +
26   /* This should eventually end up in machdep/, but for now, x86 is the
27     only target, and it's easier this way... */
28  
29 + #include "flags_x86.h"
30 +
31   /*************************************************************************
32   * Some basic information about the target CPU                           *
33   *************************************************************************/
# Line 1719 | Line 1746 | LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2
1746   }
1747   LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1748  
1749 + LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1750 + {
1751 +    emit_byte(0x80);
1752 +    emit_byte(0x3d);
1753 +    emit_long(d);
1754 +    emit_byte(s);
1755 + }
1756 + LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1757 +
1758   LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1759   {
1760    if (optimize_accum && isaccum(d))
# Line 1893 | Line 1929 | static __inline__ void raw_call_r(R4 r)
1929      emit_byte(0xd0+r);
1930   }
1931  
1932 + static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1933 + {
1934 +    int mu;
1935 +    switch(m) {
1936 +     case 1: mu=0; break;
1937 +     case 2: mu=1; break;
1938 +     case 4: mu=2; break;
1939 +     case 8: mu=3; break;
1940 +     default: abort();
1941 +    }
1942 +    emit_byte(0xff);
1943 +    emit_byte(0x14);
1944 +    emit_byte(0x05+8*r+0x40*mu);
1945 +    emit_long(base);
1946 + }
1947 +
1948   static __inline__ void raw_jmp_r(R4 r)
1949   {
1950      emit_byte(0xff);
# Line 2343 | Line 2395 | static void vec(int x, struct sigcontext
2395   * Checking for CPU features                                             *
2396   *************************************************************************/
2397  
2398 < typedef struct {
2399 <    uae_u32 eax;
2400 <    uae_u32 ecx;
2401 <    uae_u32 edx;
2402 <    uae_u32 ebx;
2403 < } x86_regs;
2404 <
2405 <
2406 < /* This could be so much easier if it could make assumptions about the
2407 <   compiler... */
2408 <
2409 < static uae_u8 cpuid_space[256];  
2410 < static uae_u32 cpuid_ptr;
2411 < static uae_u32 cpuid_level;
2412 <
2413 < static x86_regs cpuid(uae_u32 level)
2414 < {
2415 <    x86_regs answer;
2416 <    uae_u8* tmp=get_target();
2417 <
2418 <    cpuid_ptr=(uae_u32)&answer;
2419 <    cpuid_level=level;
2420 <
2421 <    set_target(cpuid_space);
2422 <    raw_push_l_r(0); /* eax */
2423 <    raw_push_l_r(1); /* ecx */
2424 <    raw_push_l_r(2); /* edx */
2425 <    raw_push_l_r(3); /* ebx */
2426 <    raw_push_l_r(7); /* edi */
2427 <    raw_mov_l_rm(0,(uae_u32)&cpuid_level);
2428 <    raw_cpuid(0);
2429 <    raw_mov_l_rm(7,(uae_u32)&cpuid_ptr);
2430 <    raw_mov_l_Rr(7,0,0);
2431 <    raw_mov_l_Rr(7,1,4);
2432 <    raw_mov_l_Rr(7,2,8);
2433 <    raw_mov_l_Rr(7,3,12);
2434 <    raw_pop_l_r(7);
2435 <    raw_pop_l_r(3);
2436 <    raw_pop_l_r(2);
2437 <    raw_pop_l_r(1);
2438 <    raw_pop_l_r(0);
2439 <    raw_ret();
2440 <    set_target(tmp);
2398 > struct cpuinfo_x86 {
2399 >  uae_u8        x86;                    // CPU family
2400 >  uae_u8        x86_vendor;             // CPU vendor
2401 >  uae_u8        x86_processor;  // CPU canonical processor type
2402 >  uae_u8        x86_brand_id;   // CPU BrandID if supported, yields 0 otherwise
2403 >  uae_u32       x86_hwcap;
2404 >  uae_u8        x86_model;
2405 >  uae_u8        x86_mask;
2406 >  int           cpuid_level;    // Maximum supported CPUID level, -1=no CPUID
2407 >  char          x86_vendor_id[16];
2408 > };
2409 > struct cpuinfo_x86 cpuinfo;
2410 >
2411 > enum {
2412 >  X86_VENDOR_INTEL              = 0,
2413 >  X86_VENDOR_CYRIX              = 1,
2414 >  X86_VENDOR_AMD                = 2,
2415 >  X86_VENDOR_UMC                = 3,
2416 >  X86_VENDOR_NEXGEN             = 4,
2417 >  X86_VENDOR_CENTAUR    = 5,
2418 >  X86_VENDOR_RISE               = 6,
2419 >  X86_VENDOR_TRANSMETA  = 7,
2420 >  X86_VENDOR_NSC                = 8,
2421 >  X86_VENDOR_UNKNOWN    = 0xff
2422 > };
2423 >
2424 > enum {
2425 >  X86_PROCESSOR_I386,                       /* 80386 */
2426 >  X86_PROCESSOR_I486,                       /* 80486DX, 80486SX, 80486DX[24] */
2427 >  X86_PROCESSOR_PENTIUM,
2428 >  X86_PROCESSOR_PENTIUMPRO,
2429 >  X86_PROCESSOR_K6,
2430 >  X86_PROCESSOR_ATHLON,
2431 >  X86_PROCESSOR_PENTIUM4,
2432 >  X86_PROCESSOR_max
2433 > };
2434 >
2435 > static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2436 >  "80386",
2437 >  "80486",
2438 >  "Pentium",
2439 >  "PentiumPro",
2440 >  "K6",
2441 >  "Athlon",
2442 >  "Pentium4"
2443 > };
2444 >
2445 > static struct ptt {
2446 >  const int align_loop;
2447 >  const int align_loop_max_skip;
2448 >  const int align_jump;
2449 >  const int align_jump_max_skip;
2450 >  const int align_func;
2451 > }
2452 > x86_alignments[X86_PROCESSOR_max] = {
2453 >  {  4,  3,  4,  3,  4 },
2454 >  { 16, 15, 16, 15, 16 },
2455 >  { 16,  7, 16,  7, 16 },
2456 >  { 16, 15, 16,  7, 16 },
2457 >  { 32,  7, 32,  7, 32 },
2458 >  { 16,  7, 16,  7, 16 },
2459 >  {  0,  0,  0,  0,  0 }
2460 > };
2461 >
2462 > static void
2463 > x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2464 > {
2465 >        char *v = c->x86_vendor_id;
2466 >
2467 >        if (!strcmp(v, "GenuineIntel"))
2468 >                c->x86_vendor = X86_VENDOR_INTEL;
2469 >        else if (!strcmp(v, "AuthenticAMD"))
2470 >                c->x86_vendor = X86_VENDOR_AMD;
2471 >        else if (!strcmp(v, "CyrixInstead"))
2472 >                c->x86_vendor = X86_VENDOR_CYRIX;
2473 >        else if (!strcmp(v, "Geode by NSC"))
2474 >                c->x86_vendor = X86_VENDOR_NSC;
2475 >        else if (!strcmp(v, "UMC UMC UMC "))
2476 >                c->x86_vendor = X86_VENDOR_UMC;
2477 >        else if (!strcmp(v, "CentaurHauls"))
2478 >                c->x86_vendor = X86_VENDOR_CENTAUR;
2479 >        else if (!strcmp(v, "NexGenDriven"))
2480 >                c->x86_vendor = X86_VENDOR_NEXGEN;
2481 >        else if (!strcmp(v, "RiseRiseRise"))
2482 >                c->x86_vendor = X86_VENDOR_RISE;
2483 >        else if (!strcmp(v, "GenuineTMx86") ||
2484 >                 !strcmp(v, "TransmetaCPU"))
2485 >                c->x86_vendor = X86_VENDOR_TRANSMETA;
2486 >        else
2487 >                c->x86_vendor = X86_VENDOR_UNKNOWN;
2488 > }
2489 >
2490 > static void
2491 > cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2492 > {
2493 >  static uae_u8 cpuid_space[256];  
2494 >  uae_u8* tmp=get_target();
2495 >
2496 >  set_target(cpuid_space);
2497 >  raw_push_l_r(0); /* eax */
2498 >  raw_push_l_r(1); /* ecx */
2499 >  raw_push_l_r(2); /* edx */
2500 >  raw_push_l_r(3); /* ebx */
2501 >  raw_mov_l_rm(0,(uae_u32)&op);
2502 >  raw_cpuid(0);
2503 >  if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2504 >  if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2505 >  if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2506 >  if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2507 >  raw_pop_l_r(3);
2508 >  raw_pop_l_r(2);
2509 >  raw_pop_l_r(1);
2510 >  raw_pop_l_r(0);
2511 >  raw_ret();
2512 >  set_target(tmp);
2513 >
2514 >  ((cpuop_func*)cpuid_space)(0);
2515 > }
2516 >
2517 > static void
2518 > raw_init_cpu(void)
2519 > {
2520 >  struct cpuinfo_x86 *c = &cpuinfo;
2521 >
2522 >  /* Defaults */
2523 >  c->x86_vendor = X86_VENDOR_UNKNOWN;
2524 >  c->cpuid_level = -1;                          /* CPUID not detected */
2525 >  c->x86_model = c->x86_mask = 0;       /* So far unknown... */
2526 >  c->x86_vendor_id[0] = '\0';           /* Unset */
2527 >  c->x86_hwcap = 0;
2528 >  
2529 >  /* Get vendor name */
2530 >  c->x86_vendor_id[12] = '\0';
2531 >  cpuid(0x00000000,
2532 >                (uae_u32 *)&c->cpuid_level,
2533 >                (uae_u32 *)&c->x86_vendor_id[0],
2534 >                (uae_u32 *)&c->x86_vendor_id[8],
2535 >                (uae_u32 *)&c->x86_vendor_id[4]);
2536 >  x86_get_cpu_vendor(c);
2537 >
2538 >  /* Intel-defined flags: level 0x00000001 */
2539 >  c->x86_brand_id = 0;
2540 >  if ( c->cpuid_level >= 0x00000001 ) {
2541 >        uae_u32 tfms, brand_id;
2542 >        cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2543 >        c->x86 = (tfms >> 8) & 15;
2544 >        c->x86_model = (tfms >> 4) & 15;
2545 >        c->x86_brand_id = brand_id & 0xff;
2546 >        if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2547 >                 (c->x86 == 0xf)) {
2548 >          /* AMD Extended Family and Model Values */
2549 >          c->x86 += (tfms >> 20) & 0xff;
2550 >          c->x86_model += (tfms >> 12) & 0xf0;
2551 >        }
2552 >        c->x86_mask = tfms & 15;
2553 >  } else {
2554 >        /* Have CPUID level 0 only - unheard of */
2555 >        c->x86 = 4;
2556 >  }
2557  
2558 <    ((cpuop_func*)cpuid_space)(0);
2559 <    return answer;
2560 < }
2558 >  /* Canonicalize processor ID */
2559 >  c->x86_processor = X86_PROCESSOR_max;
2560 >  switch (c->x86) {
2561 >  case 3:
2562 >        c->x86_processor = X86_PROCESSOR_I386;
2563 >        break;
2564 >  case 4:
2565 >        c->x86_processor = X86_PROCESSOR_I486;
2566 >        break;
2567 >  case 5:
2568 >        if (c->x86_vendor == X86_VENDOR_AMD)
2569 >          c->x86_processor = X86_PROCESSOR_K6;
2570 >        else
2571 >          c->x86_processor = X86_PROCESSOR_PENTIUM;
2572 >        break;
2573 >  case 6:
2574 >        if (c->x86_vendor == X86_VENDOR_AMD)
2575 >          c->x86_processor = X86_PROCESSOR_ATHLON;
2576 >        else
2577 >          c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2578 >        break;
2579 >  case 15:
2580 >        if (c->x86_vendor == X86_VENDOR_INTEL) {
2581 >          /*  Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
2582 >          if (c->x86_brand_id >= 8)
2583 >                c->x86_processor = X86_PROCESSOR_PENTIUM4;
2584 >        }
2585 >        break;
2586 >  }
2587 >  if (c->x86_processor == X86_PROCESSOR_max) {
2588 >        fprintf(stderr, "Error: unknown processor type\n");
2589 >        fprintf(stderr, "  Family  : %d\n", c->x86);
2590 >        fprintf(stderr, "  Model   : %d\n", c->x86_model);
2591 >        fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
2592 >        if (c->x86_brand_id)
2593 >          fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
2594 >        abort();
2595 >  }
2596  
2597 < static void raw_init_cpu(void)
2598 < {
2599 <    x86_regs x;
2600 <    uae_u32 maxlev;
2601 <    
2602 <    x=cpuid(0);
2603 <    maxlev=x.eax;
2401 <    write_log("Max CPUID level=%d Processor is %c%c%c%c%c%c%c%c%c%c%c%c\n",
2402 <              maxlev,
2403 <              x.ebx,
2404 <              x.ebx>>8,
2405 <              x.ebx>>16,
2406 <              x.ebx>>24,
2407 <              x.edx,
2408 <              x.edx>>8,
2409 <              x.edx>>16,
2410 <              x.edx>>24,
2411 <              x.ecx,
2412 <              x.ecx>>8,
2413 <              x.ecx>>16,
2414 <              x.ecx>>24
2415 <              );
2416 <    have_rat_stall=(x.ecx==0x6c65746e);
2417 <
2418 <    if (maxlev>=1) {
2419 <        x=cpuid(1);
2420 <        if (x.edx&(1<<15))
2421 <            have_cmov=1;
2422 <    }
2423 <    if (!have_cmov)
2424 <        have_rat_stall=0;
2425 < #if 0   /* For testing of non-cmov code! */
2426 <    have_cmov=0;
2427 < #endif
2428 < #if 1 /* It appears that partial register writes are a bad idea even on
2597 >  /* Have CMOV support? */
2598 >  have_cmov = (c->x86_hwcap & (1 << 15)) && true;
2599 >
2600 >  /* Can the host CPU suffer from partial register stalls? */
2601 >  have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2602 > #if 1
2603 >  /* It appears that partial register writes are a bad idea even on
2604           AMD K7 cores, even though they are not supposed to have the
2605           dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2606 <    if (have_cmov)
2607 <      have_rat_stall=1;
2606 >  if (c->x86_processor == X86_PROCESSOR_ATHLON)
2607 >        have_rat_stall = true;
2608   #endif
2609 +
2610 +  /* Alignments */
2611 +  if (tune_alignment) {
2612 +        align_loops = x86_alignments[c->x86_processor].align_loop;
2613 +        align_jumps = x86_alignments[c->x86_processor].align_jump;
2614 +  }
2615 +
2616 +  write_log("Max CPUID level=%d Processor is %s [%s]\n",
2617 +                        c->cpuid_level, c->x86_vendor_id,
2618 +                        x86_processor_string_table[c->x86_processor]);
2619   }
2620  
2621  

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines