
Comparing BasiliskII/src/uae_cpu/compiler/codegen_x86.cpp (file contents):
Revision 1.1 by gbeauche, 2002-09-17T16:04:06Z vs.
Revision 1.6 by gbeauche, 2002-10-03T16:13:46Z

# Line 1 | Line 1
1 + /*
2 + *  compiler/codegen_x86.cpp - IA-32 code generator
3 + *
4 + *  Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
5 + *
6 + *  Adaptation for Basilisk II and improvements, copyright 2000-2002
7 + *    Gwenole Beauchesne
8 + *
9 + *  Basilisk II (C) 1997-2002 Christian Bauer
10 + *  
11 + *  This program is free software; you can redistribute it and/or modify
12 + *  it under the terms of the GNU General Public License as published by
13 + *  the Free Software Foundation; either version 2 of the License, or
14 + *  (at your option) any later version.
15 + *
16 + *  This program is distributed in the hope that it will be useful,
17 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 + *  GNU General Public License for more details.
20 + *
21 + *  You should have received a copy of the GNU General Public License
22 + *  along with this program; if not, write to the Free Software
23 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 + */
25 +
26   /* This should eventually end up in machdep/, but for now, x86 is the
27     only target, and it's easier this way... */
28  
29 + #include "flags_x86.h"
30 +
31   /*************************************************************************
 32   * Some basic information about the target CPU                           *
33   *************************************************************************/
# Line 95 | Line 122 | uae_u8 need_to_preserve[]={1,1,1,1,0,1,1
122   #define CLOBBER_BT   clobber_flags()
123   #define CLOBBER_BSF  clobber_flags()
124  
125 + const bool optimize_accum               = true;
126   const bool optimize_imm8                = true;
127   const bool optimize_shift_once  = true;
128  
# Line 102 | Line 130 | const bool optimize_shift_once = true;
130   * Actual encoding of the instructions on the target CPU                 *
131   *************************************************************************/
132  
133 + static __inline__ int isaccum(int r)
134 + {
135 +        return (r == EAX_INDEX);
136 + }
137 +
138   static __inline__ int isbyte(uae_s32 x)
139   {
140          return (x>=-128 && x<=127);
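
The isbyte() test above gates the optimize_imm8 paths used throughout this file: an immediate that fits in a signed byte can use the sign-extended 0x83 encoding instead of the full 0x81 encoding, saving three bytes per dword operation. A sketch of the resulting byte sequences (taken from the IA-32 manual, not from this revision):

    /* add ebx,5    ->  83 C3 05             imm8, sign-extended by the CPU
       add ebx,5    ->  81 C3 05 00 00 00    full imm32
       add ebx,300  ->  81 C3 2C 01 00 00    300 > 127, imm8 form unusable */
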
# Line 202 | Line 235 | LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d
235          emit_byte(i);
236      }
237      else {
238 +        if (optimize_accum && isaccum(d))
239 +        emit_byte(0x2d);
240 +        else {
241          emit_byte(0x81);
242          emit_byte(0xe8+d);
243 +        }
244          emit_word(i);
245      }
246   }
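
The new optimize_accum branches, as in raw_sub_w_ri above, exploit x86's dedicated accumulator opcodes: when the destination is EAX/AX/AL, a one-byte opcode replaces the opcode+ModRM pair, saving one byte per instruction. Illustrative encodings (the 0x66 operand-size prefix is emitted earlier in the function):

    /* sub ax,1000  ->  66 2D E8 03          accumulator form, no ModRM byte
       sub bx,1000  ->  66 81 EB E8 03       generic form needs ModRM 0xEB */
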
# Line 1371 | Line 1408 | LENDFUNC(WRITE,RMW,2,raw_add_b_mi,(IMM d
1408  
1409   LOWFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
1410   {
1411 +        if (optimize_accum && isaccum(d))
1412 +        emit_byte(0xa9);
1413 +        else {
1414      emit_byte(0xf7);
1415      emit_byte(0xc0+d);
1416 +        }
1417      emit_long(i);
1418   }
1419   LENDFUNC(WRITE,NONE,2,raw_test_l_ri,(R4 d, IMM i))
# Line 1402 | Line 1443 | LENDFUNC(WRITE,NONE,2,raw_test_b_rr,(R1
1443   LOWFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1444   {
1445          if (optimize_imm8 && isbyte(i)) {
1446 <    emit_byte(0x83);
1447 <    emit_byte(0xe0+d);
1448 <    emit_byte(i);
1446 >        emit_byte(0x83);
1447 >        emit_byte(0xe0+d);
1448 >        emit_byte(i);
1449          }
1450          else {
1451 <    emit_byte(0x81);
1452 <    emit_byte(0xe0+d);
1453 <    emit_long(i);
1451 >        if (optimize_accum && isaccum(d))
1452 >        emit_byte(0x25);
1453 >        else {
1454 >        emit_byte(0x81);
1455 >        emit_byte(0xe0+d);
1456 >        }
1457 >        emit_long(i);
1458          }
1459   }
1460   LENDFUNC(WRITE,NONE,2,raw_and_l_ri,(RW4 d, IMM i))
1461  
1462   LOWFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1463   {
1464 <    emit_byte(0x66);
1465 <    emit_byte(0x81);
1466 <    emit_byte(0xe0+d);
1467 <    emit_word(i);
1464 >        emit_byte(0x66);
1465 >        if (optimize_imm8 && isbyte(i)) {
1466 >        emit_byte(0x83);
1467 >        emit_byte(0xe0+d);
1468 >        emit_byte(i);
1469 >        }
1470 >        else {
1471 >        if (optimize_accum && isaccum(d))
1472 >        emit_byte(0x25);
1473 >        else {
1474 >        emit_byte(0x81);
1475 >        emit_byte(0xe0+d);
1476 >        }
1477 >        emit_word(i);
1478 >        }
1479   }
1480   LENDFUNC(WRITE,NONE,2,raw_and_w_ri,(RW2 d, IMM i))
1481  
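
Word-sized operations reuse the dword opcodes behind the 0x66 operand-size prefix, which is why the rewritten raw_and_w_ri emits 0x66 first and then selects among the same three forms as raw_and_l_ri. Illustrative encodings:

    /* and ax,0x0fff  ->  66 25 FF 0F        accumulator form
       and cx,0x0fff  ->  66 81 E1 FF 0F     generic imm16 form
       and cx,0x000f  ->  66 83 E1 0F        imm8 form, sign-extended */
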
# Line 1453 | Line 1509 | LOWFUNC(WRITE,NONE,2,raw_or_l_ri,(RW4 d,
1509      emit_byte(i);
1510          }
1511          else {
1512 +        if (optimize_accum && isaccum(d))
1513 +        emit_byte(0x0d);
1514 +        else {
1515      emit_byte(0x81);
1516      emit_byte(0xc8+d);
1517 +        }
1518      emit_long(i);
1519          }
1520   }
# Line 1534 | Line 1594 | LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d
1594      emit_byte(i);
1595    }
1596    else {
1597 +    if (optimize_accum && isaccum(d))
1598 +    emit_byte(0x2d);
1599 +    else {
1600      emit_byte(0x81);
1601      emit_byte(0xe8+d);
1602 +    }
1603      emit_long(i);
1604    }
1605   }
# Line 1543 | Line 1607 | LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4
1607  
1608   LOWFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
1609   {
1610 +        if (optimize_accum && isaccum(d))
1611 +        emit_byte(0x2c);
1612 +        else {
1613      emit_byte(0x80);
1614      emit_byte(0xe8+d);
1615 +        }
1616      emit_byte(i);
1617   }
1618   LENDFUNC(WRITE,NONE,2,raw_sub_b_ri,(RW1 d, IMM i))
# Line 1557 | Line 1625 | LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d
1625          emit_byte(i);
1626      }
1627      else {
1628 +        if (optimize_accum && isaccum(d))
1629 +        emit_byte(0x05);
1630 +        else {
1631          emit_byte(0x81);
1632          emit_byte(0xc0+d);
1633 +        }
1634          emit_long(i);
1635      }
1636   }
# Line 1566 | Line 1638 | LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4
1638  
1639   LOWFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2 d, IMM i))
1640   {
1569    if (isbyte(i)) {
1641          emit_byte(0x66);
1642 +    if (isbyte(i)) {
1643          emit_byte(0x83);
1644          emit_byte(0xc0+d);
1645          emit_byte(i);
1646      }
1647      else {
1648 <        emit_byte(0x66);
1648 >        if (optimize_accum && isaccum(d))
1649 >        emit_byte(0x05);
1650 >        else {
1651          emit_byte(0x81);
1652          emit_byte(0xc0+d);
1653 +        }
1654          emit_word(i);
1655      }
1656   }
# Line 1583 | Line 1658 | LENDFUNC(WRITE,NONE,2,raw_add_w_ri,(RW2
1658  
1659   LOWFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
1660   {
1661 <    emit_byte(0x80);
1662 <    emit_byte(0xc0+d);
1661 >        if (optimize_accum && isaccum(d))
1662 >        emit_byte(0x04);
1663 >        else {
1664 >        emit_byte(0x80);
1665 >        emit_byte(0xc0+d);
1666 >        }
1667      emit_byte(i);
1668   }
1669   LENDFUNC(WRITE,NONE,2,raw_add_b_ri,(RW1 d, IMM i))
# Line 1648 | Line 1727 | LOWFUNC(WRITE,NONE,2,raw_cmp_l_ri,(R4 r,
1727      emit_byte(i);
1728          }
1729          else {
1730 +        if (optimize_accum && isaccum(r))
1731 +        emit_byte(0x3d);
1732 +        else {
1733      emit_byte(0x81);
1734      emit_byte(0xf8+r);
1735 +        }
1736      emit_long(i);
1737          }
1738   }
# Line 1663 | Line 1746 | LOWFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2
1746   }
1747   LENDFUNC(WRITE,NONE,2,raw_cmp_w,(R2 d, R2 s))
1748  
1749 + LOWFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1750 + {
1751 +    emit_byte(0x80);
1752 +    emit_byte(0x3d);
1753 +    emit_long(d);
1754 +    emit_byte(s);
1755 + }
1756 + LENDFUNC(WRITE,READ,2,raw_cmp_b_mi,(MEMR d, IMM s))
1757 +
1758   LOWFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
1759   {
1760 +  if (optimize_accum && isaccum(d))
1761 +  emit_byte(0x3c);
1762 +  else {
1763    emit_byte(0x80);
1764    emit_byte(0xf8+d);
1765 +  }
1766    emit_byte(i);
1767   }
1768   LENDFUNC(WRITE,NONE,2,raw_cmp_b_ri,(R1 d, IMM i))
# Line 1833 | Line 1929 | static __inline__ void raw_call_r(R4 r)
1929      emit_byte(0xd0+r);
1930   }
1931  
1932 + static __inline__ void raw_call_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
1933 + {
1934 +    int mu;
1935 +    switch(m) {
1936 +     case 1: mu=0; break;
1937 +     case 2: mu=1; break;
1938 +     case 4: mu=2; break;
1939 +     case 8: mu=3; break;
1940 +     default: abort();
1941 +    }
1942 +    emit_byte(0xff);
1943 +    emit_byte(0x14);
1944 +    emit_byte(0x05+8*r+0x40*mu);
1945 +    emit_long(base);
1946 + }
1947 +
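
raw_call_m_indexed() encodes "call dword ptr [base + r*m]" as opcode 0xFF /2 followed by a SIB byte: ModRM 0x14 selects /2 with a SIB to follow, and 0x05+8*r+0x40*mu packs base=disp32 (101), index=r and scale=log2(m). A worked example with illustrative arguments (r = 3 = EBX, m = 4):

    /* raw_call_m_indexed(0x1000, 3, 4) emits
         FF 14 9D 00 10 00 00      call dword ptr [0x1000 + ebx*4]
       where SIB 0x9D = scale 10 (x4) | index 011 (ebx) | base 101 (disp32) */
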
1948   static __inline__ void raw_jmp_r(R4 r)
1949   {
1950      emit_byte(0xff);
# Line 2034 | Line 2146 | static __inline__ void raw_load_flagx(ua
2146  
2147   static __inline__ void raw_inc_sp(int off)
2148   {
2149 <    raw_add_l_ri(4,off);
2149 >    raw_add_l_ri(ESP_INDEX,off);
2150   }
2151  
2152   /*************************************************************************
# Line 2283 | Line 2395 | static void vec(int x, struct sigcontext
2395   * Checking for CPU features                                             *
2396   *************************************************************************/
2397  
2398 < typedef struct {
2399 <    uae_u32 eax;
2400 <    uae_u32 ecx;
2401 <    uae_u32 edx;
2402 <    uae_u32 ebx;
2403 < } x86_regs;
2404 <
2405 <
2406 < /* This could be so much easier if it could make assumptions about the
2407 <   compiler... */
2408 <
2409 < static uae_u8 cpuid_space[256];  
2410 < static uae_u32 cpuid_ptr;
2411 < static uae_u32 cpuid_level;
2412 <
2413 < static x86_regs cpuid(uae_u32 level)
2414 < {
2415 <    x86_regs answer;
2416 <    uae_u8* tmp=get_target();
2417 <
2418 <    cpuid_ptr=(uae_u32)&answer;
2419 <    cpuid_level=level;
2420 <
2421 <    set_target(cpuid_space);
2422 <    raw_push_l_r(0); /* eax */
2423 <    raw_push_l_r(1); /* ecx */
2424 <    raw_push_l_r(2); /* edx */
2425 <    raw_push_l_r(3); /* ebx */
2426 <    raw_push_l_r(7); /* edi */
2427 <    raw_mov_l_rm(0,(uae_u32)&cpuid_level);
2428 <    raw_cpuid(0);
2429 <    raw_mov_l_rm(7,(uae_u32)&cpuid_ptr);
2430 <    raw_mov_l_Rr(7,0,0);
2431 <    raw_mov_l_Rr(7,1,4);
2432 <    raw_mov_l_Rr(7,2,8);
2433 <    raw_mov_l_Rr(7,3,12);
2434 <    raw_pop_l_r(7);
2435 <    raw_pop_l_r(3);
2436 <    raw_pop_l_r(2);
2437 <    raw_pop_l_r(1);
2438 <    raw_pop_l_r(0);
2439 <    raw_ret();
2440 <    set_target(tmp);
2398 > struct cpuinfo_x86 {
2399 >  uae_u8        x86;                    // CPU family
2400 >  uae_u8        x86_vendor;             // CPU vendor
2401 >  uae_u8        x86_processor;  // CPU canonical processor type
2402 >  uae_u8        x86_brand_id;   // CPU BrandID if supported, 0 otherwise
2403 >  uae_u32       x86_hwcap;
2404 >  uae_u8        x86_model;
2405 >  uae_u8        x86_mask;
2406 >  int           cpuid_level;    // Maximum supported CPUID level, -1=no CPUID
2407 >  char          x86_vendor_id[16];
2408 > };
2409 > struct cpuinfo_x86 cpuinfo;
2410 >
2411 > enum {
2412 >  X86_VENDOR_INTEL              = 0,
2413 >  X86_VENDOR_CYRIX              = 1,
2414 >  X86_VENDOR_AMD                = 2,
2415 >  X86_VENDOR_UMC                = 3,
2416 >  X86_VENDOR_NEXGEN             = 4,
2417 >  X86_VENDOR_CENTAUR    = 5,
2418 >  X86_VENDOR_RISE               = 6,
2419 >  X86_VENDOR_TRANSMETA  = 7,
2420 >  X86_VENDOR_NSC                = 8,
2421 >  X86_VENDOR_UNKNOWN    = 0xff
2422 > };
2423 >
2424 > enum {
2425 >  X86_PROCESSOR_I386,                       /* 80386 */
2426 >  X86_PROCESSOR_I486,                       /* 80486DX, 80486SX, 80486DX[24] */
2427 >  X86_PROCESSOR_PENTIUM,
2428 >  X86_PROCESSOR_PENTIUMPRO,
2429 >  X86_PROCESSOR_K6,
2430 >  X86_PROCESSOR_ATHLON,
2431 >  X86_PROCESSOR_PENTIUM4,
2432 >  X86_PROCESSOR_max
2433 > };
2434 >
2435 > static const char * x86_processor_string_table[X86_PROCESSOR_max] = {
2436 >  "80386",
2437 >  "80486",
2438 >  "Pentium",
2439 >  "PentiumPro",
2440 >  "K6",
2441 >  "Athlon",
2442 >  "Pentium4"
2443 > };
2444 >
2445 > static struct ptt {
2446 >  const int align_loop;
2447 >  const int align_loop_max_skip;
2448 >  const int align_jump;
2449 >  const int align_jump_max_skip;
2450 >  const int align_func;
2451 > }
2452 > x86_alignments[X86_PROCESSOR_max] = {
2453 >  {  4,  3,  4,  3,  4 },
2454 >  { 16, 15, 16, 15, 16 },
2455 >  { 16,  7, 16,  7, 16 },
2456 >  { 16, 15, 16,  7, 16 },
2457 >  { 32,  7, 32,  7, 32 },
2458 >  { 16,  7, 16,  7, 16 },
2459 >  {  0,  0,  0,  0,  0 }
2460 > };
2461 >
2462 > static void
2463 > x86_get_cpu_vendor(struct cpuinfo_x86 *c)
2464 > {
2465 >        char *v = c->x86_vendor_id;
2466 >
2467 >        if (!strcmp(v, "GenuineIntel"))
2468 >                c->x86_vendor = X86_VENDOR_INTEL;
2469 >        else if (!strcmp(v, "AuthenticAMD"))
2470 >                c->x86_vendor = X86_VENDOR_AMD;
2471 >        else if (!strcmp(v, "CyrixInstead"))
2472 >                c->x86_vendor = X86_VENDOR_CYRIX;
2473 >        else if (!strcmp(v, "Geode by NSC"))
2474 >                c->x86_vendor = X86_VENDOR_NSC;
2475 >        else if (!strcmp(v, "UMC UMC UMC "))
2476 >                c->x86_vendor = X86_VENDOR_UMC;
2477 >        else if (!strcmp(v, "CentaurHauls"))
2478 >                c->x86_vendor = X86_VENDOR_CENTAUR;
2479 >        else if (!strcmp(v, "NexGenDriven"))
2480 >                c->x86_vendor = X86_VENDOR_NEXGEN;
2481 >        else if (!strcmp(v, "RiseRiseRise"))
2482 >                c->x86_vendor = X86_VENDOR_RISE;
2483 >        else if (!strcmp(v, "GenuineTMx86") ||
2484 >                 !strcmp(v, "TransmetaCPU"))
2485 >                c->x86_vendor = X86_VENDOR_TRANSMETA;
2486 >        else
2487 >                c->x86_vendor = X86_VENDOR_UNKNOWN;
2488 > }
2489 >
2490 > static void
2491 > cpuid(uae_u32 op, uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
2492 > {
2493 >  static uae_u8 cpuid_space[256];  
2494 >  uae_u8* tmp=get_target();
2495 >
2496 >  set_target(cpuid_space);
2497 >  raw_push_l_r(0); /* eax */
2498 >  raw_push_l_r(1); /* ecx */
2499 >  raw_push_l_r(2); /* edx */
2500 >  raw_push_l_r(3); /* ebx */
2501 >  raw_mov_l_rm(0,(uae_u32)&op);
2502 >  raw_cpuid(0);
2503 >  if (eax != NULL) raw_mov_l_mr((uae_u32)eax,0);
2504 >  if (ebx != NULL) raw_mov_l_mr((uae_u32)ebx,3);
2505 >  if (ecx != NULL) raw_mov_l_mr((uae_u32)ecx,1);
2506 >  if (edx != NULL) raw_mov_l_mr((uae_u32)edx,2);
2507 >  raw_pop_l_r(3);
2508 >  raw_pop_l_r(2);
2509 >  raw_pop_l_r(1);
2510 >  raw_pop_l_r(0);
2511 >  raw_ret();
2512 >  set_target(tmp);
2513 >
2514 >  ((cpuop_func*)cpuid_space)(0);
2515 > }
2516 >
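
The cpuid() wrapper above avoids compiler-specific inline assembly by JIT-emitting a small stub into cpuid_space and calling it through a function pointer, so it works with any compiler the emitter itself supports. For comparison only, a hypothetical GCC-specific equivalent using extended asm might look like this (cpuid_gcc is not part of this source, and the "=b" constraint can clash with PIC code that reserves EBX):

    static void cpuid_gcc(uae_u32 op,
                          uae_u32 *eax, uae_u32 *ebx, uae_u32 *ecx, uae_u32 *edx)
    {
      uae_u32 a, b, c, d;
      __asm__ __volatile__("cpuid"
                           : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
                           : "a" (op));
      if (eax) *eax = a;   /* copy out only what the caller asked for, */
      if (ebx) *ebx = b;   /* mirroring the NULL checks in the stub above */
      if (ecx) *ecx = c;
      if (edx) *edx = d;
    }
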
2517 > static void
2518 > raw_init_cpu(void)
2519 > {
2520 >  struct cpuinfo_x86 *c = &cpuinfo;
2521 >
2522 >  /* Defaults */
2523 >  c->x86_vendor = X86_VENDOR_UNKNOWN;
2524 >  c->cpuid_level = -1;                          /* CPUID not detected */
2525 >  c->x86_model = c->x86_mask = 0;       /* So far unknown... */
2526 >  c->x86_vendor_id[0] = '\0';           /* Unset */
2527 >  c->x86_hwcap = 0;
2528 >  
2529 >  /* Get vendor name */
2530 >  c->x86_vendor_id[12] = '\0';
2531 >  cpuid(0x00000000,
2532 >                (uae_u32 *)&c->cpuid_level,
2533 >                (uae_u32 *)&c->x86_vendor_id[0],
2534 >                (uae_u32 *)&c->x86_vendor_id[8],
2535 >                (uae_u32 *)&c->x86_vendor_id[4]);
2536 >  x86_get_cpu_vendor(c);
2537 >
2538 >  /* Intel-defined flags: level 0x00000001 */
2539 >  c->x86_brand_id = 0;
2540 >  if ( c->cpuid_level >= 0x00000001 ) {
2541 >        uae_u32 tfms, brand_id;
2542 >        cpuid(0x00000001, &tfms, &brand_id, NULL, &c->x86_hwcap);
2543 >        c->x86 = (tfms >> 8) & 15;
2544 >        c->x86_model = (tfms >> 4) & 15;
2545 >        c->x86_brand_id = brand_id & 0xff;
2546 >        if ( (c->x86_vendor == X86_VENDOR_AMD) &&
2547 >                 (c->x86 == 0xf)) {
2548 >          /* AMD Extended Family and Model Values */
2549 >          c->x86 += (tfms >> 20) & 0xff;
2550 >          c->x86_model += (tfms >> 12) & 0xf0;
2551 >        }
2552 >        c->x86_mask = tfms & 15;
2553 >  } else {
2554 >        /* Have CPUID level 0 only - unheard of */
2555 >        c->x86 = 4;
2556 >  }
2557  
2558 <    ((cpuop_func*)cpuid_space)(0);
2559 <    return answer;
2560 < }
2558 >  /* Canonicalize processor ID */
2559 >  c->x86_processor = X86_PROCESSOR_max;
2560 >  switch (c->x86) {
2561 >  case 3:
2562 >        c->x86_processor = X86_PROCESSOR_I386;
2563 >        break;
2564 >  case 4:
2565 >        c->x86_processor = X86_PROCESSOR_I486;
2566 >        break;
2567 >  case 5:
2568 >        if (c->x86_vendor == X86_VENDOR_AMD)
2569 >          c->x86_processor = X86_PROCESSOR_K6;
2570 >        else
2571 >          c->x86_processor = X86_PROCESSOR_PENTIUM;
2572 >        break;
2573 >  case 6:
2574 >        if (c->x86_vendor == X86_VENDOR_AMD)
2575 >          c->x86_processor = X86_PROCESSOR_ATHLON;
2576 >        else
2577 >          c->x86_processor = X86_PROCESSOR_PENTIUMPRO;
2578 >        break;
2579 >  case 15:
2580 >        if (c->x86_vendor == X86_VENDOR_INTEL) {
2581 >          /* Assume any BrandID >= 8 and family == 15 yields a Pentium 4 */
2582 >          if (c->x86_brand_id >= 8)
2583 >                c->x86_processor = X86_PROCESSOR_PENTIUM4;
2584 >        }
2585 >        break;
2586 >  }
2587 >  if (c->x86_processor == X86_PROCESSOR_max) {
2588 >        fprintf(stderr, "Error: unknown processor type\n");
2589 >        fprintf(stderr, "  Family  : %d\n", c->x86);
2590 >        fprintf(stderr, "  Model   : %d\n", c->x86_model);
2591 >        fprintf(stderr, "  Mask    : %d\n", c->x86_mask);
2592 >        if (c->x86_brand_id)
2593 >          fprintf(stderr, "  BrandID : %02x\n", c->x86_brand_id);
2594 >        abort();
2595 >  }
2596  
2597 < static void raw_init_cpu(void)
2598 < {
2599 <    x86_regs x;
2600 <    uae_u32 maxlev;
2601 <    
2602 <    x=cpuid(0);
2603 <    maxlev=x.eax;
2341 <    write_log("Max CPUID level=%d Processor is %c%c%c%c%c%c%c%c%c%c%c%c\n",
2342 <              maxlev,
2343 <              x.ebx,
2344 <              x.ebx>>8,
2345 <              x.ebx>>16,
2346 <              x.ebx>>24,
2347 <              x.edx,
2348 <              x.edx>>8,
2349 <              x.edx>>16,
2350 <              x.edx>>24,
2351 <              x.ecx,
2352 <              x.ecx>>8,
2353 <              x.ecx>>16,
2354 <              x.ecx>>24
2355 <              );
2356 <    have_rat_stall=(x.ecx==0x6c65746e);
2357 <
2358 <    if (maxlev>=1) {
2359 <        x=cpuid(1);
2360 <        if (x.edx&(1<<15))
2361 <            have_cmov=1;
2362 <    }
2363 <    if (!have_cmov)
2364 <        have_rat_stall=0;
2365 < #if 0   /* For testing of non-cmov code! */
2366 <    have_cmov=0;
2367 < #endif
2368 < #if 1 /* It appears that partial register writes are a bad idea even on
2597 >  /* Have CMOV support? */
2598 >  have_cmov = (c->x86_hwcap & (1 << 15)) != 0;
2599 >
2600 >  /* Can the host CPU suffer from partial register stalls? */
2601 >  have_rat_stall = (c->x86_vendor == X86_VENDOR_INTEL);
2602 > #if 1
2603 >  /* It appears that partial register writes are a bad idea even on
2604           AMD K7 cores, even though they are not supposed to have the
2605           dreaded rat stall. Why? Anyway, that's why we lie about it ;-) */
2606 <    if (have_cmov)
2607 <      have_rat_stall=1;
2606 >  if (c->x86_processor == X86_PROCESSOR_ATHLON)
2607 >        have_rat_stall = true;
2608   #endif
2609 +
2610 +  /* Alignments */
2611 +  if (tune_alignment) {
2612 +        align_loops = x86_alignments[c->x86_processor].align_loop;
2613 +        align_jumps = x86_alignments[c->x86_processor].align_jump;
2614 +  }
2615 +
2616 +  write_log("Max CPUID level=%d Processor is %s [%s]\n",
2617 +                        c->cpuid_level, c->x86_vendor_id,
2618 +                        x86_processor_string_table[c->x86_processor]);
2619   }
2620  
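
For reference when reading raw_init_cpu() above: CPUID level 0 returns the vendor string in EBX, EDX, ECX order, which is why the ebx/ecx/edx out-pointers are aimed at x86_vendor_id offsets 0, 8 and 4 respectively. Level 1 packs the CPU signature into EAX nibbles; a worked decoding with an illustrative signature value:

    /* tfms = 0x00000673 (an Intel Pentium III):
         family   = (tfms >> 8) & 15  =  6
         model    = (tfms >> 4) & 15  =  7
         mask     =  tfms       & 15  =  3   (stepping)
       family 6 + GenuineIntel  =>  X86_PROCESSOR_PENTIUMPRO class */
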
2621  

Diff Legend

  Removed lines
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)