ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp
(Generate patch)

Comparing BasiliskII/src/uae_cpu/fpu/fpu_ieee.cpp (file contents):
Revision 1.2 by gbeauche, 2002-09-13T15:06:42Z vs.
Revision 1.10 by asvitkine, 2012-03-30T01:45:08Z

# Line 1 | Line 1
1   /*
2 < * UAE - The Un*x Amiga Emulator
2 > *  fpu/fpu_ieee.cpp
3   *
4 < * MC68881/MC68040 emulation
4 > *  Basilisk II (C) 1997-2008 Christian Bauer
5   *
6 < * Copyright 1996 Herman ten Brugge
6 > *  MC68881/68040 fpu emulation
7   *
8 + *  Original UAE FPU, copyright 1996 Herman ten Brugge
9 + *  Rewrite for x86, copyright 1999-2000 Lauri Pesonen
10 + *  New framework, copyright 2000 Gwenole Beauchesne
11 + *  Adapted for JIT compilation (c) Bernd Meyer, 2000
12 + *  
13 + *  This program is free software; you can redistribute it and/or modify
14 + *  it under the terms of the GNU General Public License as published by
15 + *  the Free Software Foundation; either version 2 of the License, or
16 + *  (at your option) any later version.
17   *
18 + *  This program is distributed in the hope that it will be useful,
19 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
20 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 + *  GNU General Public License for more details.
22 + *
23 + *  You should have received a copy of the GNU General Public License
24 + *  along with this program; if not, write to the Free Software
25 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26 + */
27 +
28 + /*
29   * Following fixes by Lauri Pesonen, July 1999:
30   *
31   * FMOVEM list handling:
# Line 80 | Line 100
100   fpu_t fpu;
101  
102   /* -------------------------------------------------------------------------- */
83 /* --- Endianness                                                         --- */
84 /* -------------------------------------------------------------------------- */
85
86 // Taken from glibc 2.1.x: endian.h
87 #define UAE_LITTLE_ENDIAN               1234
88 #define UAE_BIG_ENDIAN                  4321
89
90 #if WORDS_BIGENDIAN
91 #define UAE_BYTE_ORDER                  UAE_BIG_ENDIAN
92 #else
93 #define UAE_BYTE_ORDER                  UAE_LITTLE_ENDIAN
94 #endif
95
96 // Some machines may need to use a different endianness for floating point values
97 // e.g. ARM in which case it is big endian
98 #define UAE_FLOAT_WORD_ORDER    UAE_BYTE_ORDER
99
100 /* -------------------------------------------------------------------------- */
103   /* --- Scopes Definition                                                  --- */
104   /* -------------------------------------------------------------------------- */
105  
# Line 198 | Line 200 | PRIVATE inline fpu_register FFPU make_si
200   #if 1
201          // Use a single, otherwise some checks for NaN, Inf, Zero would have to
202          // be performed
203 <        fpu_single result;
203 >        fpu_single result = 0; // = 0 to workaround a compiler bug on SPARC
204          fp_declare_init_shape(srp, result, single);
205          srp->ieee.negative      = (value >> 31) & 1;
206          srp->ieee.exponent      = (value >> 23) & FP_SINGLE_EXP_MAX;
# Line 264 | Line 266 | PRIVATE inline uae_u32 FFPU extract_sing
266   // to_exten
267   PRIVATE inline fpu_register FFPU make_extended(uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3)
268   {
269 < #if 1
270 <        // FIXME: USE_QUAD_DOUBLE
271 <        fpu_extended result;
269 >        // is it zero?
270 >        if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0)
271 >                return 0.0;
272 >
273 >        fpu_register result;
274 > #if USE_QUAD_DOUBLE
275 >        // is it NaN?
276 >        if ((wrd1 & 0x7fff0000) == 0x7fff0000 && wrd2 != 0 && wrd3 != 0) {
277 >                make_nan(result);
278 >                return result;
279 >        }
280 >        // is it inf?
281 >        if ((wrd1 & 0x7ffff000) == 0x7fff0000 && wrd2 == 0 && wrd3 == 0) {
282 >                if ((wrd1 & 0x80000000) == 0)
283 >                        make_inf_positive(result);
284 >                else
285 >                        make_inf_negative(result);
286 >                return result;
287 >        }
288 >        fp_declare_init_shape(srp, result, extended);
289 >        srp->ieee.negative  = (wrd1 >> 31) & 1;
290 >        srp->ieee.exponent  = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX;
291 >        srp->ieee.mantissa0 = (wrd2 >> 16) & 0xffff;
292 >        srp->ieee.mantissa1 = ((wrd2 & 0xffff) << 16) | ((wrd3 >> 16) & 0xffff);
293 >        srp->ieee.mantissa2 = (wrd3 & 0xffff) << 16;
294 >        srp->ieee.mantissa3 = 0;
295 > #elif USE_LONG_DOUBLE
296          fp_declare_init_shape(srp, result, extended);
297          srp->ieee.negative      = (wrd1 >> 31) & 1;
298          srp->ieee.exponent      = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX;
299          srp->ieee.mantissa0     = wrd2;
300          srp->ieee.mantissa1     = wrd3;
301 <        fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result));
302 <        return result;
303 < #elif 0 /* original code */
278 <        if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0)
279 <                return 0.0;
280 <        
281 <        fpu_register result;
282 <        uae_u32 *p = (uae_u32 *)&result;
283 <        
284 <        uae_u32 sign =  wrd1 & 0x80000000;
285 <        uae_u32 exp  = (wrd1 >> 16) & 0x7fff;
301 > #else
302 >        uae_u32 sgn = (wrd1 >> 31) & 1;
303 >        uae_u32 exp = (wrd1 >> 16) & 0x7fff;
304  
305 <        // The explicit integer bit is not set, must normalize.
306 <        if((wrd2 & 0x80000000) == 0) {
305 >        // the explicit integer bit is not set, must normalize
306 >        if ((wrd2 & 0x80000000) == 0) {
307                  fpu_debug(("make_extended denormalized mantissa (%X,%X,%X)\n",wrd1,wrd2,wrd3));
308 <                if( wrd2 | wrd3 ) {
308 >                if (wrd2 | wrd3) {
309                          // mantissa, not fraction.
310                          uae_u64 man = ((uae_u64)wrd2 << 32) | wrd3;
311 <                        while( exp > 0 && (man & UVAL64(0x8000000000000000)) == 0 ) {
311 >                        while (exp > 0 && (man & UVAL64(0x8000000000000000)) == 0) {
312                                  man <<= 1;
313                                  exp--;
314                          }
315 <                        wrd2 = (uae_u32)( man >> 32 );
316 <                        wrd3 = (uae_u32)( man & 0xFFFFFFFF );
299 <                } else {
300 <                        if(exp == 0x7FFF) {
301 <                                // Infinity.
302 <                        } else {
303 <                                // Zero
304 <                                exp = 16383 - 1023;
305 <                        }
315 >                        wrd2 = (uae_u32)(man >> 32);
316 >                        wrd3 = (uae_u32)(man & 0xFFFFFFFF);
317                  }
318 +                else if (exp != 0x7fff) // zero
319 +                        exp = FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS;
320          }
321  
322 <        if(exp < 16383 - 1023) {
310 <                // should set underflow.
322 >        if (exp < FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS)
323                  exp = 0;
324 <        } else if(exp > 16383 + 1023) {
325 <                // should set overflow.
326 <                exp = 2047;
327 <        } else {
328 <                exp = exp + 1023 - 16383;
329 <        }
330 <
331 <        // drop the explicit integer bit.
332 <        p[FLO] = (wrd2 << 21) | (wrd3 >> 11);
333 <        p[FHI] = sign | (exp << 20) | ((wrd2 & 0x7FFFFFFF) >> 11);
334 <
323 <        fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result));
324 <
325 <        return(result);
324 >        else if (exp > FP_EXTENDED_EXP_BIAS + FP_DOUBLE_EXP_BIAS)
325 >                exp = FP_DOUBLE_EXP_MAX;
326 >        else
327 >                exp += FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS;
328 >        
329 >        fp_declare_init_shape(srp, result, double);
330 >        srp->ieee.negative  = sgn;
331 >        srp->ieee.exponent  = exp;
332 >        // drop the explicit integer bit
333 >        srp->ieee.mantissa0 = (wrd2 & 0x7fffffff) >> 11;
334 >        srp->ieee.mantissa1 = (wrd2 << 21) | (wrd3 >> 11);
335   #endif
336 +        fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result));
337 +        return result;
338   }
339  
340   /*
# Line 335 | Line 346 | PRIVATE inline void FFPU make_extended_n
346          uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3, fpu_register & result
347   )
348   {
349 < #if 1
350 <        // FIXME: USE_QUAD_DOUBLE
340 <        fp_declare_init_shape(srp, result, extended);
341 <        srp->ieee.negative      = (wrd1 & 0x80000000) != 0;
342 <        srp->ieee.exponent      = (wrd1 >> 16) & 0x7fff;
343 <        srp->ieee.mantissa0     = wrd2;
344 <        srp->ieee.mantissa1     = wrd3;
345 < #elif 0 /* original code */
346 <        // Is it zero?
347 <        if ((wrd1 & 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) {
349 >        // is it zero?
350 >        if ((wrd1 && 0x7fff0000) == 0 && wrd2 == 0 && wrd3 == 0) {
351                  make_zero_positive(result);
352                  return;
353          }
354 <
355 <        // Is it NaN?
356 <        if( (wrd1 & 0x7FFF0000) == 0x7FFF0000 ) {
357 <                if( (wrd1 & 0x0000FFFF) || wrd2 || wrd3 ) {
355 <                        make_nan(result);
356 <                        return;
357 <                }
354 >        // is it NaN?
355 >        if ((wrd1 & 0x7fff0000) == 0x7fff0000 && wrd2 != 0 && wrd3 != 0) {
356 >                make_nan(result);
357 >                return;
358          }
359 <        
360 <        uae_u32 sign =  wrd1 & 0x80000000;
361 <        uae_u32 exp  = (wrd1 >> 16) & 0x7fff;
362 <
363 <        if(exp < 16383 - 1023) {
364 <                // should set underflow.
365 <                exp = 0;
366 <        } else if(exp > 16383 + 1023) {
367 <                // should set overflow.
368 <                exp = 2047;
369 <        } else {
370 <                exp = exp + 1023 - 16383;
359 > #if USE_QUAD_DOUBLE
360 >        // is it inf?
361 >        if ((wrd1 & 0x7ffff000) == 0x7fff0000 && wrd2 == 0 && wrd3 == 0) {
362 >                if ((wrd1 & 0x80000000) == 0)
363 >                        make_inf_positive(result);
364 >                else
365 >                        make_inf_negative(result);
366 >                return;
367          }
368 <
369 <        // drop the explicit integer bit.
370 <        uae_u32 *p = (uae_u32 *)&result;
371 <        p[FLO] = (wrd2 << 21) | (wrd3 >> 11);
372 <        p[FHI] = sign | (exp << 20) | ((wrd2 & 0x7FFFFFFF) >> 11);
373 <
374 <        fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(float)(*(double *)p)));
368 >        fp_declare_init_shape(srp, result, extended);
369 >        srp->ieee.negative  = (wrd1 >> 31) & 1;
370 >        srp->ieee.exponent  = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX;
371 >        srp->ieee.mantissa0 = (wrd2 >> 16) & 0xffff;
372 >        srp->ieee.mantissa1 = ((wrd2 & 0xffff) << 16) | ((wrd3 >> 16) & 0xffff);
373 >        srp->ieee.mantissa2 = (wrd3 & 0xffff) << 16;
374 >        srp->ieee.mantissa3 = 0;
375 > #elif USE_LONG_DOUBLE
376 >        fp_declare_init_shape(srp, result, extended);
377 >        srp->ieee.negative      = (wrd1 >> 31) & 1;
378 >        srp->ieee.exponent      = (wrd1 >> 16) & FP_EXTENDED_EXP_MAX;
379 >        srp->ieee.mantissa0     = wrd2;
380 >        srp->ieee.mantissa1     = wrd3;
381 > #else
382 >        uae_u32 exp = (wrd1 >> 16) & 0x7fff;
383 >        if (exp < FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS)
384 >                exp = 0;
385 >        else if (exp > FP_EXTENDED_EXP_BIAS + FP_DOUBLE_EXP_BIAS)
386 >                exp = FP_DOUBLE_EXP_MAX;
387 >        else
388 >                exp += FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS;
389 >        
390 >        fp_declare_init_shape(srp, result, double);
391 >        srp->ieee.negative  = (wrd1 >> 31) & 1;
392 >        srp->ieee.exponent  = exp;
393 >        // drop the explicit integer bit
394 >        srp->ieee.mantissa0 = (wrd2 & 0x7fffffff) >> 11;
395 >        srp->ieee.mantissa1 = (wrd2 << 21) | (wrd3 >> 11);
396   #endif
397 +        fpu_debug(("make_extended (%X,%X,%X) = %.04f\n",wrd1,wrd2,wrd3,(double)result));
398   }
399  
400   // from_exten
# Line 384 | Line 402 | PRIVATE inline void FFPU extract_extende
402          uae_u32 * wrd1, uae_u32 * wrd2, uae_u32 * wrd3
403   )
404   {
387 #if 1
388        // FIXME: USE_QUAD_DOUBLE and non little-endian specificities
389        uae_u32 *p = (uae_u32 *)&src;
390        *wrd3 = p[0];
391        *wrd2 = p[1];
392        *wrd1 = ( (uae_u32)*((uae_u16 *)&p[2]) ) << 16;
393        fpu_debug(("extract_extended (%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3));
394 #elif 0 /* original code */
405          if (src == 0.0) {
406                  *wrd1 = *wrd2 = *wrd3 = 0;
407                  return;
408          }
409 <        
409 > #if USE_QUAD_DOUBLE
410 >        // FIXME: deal with denormals?
411 >        fp_declare_init_shape(srp, src, extended);
412 >        *wrd1 = (srp->ieee.negative << 31) | (srp->ieee.exponent << 16);
413 >        // always set the explicit integer bit.
414 >        *wrd2 = 0x80000000 | (srp->ieee.mantissa0 << 15) | ((srp->ieee.mantissa1 & 0xfffe0000) >> 17);
415 >        *wrd3 = (srp->ieee.mantissa1 << 15) | ((srp->ieee.mantissa2 & 0xfffe0000) >> 17);
416 > #elif USE_LONG_DOUBLE
417          uae_u32 *p = (uae_u32 *)&src;
418 <        
419 <        fpu_debug(("extract_extended (%X,%X)\n",p[FLO],p[FHI]));
418 > #ifdef WORDS_BIGENDIAN
419 >        *wrd1 = p[0];
420 >        *wrd2 = p[1];
421 >        *wrd3 = p[2];
422 > #else
423 >        *wrd3 = p[0];
424 >        *wrd2 = p[1];
425 >        *wrd1 = ( (uae_u32)*((uae_u16 *)&p[2]) ) << 16;
426 > #endif
427 > #else
428 >        fp_declare_init_shape(srp, src, double);
429 >        fpu_debug(("extract_extended (%d,%d,%X,%X)\n",
430 >                           srp->ieee.negative , srp->ieee.exponent,
431 >                           srp->ieee.mantissa0, srp->ieee.mantissa1));
432  
433 <        uae_u32 sign =  p[FHI] & 0x80000000;
433 >        uae_u32 exp = srp->ieee.exponent;
434  
435 <        uae_u32 exp  = ((p[FHI] >> 20) & 0x7ff);
436 <        // Check for maximum
437 <        if(exp == 0x7FF) {
438 <                exp = 0x7FFF;
410 <        } else {
411 <                exp  += 16383 - 1023;
412 <        }
435 >        if (exp == FP_DOUBLE_EXP_MAX)
436 >                exp = FP_EXTENDED_EXP_MAX;
437 >        else
438 >                exp += FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS;
439  
440 <        *wrd1 = sign | (exp << 16);
440 >        *wrd1 = (srp->ieee.negative << 31) | (exp << 16);
441          // always set the explicit integer bit.
442 <        *wrd2 = 0x80000000 | ((p[FHI] & 0x000FFFFF) << 11) | ((p[FLO] & 0xFFE00000) >> 21);
443 <        *wrd3 = p[FLO] << 11;
418 <
419 <        fpu_debug(("extract_extended (%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3));
442 >        *wrd2 = 0x80000000 | (srp->ieee.mantissa0 << 11) | ((srp->ieee.mantissa1 & 0xffe00000) >> 21);
443 >        *wrd3 = srp->ieee.mantissa1 << 11;
444   #endif
445 +        fpu_debug(("extract_extended (%.04f) = %X,%X,%X\n",(double)src,*wrd1,*wrd2,*wrd3));
446   }
447  
448   // to_double
# Line 425 | Line 450 | PRIVATE inline fpu_register FFPU make_do
450   {
451          union {
452                  fpu_double value;
453 <                uae_u32   parts[2];
453 >                uae_u32    parts[2];
454          } dest;
455   #ifdef WORDS_BIGENDIAN
456          dest.parts[0] = wrd1;
# Line 445 | Line 470 | PRIVATE inline void FFPU extract_double(
470   {
471          union {
472                  fpu_double value;
473 <                uae_u32   parts[2];
473 >                uae_u32    parts[2];
474          } dest;
475          dest.value = (fpu_double)src;
476   #ifdef WORDS_BIGENDIAN
# Line 1621 | Line 1646 | void FFPU fpuop_arithmetic(uae_u32 opcod
1646                                  FPU registers[reg] = 1.0e256;
1647                                  fpu_debug(("FP const: 1.0e256\n"));
1648                                  break;
1649 < #if USE_LONG_DOUBLE
1649 > #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
1650                          case 0x3c:
1651 <                                FPU registers[reg] = 1.0e512;
1651 >                                FPU registers[reg] = 1.0e512L;
1652                                  fpu_debug(("FP const: 1.0e512\n"));
1653                                  break;
1654                          case 0x3d:
1655 <                                FPU registers[reg] = 1.0e1024;
1655 >                                FPU registers[reg] = 1.0e1024L;
1656                                  fpu_debug(("FP const: 1.0e1024\n"));
1657                                  break;
1658                          case 0x3e:
1659 <                                FPU registers[reg] = 1.0e2048;
1659 >                                FPU registers[reg] = 1.0e2048L;
1660                                  fpu_debug(("FP const: 1.0e2048\n"));
1661                                  break;
1662                          case 0x3f:
1663 <                                FPU registers[reg] = 1.0e4096;
1663 >                                FPU registers[reg] = 1.0e4096L;
1664                                  fpu_debug(("FP const: 1.0e4096\n"));
1665   #endif
1666                                  break;
# Line 1989 | Line 2014 | void FFPU fpuop_arithmetic(uae_u32 opcod
2014  
2015                  case 0x26:              /* FSCALE */
2016                          fpu_debug(("FSCALE %.04f\n",(double)src));
2017 <
2018 <                        // TODO:
2019 <                        // Overflow, underflow
2020 <
1996 <                        if( isinf(FPU registers[reg]) ) {
1997 <                                make_nan( FPU registers[reg] );
1998 <                        }
1999 <                        else {
2017 >                        // TODO: overflow flags
2018 >                        get_dest_flags(FPU registers[reg]);
2019 >                        get_source_flags(src);
2020 >                        if (fl_source.in_range && fl_dest.in_range) {
2021                                  // When the absolute value of the source operand is >= 2^14,
2022                                  // an overflow or underflow always results.
2023                                  // Here (int) cast is okay.
2024 <                                fast_scale( FPU registers[reg], (int)fp_round_to_zero(src) );
2024 >                                int scale_factor = (int)fp_round_to_zero(src);
2025 > #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
2026 >                                fp_declare_init_shape(sxp, FPU registers[reg], extended);
2027 >                                sxp->ieee.exponent += scale_factor;
2028 > #else
2029 >                                fp_declare_init_shape(sxp, FPU registers[reg], double);
2030 >                                uae_u32 exp = sxp->ieee.exponent + scale_factor;
2031 >                                if (exp < FP_EXTENDED_EXP_BIAS - FP_DOUBLE_EXP_BIAS)
2032 >                                        exp = 0;
2033 >                                else if (exp > FP_EXTENDED_EXP_BIAS + FP_DOUBLE_EXP_BIAS)
2034 >                                        exp = FP_DOUBLE_EXP_MAX;
2035 >                                else
2036 >                                        exp += FP_DOUBLE_EXP_BIAS - FP_EXTENDED_EXP_BIAS;
2037 >                                sxp->ieee.exponent = exp;
2038 > #endif
2039 >                        }
2040 >                        else if (fl_source.infinity) {
2041 >                                // Returns NaN for any Infinity source
2042 >                                make_nan( FPU registers[reg] );
2043                          }
2044                          make_fpsr(FPU registers[reg]);
2045                          break;
# Line 2086 | Line 2125 | PUBLIC void FFPU fpu_init (bool integral
2125   #if defined(FPU_USE_X86_ROUNDING)
2126          // Initial state after boot, reset and frestore(null frame)
2127          x86_control_word = CW_INITIAL;
2128 < #elif defined(__i386__) && defined(X86_ASSEMBLY)
2128 > #elif defined(USE_X87_ASSEMBLY)
2129          volatile unsigned short int cw;
2130          __asm__ __volatile__("fnstcw %0" : "=m" (cw));
2131          cw &= ~0x0300; cw |= 0x0300; // CW_PC_EXTENDED

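Diff Legend

(no marker) Removed lines (present only in revision 1.2)
+ Added lines (present only in revision 1.10)
< Changed lines (text as of revision 1.2)
> Changed lines (text as of revision 1.10)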