ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.2
Committed: 2002-09-15T18:21:13Z (21 years, 9 months ago) by gbeauche
Content type: text/plain
Branch: MAIN
Changes since 1.1: +99 -40 lines
Log Message:
Fix "ieee" FPU core on big endian and without long double > double support
- Handle conversions to/from host double for m68k long doubles formats
- Make mathlib aware of sizeof(long double) == sizeof(double) arches
- Attempt to fix FSCALE implementation

File Contents

# User Rev Content
1 gbeauche 1.1 /*
2     * fpu/mathlib.h - Floating-point math support library
3     *
4     * Basilisk II (C) 1997-2001 Christian Bauer
5     *
6     * MC68881/68040 fpu emulation
7     *
8     * Original UAE FPU, copyright 1996 Herman ten Brugge
9     * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10     * New framework, copyright 2000-2001 Gwenole Beauchesne
11     * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12     *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28     #ifndef FPU_MATHLIB_H
29     #define FPU_MATHLIB_H
30    
31     /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32     #undef PUBLIC
33     #define PUBLIC extern
34    
35     #undef PRIVATE
36     #define PRIVATE static
37    
38     #undef FFPU
39     #define FFPU /**/
40    
41     #undef FPU
42     #define FPU fpu.
43    
44     // Define the following macro if branches are expensive. If so,
45     // integer-based isnan() and isinf() functions are implemented.
46     // TODO: move to Makefile.in
47     #define BRANCHES_ARE_EXPENSIVE 1
48    
49     // Use ISO C99 extended-precision math functions (glibc 2.1+)
50     #define FPU_USE_ISO_C99 1
51    
52     // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53     // don't support extended-precision floating-point computations
54     #ifdef WIN32
55     #undef FPU_USE_ISO_C99
56     #endif
57    
58     // Use faster implementation of math functions, but this could cause
59     // some incorrect results (?)
60     // TODO: actually implement the slower but safer versions
61     #define FPU_FAST_MATH 1
62    
63     #if FPU_USE_ISO_C99
64     // NOTE: no prior <math.h> shall be included at this point
65     #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66     #define __USE_ISOC9X 1 // for glibc 2.1.X
67     #include <math.h>
68     #else
69     #include <cmath>
70     using namespace std;
71     #endif
72    
73     /* -------------------------------------------------------------------------- */
74     /* --- Floating-point register types --- */
75     /* -------------------------------------------------------------------------- */
76    
77     // Single : S 8*E 23*F
78     #define FP_SINGLE_EXP_MAX 0xff
79     #define FP_SINGLE_EXP_BIAS 0x7f
80    
81     // Double : S 11*E 52*F
82     #define FP_DOUBLE_EXP_MAX 0x7ff
83     #define FP_DOUBLE_EXP_BIAS 0x3ff
84    
85     // Extended : S 15*E 64*F
86     #define FP_EXTENDED_EXP_MAX 0x7fff
87     #define FP_EXTENDED_EXP_BIAS 0x3fff
88    
89     // Zeroes : E = 0 & F = 0
90     // Infinities : E = MAX & F = 0
91     // Not-A-Number : E = MAX & F # 0
92    
93     /* -------------------------------------------------------------------------- */
94     /* --- Floating-point type shapes (IEEE-compliant) --- */
95     /* -------------------------------------------------------------------------- */
96    
97     // Taken from glibc 2.2.x: ieee754.h
98    
99     // IEEE-754 float format
100     union fpu_single_shape {
101    
102     fpu_single value;
103    
104     /* This is the IEEE 754 single-precision format. */
105     struct {
106     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
107     unsigned int negative:1;
108     unsigned int exponent:8;
109     unsigned int mantissa:23;
110     #endif /* Big endian. */
111     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
112     unsigned int mantissa:23;
113     unsigned int exponent:8;
114     unsigned int negative:1;
115     #endif /* Little endian. */
116     } ieee;
117    
118     /* This format makes it easier to see if a NaN is a signalling NaN. */
119     struct {
120     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
121     unsigned int negative:1;
122     unsigned int exponent:8;
123     unsigned int quiet_nan:1;
124     unsigned int mantissa:22;
125     #endif /* Big endian. */
126     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
127     unsigned int mantissa:22;
128     unsigned int quiet_nan:1;
129     unsigned int exponent:8;
130     unsigned int negative:1;
131     #endif /* Little endian. */
132     } ieee_nan;
133     };
134    
135     // IEEE-754 double format
136     union fpu_double_shape {
137     fpu_double value;
138    
139     /* This is the IEEE 754 double-precision format. */
140     struct {
141     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
142     unsigned int negative:1;
143     unsigned int exponent:11;
144     /* Together these comprise the mantissa. */
145     unsigned int mantissa0:20;
146     unsigned int mantissa1:32;
147     #endif /* Big endian. */
148     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
149     # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
150     unsigned int mantissa0:20;
151     unsigned int exponent:11;
152     unsigned int negative:1;
153     unsigned int mantissa1:32;
154     # else
155     /* Together these comprise the mantissa. */
156     unsigned int mantissa1:32;
157     unsigned int mantissa0:20;
158     unsigned int exponent:11;
159     unsigned int negative:1;
160     # endif
161     #endif /* Little endian. */
162     } ieee;
163    
164     /* This format makes it easier to see if a NaN is a signalling NaN. */
165     struct {
166     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
167     unsigned int negative:1;
168     unsigned int exponent:11;
169     unsigned int quiet_nan:1;
170     /* Together these comprise the mantissa. */
171     unsigned int mantissa0:19;
172     unsigned int mantissa1:32;
173     #else
174     # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
175     unsigned int mantissa0:19;
176     unsigned int quiet_nan:1;
177     unsigned int exponent:11;
178     unsigned int negative:1;
179     unsigned int mantissa1:32;
180     # else
181     /* Together these comprise the mantissa. */
182     unsigned int mantissa1:32;
183     unsigned int mantissa0:19;
184     unsigned int quiet_nan:1;
185     unsigned int exponent:11;
186     unsigned int negative:1;
187     # endif
188     #endif
189     } ieee_nan;
190    
191 gbeauche 1.2 /* This format is used to extract the sign_exponent and mantissa parts only */
192     struct {
193     #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
194     unsigned int msw:32;
195     unsigned int lsw:32;
196 gbeauche 1.1 #else
197 gbeauche 1.2 unsigned int lsw:32;
198     unsigned int msw:32;
199 gbeauche 1.1 #endif
200 gbeauche 1.2 } parts;
201     };
202 gbeauche 1.1
203 gbeauche 1.2 #ifdef USE_LONG_DOUBLE
204 gbeauche 1.1 // IEEE-854 long double format
205     union fpu_extended_shape {
206     fpu_extended value;
207    
208     /* This is the IEEE 854 double-extended-precision format. */
209     struct {
210     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
211     unsigned int negative:1;
212     unsigned int exponent:15;
213     unsigned int empty:16;
214     unsigned int mantissa0:32;
215     unsigned int mantissa1:32;
216     #endif
217     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
218     # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
219     unsigned int exponent:15;
220     unsigned int negative:1;
221     unsigned int empty:16;
222     unsigned int mantissa0:32;
223     unsigned int mantissa1:32;
224     # else
225     unsigned int mantissa1:32;
226     unsigned int mantissa0:32;
227     unsigned int exponent:15;
228     unsigned int negative:1;
229     unsigned int empty:16;
230     # endif
231     #endif
232     } ieee;
233    
234     /* This is for NaNs in the IEEE 854 double-extended-precision format. */
235     struct {
236     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
237     unsigned int negative:1;
238     unsigned int exponent:15;
239     unsigned int empty:16;
240     unsigned int one:1;
241     unsigned int quiet_nan:1;
242     unsigned int mantissa0:30;
243     unsigned int mantissa1:32;
244     #endif
245     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
246     # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
247     unsigned int exponent:15;
248     unsigned int negative:1;
249     unsigned int empty:16;
250     unsigned int mantissa0:30;
251     unsigned int quiet_nan:1;
252     unsigned int one:1;
253     unsigned int mantissa1:32;
254     # else
255     unsigned int mantissa1:32;
256     unsigned int mantissa0:30;
257     unsigned int quiet_nan:1;
258     unsigned int one:1;
259     unsigned int exponent:15;
260     unsigned int negative:1;
261     unsigned int empty:16;
262     # endif
263     #endif
264     } ieee_nan;
265    
266     /* This format is used to extract the sign_exponent and mantissa parts only */
267     struct {
268     #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
269     unsigned int sign_exponent:16;
270     unsigned int empty:16;
271     unsigned int msw:32;
272     unsigned int lsw:32;
273     #else
274     unsigned int lsw:32;
275     unsigned int msw:32;
276     unsigned int sign_exponent:16;
277     unsigned int empty:16;
278     #endif
279     } parts;
280     };
281 gbeauche 1.2 #endif
282    
283     #ifdef USE_QUAD_DOUBLE
284 gbeauche 1.1 // IEEE-854 quad double format
285     union fpu_extended_shape {
286     fpu_extended value;
287    
288     /* This is the IEEE 854 quad-precision format. */
289     struct {
290     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
291     unsigned int negative:1;
292     unsigned int exponent:15;
293     unsigned int mantissa0:16;
294     unsigned int mantissa1:32;
295     unsigned int mantissa2:32;
296     unsigned int mantissa3:32;
297     #else
298     unsigned int mantissa3:32;
299     unsigned int mantissa2:32;
300     unsigned int mantissa1:32;
301     unsigned int mantissa0:16;
302     unsigned int exponent:15;
303     unsigned int negative:1;
304     #endif
305     } ieee;
306    
307     /* This is for NaNs in the IEEE 854 quad-precision format. */
308     struct {
309     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
310     unsigned int negative:1;
311     unsigned int exponent:15;
312     unsigned int quiet_nan:1;
313     unsigned int mantissa0:15;
314     unsigned int mantissa1:30;
315     unsigned int mantissa2:32;
316     unsigned int mantissa3:32;
317     #else
318     unsigned int mantissa3:32;
319     unsigned int mantissa2:32;
320     unsigned int mantissa1:32;
321     unsigned int mantissa0:15;
322     unsigned int quiet_nan:1;
323     unsigned int exponent:15;
324     unsigned int negative:1;
325     #endif
326     } ieee_nan;
327    
328     /* This format is used to extract the sign_exponent and mantissa parts only */
329     #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
330     struct {
331     uae_u64 msw;
332     uae_u64 lsw;
333     } parts64;
334     struct {
335     uae_u32 w0;
336     uae_u32 w1;
337     uae_u32 w2;
338     uae_u32 w3;
339     } parts32;
340     #else
341     struct {
342     uae_u64 lsw;
343     uae_u64 msw;
344     } parts64;
345     struct {
346     uae_u32 w3;
347     uae_u32 w2;
348     uae_u32 w1;
349     uae_u32 w0;
350     } parts32;
351     #endif
352     };
353 gbeauche 1.2 #endif
354 gbeauche 1.1
355     // Declare and initialize a pointer to a shape of the requested FP type
356     #define fp_declare_init_shape(psvar, rfvar, ftype) \
357     fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
358    
359     /* -------------------------------------------------------------------------- */
360     /* --- Extra Math Functions --- */
361     /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
362     /* -------------------------------------------------------------------------- */
363    
364     #undef isnan
365     #if 0 && defined(HAVE_ISNANL)
366     # define isnan(x) isnanl((x))
367     #else
368     # define isnan(x) fp_do_isnan((x))
369     #endif
370    
371     PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
372     {
373 gbeauche 1.2 #ifdef BRANCHES_ARE_EXPENSIVE
374     #ifndef USE_LONG_DOUBLE
375     fp_declare_init_shape(sxp, r, double);
376     uae_s32 hx = sxp->parts.msw;
377     uae_s32 lx = sxp->parts.lsw;
378     hx &= 0x7fffffff;
379     hx |= (uae_u32)(lx | (-lx)) >> 31;
380     hx = 0x7ff00000 - hx;
381     return (int)(((uae_u32)hx) >> 31);
382     #elif USE_QUAD_DOUBLE
383 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
384     uae_s64 hx = sxp->parts64.msw;
385     uae_s64 lx = sxp->parts64.lsw;
386     hx &= 0x7fffffffffffffffLL;
387     hx |= (uae_u64)(lx | (-lx)) >> 63;
388     hx = 0x7fff000000000000LL - hx;
389     return (int)((uae_u64)hx >> 63);
390     #else
391 gbeauche 1.2 fp_declare_init_shape(sxp, r, extended);
392 gbeauche 1.1 uae_s32 se = sxp->parts.sign_exponent;
393     uae_s32 hx = sxp->parts.msw;
394     uae_s32 lx = sxp->parts.lsw;
395     se = (se & 0x7fff) << 1;
396     lx |= hx & 0x7fffffff;
397     se |= (uae_u32)(lx | (-lx)) >> 31;
398     se = 0xfffe - se;
399     // TODO: check whether rshift count is 16 or 31
400     return (int)(((uae_u32)(se)) >> 16);
401     #endif
402     #else
403 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
404     fp_declare_init_shape(sxp, r, double);
405     return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
406     #else
407     fp_declare_init_shape(sxp, r, extended);
408 gbeauche 1.1 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
409 gbeauche 1.2 #endif
410 gbeauche 1.1 && (sxp->ieee_nan.mantissa0 != 0)
411     && (sxp->ieee_nan.mantissa1 != 0)
412     #ifdef USE_QUAD_DOUBLE
413     && (sxp->ieee_nan.mantissa2 != 0)
414     && (sxp->ieee_nan.mantissa3 != 0)
415     #endif
416     ;
417     #endif
418     }
419    
420     #undef isinf
421     #if 0 && defined(HAVE_ISINFL)
422     # define isinf(x) isinfl((x))
423     #else
424     # define isinf(x) fp_do_isinf((x))
425     #endif
426    
427     PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
428     {
429 gbeauche 1.2 #ifdef BRANCHES_ARE_EXPENSIVE
430     #ifndef USE_LONG_DOUBLE
431     fp_declare_init_shape(sxp, r, double);
432     uae_s32 hx = sxp->parts.msw;
433     uae_s32 lx = sxp->parts.lsw;
434     lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
435     lx |= -lx;
436     return ~(lx >> 31) & (hx >> 30);
437     #elif USE_QUAD_DOUBLE
438 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
439     uae_s64 hx = sxp->parts64.msw;
440     uae_s64 lx = sxp->parts64.lsw;
441     lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
442     lx |= -lx;
443     return ~(lx >> 63) & (hx >> 62);
444     #else
445 gbeauche 1.2 fp_declare_init_shape(sxp, r, extended);
446 gbeauche 1.1 uae_s32 se = sxp->parts.sign_exponent;
447     uae_s32 hx = sxp->parts.msw;
448     uae_s32 lx = sxp->parts.lsw;
449     /* This additional ^ 0x80000000 is necessary because in Intel's
450     internal representation of the implicit one is explicit.
451     NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
452     #ifdef __i386__
453     lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
454     #else
455     lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
456     #endif
457     lx |= -lx;
458     se &= 0x8000;
459     return ~(lx >> 31) & (1 - (se >> 14));
460     #endif
461     #else
462 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
463     fp_declare_init_shape(sxp, r, double);
464     return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
465     #else
466     fp_declare_init_shape(sxp, r, extended);
467 gbeauche 1.1 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
468 gbeauche 1.2 #endif
469 gbeauche 1.1 && (sxp->ieee_nan.mantissa0 == 0)
470     && (sxp->ieee_nan.mantissa1 == 0)
471     #ifdef USE_QUAD_DOUBLE
472     && (sxp->ieee_nan.mantissa2 == 0)
473     && (sxp->ieee_nan.mantissa3 == 0)
474     #endif
475     ;
476     #endif
477     }
478    
479     #undef isneg
480     #define isneg(x) fp_do_isneg((x))
481    
482     PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
483     {
484 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
485     fp_declare_init_shape(sxp, r, double);
486     #else
487 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
488 gbeauche 1.2 #endif
489     return sxp->ieee.negative;
490 gbeauche 1.1 }
491    
492     #undef iszero
493     #define iszero(x) fp_do_iszero((x))
494    
495     PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
496     {
497     // TODO: BRANCHES_ARE_EXPENSIVE
498 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
499     fp_declare_init_shape(sxp, r, double);
500     #else
501 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
502 gbeauche 1.2 #endif
503 gbeauche 1.1 return (sxp->ieee.exponent == 0)
504     && (sxp->ieee.mantissa0 == 0)
505     && (sxp->ieee.mantissa1 == 0)
506     #ifdef USE_QUAD_DOUBLE
507     && (sxp->ieee.mantissa2 == 0)
508     && (sxp->ieee.mantissa3 == 0)
509     #endif
510     ;
511     }
512    
513     PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
514     {
515     fl_dest.negative = isneg(r);
516     fl_dest.zero = iszero(r);
517     fl_dest.infinity = isinf(r);
518     fl_dest.nan = isnan(r);
519     fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
520     }
521    
522     PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
523     {
524     fl_source.negative = isneg(r);
525     fl_source.zero = iszero(r);
526     fl_source.infinity = isinf(r);
527     fl_source.nan = isnan(r);
528     fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
529     }
530    
531     PRIVATE inline void FFPU make_nan(fpu_register & r)
532     {
533     // FIXME: is that correct ?
534 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
535     fp_declare_init_shape(sxp, r, double);
536     sxp->ieee.exponent = FP_DOUBLE_EXP_MAX;
537     sxp->ieee.mantissa0 = 0xfffff;
538     #else
539 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
540     sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
541     sxp->ieee.mantissa0 = 0xffffffff;
542 gbeauche 1.2 #endif
543 gbeauche 1.1 sxp->ieee.mantissa1 = 0xffffffff;
544     #ifdef USE_QUAD_DOUBLE
545     sxp->ieee.mantissa2 = 0xffffffff;
546     sxp->ieee.mantissa3 = 0xffffffff;
547     #endif
548     }
549    
550     PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
551     {
552     #if 1
553     r = +0.0;
554     #else
555 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
556     fp_declare_init_shape(sxp, r, double);
557     #else
558 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
559 gbeauche 1.2 #endif
560 gbeauche 1.1 sxp->ieee.negative = 0;
561     sxp->ieee.exponent = 0;
562     sxp->ieee.mantissa0 = 0;
563     sxp->ieee.mantissa1 = 0;
564     #ifdef USE_QUAD_DOUBLE
565     sxp->ieee.mantissa2 = 0;
566     sxp->ieee.mantissa3 = 0;
567     #endif
568     #endif
569     }
570    
571     PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
572     {
573     #if 1
574     r = -0.0;
575     #else
576 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
577     fp_declare_init_shape(sxp, r, double);
578     #else
579 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
580 gbeauche 1.2 #endif
581 gbeauche 1.1 sxp->ieee.negative = 1;
582     sxp->ieee.exponent = 0;
583     sxp->ieee.mantissa0 = 0;
584     sxp->ieee.mantissa1 = 0;
585     #ifdef USE_QUAD_DOUBLE
586     sxp->ieee.mantissa2 = 0;
587     sxp->ieee.mantissa3 = 0;
588     #endif
589     #endif
590     }
591    
592     PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
593     {
594 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
595     fp_declare_init_shape(sxp, r, double);
596     sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
597     #else
598 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
599 gbeauche 1.2 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
600     #endif
601 gbeauche 1.1 sxp->ieee_nan.negative = 0;
602     sxp->ieee_nan.mantissa0 = 0;
603     sxp->ieee_nan.mantissa1 = 0;
604     #ifdef USE_QUAD_DOUBLE
605     sxp->ieee_nan.mantissa2 = 0;
606     sxp->ieee_nan.mantissa3 = 0;
607     #endif
608     }
609    
610     PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
611     {
612 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
613     fp_declare_init_shape(sxp, r, double);
614     sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
615     #else
616 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
617 gbeauche 1.2 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
618     #endif
619 gbeauche 1.1 sxp->ieee_nan.negative = 1;
620     sxp->ieee_nan.mantissa0 = 0;
621     sxp->ieee_nan.mantissa1 = 0;
622     #ifdef USE_QUAD_DOUBLE
623     sxp->ieee_nan.mantissa2 = 0;
624     sxp->ieee_nan.mantissa3 = 0;
625     #endif
626     }
627    
628     PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
629     {
630 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
631     fp_declare_init_shape(sxp, r, double);
632     return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS);
633     #else
634 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
635     return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
636 gbeauche 1.2 #endif
637 gbeauche 1.1 }
638    
639     // Normalize to range 1..2
640     PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
641     {
642 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
643     fp_declare_init_shape(sxp, r, double);
644     sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS;
645     #else
646 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
647     sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
648 gbeauche 1.2 #endif
649 gbeauche 1.1 }
650    
651     // The sign of the quotient is the exclusive-OR of the sign bits
652     // of the source and destination operands.
653     PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
654     {
655 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
656     fp_declare_init_shape(sap, ra, double);
657     fp_declare_init_shape(sbp, rb, double);
658     #else
659 gbeauche 1.1 fp_declare_init_shape(sap, ra, extended);
660     fp_declare_init_shape(sbp, rb, extended);
661 gbeauche 1.2 #endif
662     return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
663 gbeauche 1.1 }
664    
665     /* -------------------------------------------------------------------------- */
666     /* --- Math functions --- */
667     /* -------------------------------------------------------------------------- */
668    
669 gbeauche 1.2 #if FPU_USE_ISO_C99 && USE_LONG_DOUBLE
670 gbeauche 1.1 # define fp_log logl
671     # define fp_log10 log10l
672     # define fp_exp expl
673     # define fp_pow powl
674     # define fp_fabs fabsl
675     # define fp_sqrt sqrtl
676     # define fp_sin sinl
677     # define fp_cos cosl
678     # define fp_tan tanl
679     # define fp_sinh sinhl
680     # define fp_cosh coshl
681     # define fp_tanh tanhl
682     # define fp_asin asinl
683     # define fp_acos acosl
684     # define fp_atan atanl
685     # define fp_asinh asinhl
686     # define fp_acosh acoshl
687     # define fp_atanh atanhl
688     # define fp_floor floorl
689     # define fp_ceil ceill
690     #else
691     # define fp_log log
692     # define fp_log10 log10
693     # define fp_exp exp
694     # define fp_pow pow
695     # define fp_fabs fabs
696     # define fp_sqrt sqrt
697     # define fp_sin sin
698     # define fp_cos cos
699     # define fp_tan tan
700     # define fp_sinh sinh
701     # define fp_cosh cosh
702     # define fp_tanh tanh
703     # define fp_asin asin
704     # define fp_acos acos
705     # define fp_atan atan
706     # define fp_asinh asinh
707     # define fp_acosh acosh
708     # define fp_atanh atanh
709     # define fp_floor floor
710     # define fp_ceil ceil
711     #endif
712    
713     #if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
714     // Assembly optimized support functions. Taken from glibc 2.2.2
715    
716     #undef fp_log
717     #define fp_log fp_do_log
718    
719     #ifndef FPU_FAST_MATH
720     // FIXME: unimplemented
721     PRIVATE fpu_extended fp_do_log(fpu_extended x);
722     #else
723     PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
724     {
725     fpu_extended value;
726     __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
727     return value;
728     }
729     #endif
730    
731     #undef fp_log10
732     #define fp_log10 fp_do_log10
733    
734     #ifndef FPU_FAST_MATH
735     // FIXME: unimplemented
736     PRIVATE fpu_extended fp_do_log10(fpu_extended x);
737     #else
738     PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
739     {
740     fpu_extended value;
741     __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
742     return value;
743     }
744     #endif
745    
746     #undef fp_exp
747     #define fp_exp fp_do_exp
748    
749     #ifndef FPU_FAST_MATH
750     // FIXME: unimplemented
751     PRIVATE fpu_extended fp_do_exp(fpu_extended x);
752     #else
753     PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
754     {
755     fpu_extended value, exponent;
756     __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
757     "fmul %%st(1) # x * log2(e)\n\t"
758     "fst %%st(1)\n\t"
759     "frndint # int(x * log2(e))\n\t"
760     "fxch\n\t"
761     "fsub %%st(1) # fract(x * log2(e))\n\t"
762     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
763     : "=t" (value), "=u" (exponent) : "0" (x));
764     value += 1.0;
765     __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
766     return value;
767     }
768     #endif
769    
770     #undef fp_pow
771     #define fp_pow fp_do_pow
772    
773     PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
774    
775     #undef fp_fabs
776     #define fp_fabs fp_do_fabs
777    
778     PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
779     {
780     fpu_extended value;
781     __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
782     return value;
783     }
784    
785     #undef fp_sqrt
786     #define fp_sqrt fp_do_sqrt
787    
788     PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
789     {
790     fpu_extended value;
791     __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
792     return value;
793     }
794    
795     #undef fp_sin
796     #define fp_sin fp_do_sin
797    
798     PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
799     {
800     fpu_extended value;
801     __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
802     return value;
803     }
804    
805     #undef fp_cos
806     #define fp_cos fp_do_cos
807    
808     PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
809     {
810     fpu_extended value;
811     __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
812     return value;
813     }
814    
815     #undef fp_tan
816     #define fp_tan fp_do_tan
817    
818     PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
819     {
820     fpu_extended value;
821     __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
822     return value;
823     }
824    
825     #undef fp_expm1
826     #define fp_expm1 fp_do_expm1
827    
828     // Returns: exp(X) - 1.0
829     PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
830     {
831     fpu_extended value, exponent, temp;
832     __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
833     "fmul %%st(1) # x * log2(e)\n\t"
834     "fst %%st(1)\n\t"
835     "frndint # int(x * log2(e))\n\t"
836     "fxch\n\t"
837     "fsub %%st(1) # fract(x * log2(e))\n\t"
838     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
839     "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
840     : "=t" (value), "=u" (exponent) : "0" (x));
841     __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
842     temp -= 1.0;
843     return temp + value ? temp + value : x;
844     }
845    
846     #undef fp_sgn1
847     #define fp_sgn1 fp_do_sgn1
848    
849     PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
850     {
851     fp_declare_init_shape(sxp, x, extended);
852     sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
853     sxp->ieee_nan.one = 1;
854     sxp->ieee_nan.quiet_nan = 0;
855     sxp->ieee_nan.mantissa0 = 0;
856     sxp->ieee_nan.mantissa1 = 0;
857     return x;
858     }
859    
860     #undef fp_sinh
861     #define fp_sinh fp_do_sinh
862    
863     #ifndef FPU_FAST_MATH
864     // FIXME: unimplemented
865     PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
866     #else
867     PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
868     {
869     fpu_extended exm1 = fp_expm1(fp_fabs(x));
870     return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
871     }
872     #endif
873    
874     #undef fp_cosh
875     #define fp_cosh fp_do_cosh
876    
877     #ifndef FPU_FAST_MATH
878     // FIXME: unimplemented
879     PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
880     #else
881     PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
882     {
883     fpu_extended ex = fp_exp(x);
884     return 0.5 * (ex + 1.0 / ex);
885     }
886     #endif
887    
888     #undef fp_tanh
889     #define fp_tanh fp_do_tanh
890    
891     #ifndef FPU_FAST_MATH
892     // FIXME: unimplemented
893     PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
894     #else
895     PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
896     {
897     fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
898     return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
899     }
900     #endif
901    
902     #undef fp_atan2
903     #define fp_atan2 fp_do_atan2
904    
905     PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
906     {
907     fpu_extended value;
908     __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
909     return value;
910     }
911    
912     #undef fp_asin
913     #define fp_asin fp_do_asin
914    
915     PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
916     {
917     return fp_atan2(x, fp_sqrt(1.0 - x * x));
918     }
919    
920     #undef fp_acos
921     #define fp_acos fp_do_acos
922    
923     PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
924     {
925     return fp_atan2(fp_sqrt(1.0 - x * x), x);
926     }
927    
928     #undef fp_atan
929     #define fp_atan fp_do_atan
930    
931     PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
932     {
933     fpu_extended value;
934     __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
935     return value;
936     }
937    
938     #undef fp_log1p
939     #define fp_log1p fp_do_log1p
940    
941     // Returns: ln(1.0 + X)
942     PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
943    
944     #undef fp_asinh
945     #define fp_asinh fp_do_asinh
946    
947     PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
948     {
949     fpu_extended y = fp_fabs(x);
950     return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
951     }
952    
953     #undef fp_acosh
954     #define fp_acosh fp_do_acosh
955    
956     PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
957     {
958     return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
959     }
960    
961     #undef fp_atanh
962     #define fp_atanh fp_do_atanh
963    
964     PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
965     {
966     fpu_extended y = fp_fabs(x);
967     return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
968     }
969    
970     #undef fp_floor
971     #define fp_floor fp_do_floor
972    
973     PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
974     {
975     volatile unsigned int cw;
976     __asm__ __volatile__("fnstcw %0" : "=m" (cw));
977     volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
978     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
979     fpu_extended value;
980     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
981     __asm__ __volatile__("fldcw %0" : : "m" (cw));
982     return value;
983     }
984    
985     #undef fp_ceil
986     #define fp_ceil fp_do_ceil
987    
988     PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
989     {
990     volatile unsigned int cw;
991     __asm__ __volatile__("fnstcw %0" : "=m" (cw));
992     volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
993     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
994     fpu_extended value;
995     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
996     __asm__ __volatile__("fldcw %0" : : "m" (cw));
997     return value;
998     }
999    
1000     #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
1001     PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
1002     { \
1003     volatile unsigned int cw; \
1004     __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
1005     volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
1006     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
1007     fpu_extended value; \
1008     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
1009     __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
1010     return value; \
1011     }
1012    
1013     #undef fp_round_to_minus_infinity
1014     #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1015    
1016     DEFINE_ROUND_FUNC(minus_infinity, 0x400)
1017    
1018     #undef fp_round_to_plus_infinity
1019     #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1020    
1021     DEFINE_ROUND_FUNC(plus_infinity, 0x800)
1022    
1023     #undef fp_round_to_zero
1024     #define fp_round_to_zero fp_do_round_to_zero
1025    
1026     DEFINE_ROUND_FUNC(zero, 0xc00)
1027    
1028     #undef fp_round_to_nearest
1029     #define fp_round_to_nearest fp_do_round_to_nearest
1030    
1031     DEFINE_ROUND_FUNC(nearest, 0x000)
1032    
1033     #endif /* X86_ASSEMBLY */
1034    
1035     #ifndef fp_round_to_minus_infinity
1036     #define fp_round_to_minus_infinity(x) fp_floor(x)
1037     #endif
1038    
1039     #ifndef fp_round_to_plus_infinity
1040     #define fp_round_to_plus_infinity(x) fp_ceil(x)
1041     #endif
1042    
1043     #ifndef fp_round_to_zero
1044     #define fp_round_to_zero(x) ((int)(x))
1045     #endif
1046    
1047     #ifndef fp_round_to_nearest
1048     #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1049     #endif
1050    
1051     #endif /* FPU_MATHLIB_H */