ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.9
Committed: 2005-01-30T21:42:16Z (19 years, 10 months ago) by gbeauche
Content type: text/plain
Branch: MAIN
CVS Tags: nigel-build-19, nigel-build-17
Changes since 1.8: +1 -1 lines
Log Message:
Happy New Year!

File Contents

# User Rev Content
1 gbeauche 1.1 /*
2     * fpu/mathlib.h - Floating-point math support library
3     *
4 gbeauche 1.9 * Basilisk II (C) 1997-2005 Christian Bauer
5 gbeauche 1.1 *
6     * MC68881/68040 fpu emulation
7     *
8     * Original UAE FPU, copyright 1996 Herman ten Brugge
9     * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10     * New framework, copyright 2000-2001 Gwenole Beauchesne
11     * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12     *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28     #ifndef FPU_MATHLIB_H
29     #define FPU_MATHLIB_H
30    
31     /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32     #undef PUBLIC
33     #define PUBLIC extern
34    
35     #undef PRIVATE
36     #define PRIVATE static
37    
38     #undef FFPU
39     #define FFPU /**/
40    
41     #undef FPU
42     #define FPU fpu.
43    
44     // Define the following macro if branches are expensive. If so,
45     // integer-based isnan() and isinf() functions are implemented.
46     // TODO: move to Makefile.in
47     #define BRANCHES_ARE_EXPENSIVE 1
48    
49     // Use ISO C99 extended-precision math functions (glibc 2.1+)
50     #define FPU_USE_ISO_C99 1
51    
52     // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53     // don't support extended-precision floating-point computations
54     #ifdef WIN32
55     #undef FPU_USE_ISO_C99
56     #endif
57    
58     // Use faster implementation of math functions, but this could cause
59     // some incorrect results (?)
60     // TODO: actually implement the slower but safer versions
61     #define FPU_FAST_MATH 1
62    
63     #if FPU_USE_ISO_C99
64     // NOTE: no prior <math.h> shall be included at this point
65     #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66     #define __USE_ISOC9X 1 // for glibc 2.1.X
67     #include <math.h>
68     #else
69     #include <cmath>
70     using namespace std;
71     #endif
72    
73     /* -------------------------------------------------------------------------- */
74     /* --- Floating-point register types --- */
75     /* -------------------------------------------------------------------------- */
76    
77     // Single : S 8*E 23*F
78     #define FP_SINGLE_EXP_MAX 0xff
79     #define FP_SINGLE_EXP_BIAS 0x7f
80    
81     // Double : S 11*E 52*F
82     #define FP_DOUBLE_EXP_MAX 0x7ff
83     #define FP_DOUBLE_EXP_BIAS 0x3ff
84    
85     // Extended : S 15*E 64*F
86     #define FP_EXTENDED_EXP_MAX 0x7fff
87     #define FP_EXTENDED_EXP_BIAS 0x3fff
88    
89     // Zeroes : E = 0 & F = 0
90     // Infinities : E = MAX & F = 0
91     // Not-A-Number : E = MAX & F # 0
92    
93     /* -------------------------------------------------------------------------- */
94     /* --- Floating-point type shapes (IEEE-compliant) --- */
95     /* -------------------------------------------------------------------------- */
96    
97     // Taken from glibc 2.2.x: ieee754.h
98    
99     // IEEE-754 float format
100     union fpu_single_shape {
101    
102     fpu_single value;
103    
104     /* This is the IEEE 754 single-precision format. */
105     struct {
106 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
107 gbeauche 1.1 unsigned int negative:1;
108     unsigned int exponent:8;
109     unsigned int mantissa:23;
110 gbeauche 1.3 #else
111 gbeauche 1.1 unsigned int mantissa:23;
112     unsigned int exponent:8;
113     unsigned int negative:1;
114 gbeauche 1.3 #endif
115 gbeauche 1.1 } ieee;
116    
117     /* This format makes it easier to see if a NaN is a signalling NaN. */
118     struct {
119 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
120 gbeauche 1.1 unsigned int negative:1;
121     unsigned int exponent:8;
122     unsigned int quiet_nan:1;
123     unsigned int mantissa:22;
124 gbeauche 1.3 #else
125 gbeauche 1.1 unsigned int mantissa:22;
126     unsigned int quiet_nan:1;
127     unsigned int exponent:8;
128     unsigned int negative:1;
129 gbeauche 1.3 #endif
130 gbeauche 1.1 } ieee_nan;
131     };
132    
133     // IEEE-754 double format
134     union fpu_double_shape {
135     fpu_double value;
136    
137     /* This is the IEEE 754 double-precision format. */
138     struct {
139 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
140 gbeauche 1.1 unsigned int negative:1;
141     unsigned int exponent:11;
142     /* Together these comprise the mantissa. */
143     unsigned int mantissa0:20;
144     unsigned int mantissa1:32;
145 gbeauche 1.3 #else
146     # if HOST_FLOAT_WORDS_BIG_ENDIAN
147 gbeauche 1.1 unsigned int mantissa0:20;
148     unsigned int exponent:11;
149     unsigned int negative:1;
150     unsigned int mantissa1:32;
151     # else
152     /* Together these comprise the mantissa. */
153     unsigned int mantissa1:32;
154     unsigned int mantissa0:20;
155     unsigned int exponent:11;
156     unsigned int negative:1;
157     # endif
158 gbeauche 1.3 #endif
159 gbeauche 1.1 } ieee;
160    
161     /* This format makes it easier to see if a NaN is a signalling NaN. */
162     struct {
163 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
164 gbeauche 1.1 unsigned int negative:1;
165     unsigned int exponent:11;
166     unsigned int quiet_nan:1;
167     /* Together these comprise the mantissa. */
168     unsigned int mantissa0:19;
169     unsigned int mantissa1:32;
170     #else
171 gbeauche 1.3 # if HOST_FLOAT_WORDS_BIG_ENDIAN
172 gbeauche 1.1 unsigned int mantissa0:19;
173     unsigned int quiet_nan:1;
174     unsigned int exponent:11;
175     unsigned int negative:1;
176     unsigned int mantissa1:32;
177     # else
178     /* Together these comprise the mantissa. */
179     unsigned int mantissa1:32;
180     unsigned int mantissa0:19;
181     unsigned int quiet_nan:1;
182     unsigned int exponent:11;
183     unsigned int negative:1;
184     # endif
185     #endif
186     } ieee_nan;
187    
188 gbeauche 1.2 /* This format is used to extract the sign_exponent and mantissa parts only */
189     struct {
190 gbeauche 1.3 #if HOST_FLOAT_WORDS_BIG_ENDIAN
191 gbeauche 1.2 unsigned int msw:32;
192     unsigned int lsw:32;
193 gbeauche 1.1 #else
194 gbeauche 1.2 unsigned int lsw:32;
195     unsigned int msw:32;
196 gbeauche 1.1 #endif
197 gbeauche 1.2 } parts;
198     };
199 gbeauche 1.1
200 gbeauche 1.2 #ifdef USE_LONG_DOUBLE
201 gbeauche 1.1 // IEEE-854 long double format
202     union fpu_extended_shape {
203     fpu_extended value;
204    
205     /* This is the IEEE 854 double-extended-precision format. */
206     struct {
207 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
208 gbeauche 1.1 unsigned int negative:1;
209     unsigned int exponent:15;
210     unsigned int empty:16;
211     unsigned int mantissa0:32;
212     unsigned int mantissa1:32;
213 gbeauche 1.3 #else
214     # if HOST_FLOAT_WORDS_BIG_ENDIAN
215 gbeauche 1.1 unsigned int exponent:15;
216     unsigned int negative:1;
217     unsigned int empty:16;
218     unsigned int mantissa0:32;
219     unsigned int mantissa1:32;
220     # else
221     unsigned int mantissa1:32;
222     unsigned int mantissa0:32;
223     unsigned int exponent:15;
224     unsigned int negative:1;
225     unsigned int empty:16;
226     # endif
227     #endif
228     } ieee;
229    
230     /* This is for NaNs in the IEEE 854 double-extended-precision format. */
231     struct {
232 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
233 gbeauche 1.1 unsigned int negative:1;
234     unsigned int exponent:15;
235     unsigned int empty:16;
236     unsigned int one:1;
237     unsigned int quiet_nan:1;
238     unsigned int mantissa0:30;
239     unsigned int mantissa1:32;
240 gbeauche 1.3 #else
241     # if HOST_FLOAT_WORDS_BIG_ENDIAN
242 gbeauche 1.1 unsigned int exponent:15;
243     unsigned int negative:1;
244     unsigned int empty:16;
245     unsigned int mantissa0:30;
246     unsigned int quiet_nan:1;
247     unsigned int one:1;
248     unsigned int mantissa1:32;
249     # else
250     unsigned int mantissa1:32;
251     unsigned int mantissa0:30;
252     unsigned int quiet_nan:1;
253     unsigned int one:1;
254     unsigned int exponent:15;
255     unsigned int negative:1;
256     unsigned int empty:16;
257     # endif
258     #endif
259     } ieee_nan;
260    
261     /* This format is used to extract the sign_exponent and mantissa parts only */
262     struct {
263 gbeauche 1.3 #if HOST_FLOAT_WORDS_BIG_ENDIAN
264 gbeauche 1.1 unsigned int sign_exponent:16;
265     unsigned int empty:16;
266     unsigned int msw:32;
267     unsigned int lsw:32;
268     #else
269     unsigned int lsw:32;
270     unsigned int msw:32;
271     unsigned int sign_exponent:16;
272     unsigned int empty:16;
273     #endif
274     } parts;
275     };
276 gbeauche 1.2 #endif
277    
278     #ifdef USE_QUAD_DOUBLE
279 gbeauche 1.1 // IEEE-854 quad double format
280     union fpu_extended_shape {
281     fpu_extended value;
282    
283     /* This is the IEEE 854 quad-precision format. */
284     struct {
285 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
286 gbeauche 1.1 unsigned int negative:1;
287     unsigned int exponent:15;
288     unsigned int mantissa0:16;
289     unsigned int mantissa1:32;
290     unsigned int mantissa2:32;
291     unsigned int mantissa3:32;
292     #else
293     unsigned int mantissa3:32;
294     unsigned int mantissa2:32;
295     unsigned int mantissa1:32;
296     unsigned int mantissa0:16;
297     unsigned int exponent:15;
298     unsigned int negative:1;
299     #endif
300     } ieee;
301    
302     /* This is for NaNs in the IEEE 854 quad-precision format. */
303     struct {
304 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
305 gbeauche 1.1 unsigned int negative:1;
306     unsigned int exponent:15;
307     unsigned int quiet_nan:1;
308     unsigned int mantissa0:15;
309     unsigned int mantissa1:30;
310     unsigned int mantissa2:32;
311     unsigned int mantissa3:32;
312     #else
313     unsigned int mantissa3:32;
314     unsigned int mantissa2:32;
315     unsigned int mantissa1:32;
316     unsigned int mantissa0:15;
317     unsigned int quiet_nan:1;
318     unsigned int exponent:15;
319     unsigned int negative:1;
320     #endif
321     } ieee_nan;
322    
323     /* This format is used to extract the sign_exponent and mantissa parts only */
324 gbeauche 1.3 #if HOST_FLOAT_WORDS_BIG_ENDIAN
325 gbeauche 1.1 struct {
326     uae_u64 msw;
327     uae_u64 lsw;
328     } parts64;
329     struct {
330     uae_u32 w0;
331     uae_u32 w1;
332     uae_u32 w2;
333     uae_u32 w3;
334     } parts32;
335     #else
336     struct {
337     uae_u64 lsw;
338     uae_u64 msw;
339     } parts64;
340     struct {
341     uae_u32 w3;
342     uae_u32 w2;
343     uae_u32 w1;
344     uae_u32 w0;
345     } parts32;
346     #endif
347     };
348 gbeauche 1.2 #endif
349 gbeauche 1.1
350     // Declare and initialize a pointer to a shape of the requested FP type
351     #define fp_declare_init_shape(psvar, rfvar, ftype) \
352     fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
353    
354     /* -------------------------------------------------------------------------- */
355     /* --- Extra Math Functions --- */
356     /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
357     /* -------------------------------------------------------------------------- */
358    
359     #undef isnan
360     #if 0 && defined(HAVE_ISNANL)
361     # define isnan(x) isnanl((x))
362     #else
363     # define isnan(x) fp_do_isnan((x))
364     #endif
365    
366     PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
367     {
368 gbeauche 1.2 #ifdef BRANCHES_ARE_EXPENSIVE
369     #ifndef USE_LONG_DOUBLE
370     fp_declare_init_shape(sxp, r, double);
371     uae_s32 hx = sxp->parts.msw;
372     uae_s32 lx = sxp->parts.lsw;
373     hx &= 0x7fffffff;
374     hx |= (uae_u32)(lx | (-lx)) >> 31;
375     hx = 0x7ff00000 - hx;
376     return (int)(((uae_u32)hx) >> 31);
377     #elif USE_QUAD_DOUBLE
378 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
379     uae_s64 hx = sxp->parts64.msw;
380     uae_s64 lx = sxp->parts64.lsw;
381     hx &= 0x7fffffffffffffffLL;
382     hx |= (uae_u64)(lx | (-lx)) >> 63;
383     hx = 0x7fff000000000000LL - hx;
384     return (int)((uae_u64)hx >> 63);
385     #else
386 gbeauche 1.2 fp_declare_init_shape(sxp, r, extended);
387 gbeauche 1.1 uae_s32 se = sxp->parts.sign_exponent;
388     uae_s32 hx = sxp->parts.msw;
389     uae_s32 lx = sxp->parts.lsw;
390     se = (se & 0x7fff) << 1;
391     lx |= hx & 0x7fffffff;
392     se |= (uae_u32)(lx | (-lx)) >> 31;
393     se = 0xfffe - se;
394     // TODO: check whether rshift count is 16 or 31
395     return (int)(((uae_u32)(se)) >> 16);
396     #endif
397     #else
398 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
399     fp_declare_init_shape(sxp, r, extended);
400     return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
401     #else
402 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
403     return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
404     #endif
405 gbeauche 1.1 && (sxp->ieee_nan.mantissa0 != 0)
406     && (sxp->ieee_nan.mantissa1 != 0)
407     #ifdef USE_QUAD_DOUBLE
408     && (sxp->ieee_nan.mantissa2 != 0)
409     && (sxp->ieee_nan.mantissa3 != 0)
410     #endif
411     ;
412     #endif
413     }
414    
415     #undef isinf
416     #if 0 && defined(HAVE_ISINFL)
417     # define isinf(x) isinfl((x))
418     #else
419     # define isinf(x) fp_do_isinf((x))
420     #endif
421    
422     PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
423     {
424 gbeauche 1.2 #ifdef BRANCHES_ARE_EXPENSIVE
425     #ifndef USE_LONG_DOUBLE
426     fp_declare_init_shape(sxp, r, double);
427     uae_s32 hx = sxp->parts.msw;
428     uae_s32 lx = sxp->parts.lsw;
429     lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
430     lx |= -lx;
431     return ~(lx >> 31) & (hx >> 30);
432     #elif USE_QUAD_DOUBLE
433 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
434     uae_s64 hx = sxp->parts64.msw;
435     uae_s64 lx = sxp->parts64.lsw;
436     lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
437     lx |= -lx;
438     return ~(lx >> 63) & (hx >> 62);
439     #else
440 gbeauche 1.2 fp_declare_init_shape(sxp, r, extended);
441 gbeauche 1.1 uae_s32 se = sxp->parts.sign_exponent;
442     uae_s32 hx = sxp->parts.msw;
443     uae_s32 lx = sxp->parts.lsw;
444     /* This additional ^ 0x80000000 is necessary because in Intel's
445     internal representation of the implicit one is explicit.
446     NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
447     #ifdef __i386__
448     lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
449     #else
450     lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
451     #endif
452     lx |= -lx;
453     se &= 0x8000;
454     return ~(lx >> 31) & (1 - (se >> 14));
455     #endif
456     #else
457 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
458     fp_declare_init_shape(sxp, r, extended);
459     return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
460     #else
461 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
462     return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
463     #endif
464 gbeauche 1.1 && (sxp->ieee_nan.mantissa0 == 0)
465     && (sxp->ieee_nan.mantissa1 == 0)
466     #ifdef USE_QUAD_DOUBLE
467     && (sxp->ieee_nan.mantissa2 == 0)
468     && (sxp->ieee_nan.mantissa3 == 0)
469     #endif
470     ;
471     #endif
472     }
473    
474     #undef isneg
475     #define isneg(x) fp_do_isneg((x))
476    
477     PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
478     {
479 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
480     fp_declare_init_shape(sxp, r, extended);
481     #else
482 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
483     #endif
484     return sxp->ieee.negative;
485 gbeauche 1.1 }
486    
487     #undef iszero
488     #define iszero(x) fp_do_iszero((x))
489    
490     PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
491     {
492     // TODO: BRANCHES_ARE_EXPENSIVE
493 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
494     fp_declare_init_shape(sxp, r, extended);
495     #else
496 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
497     #endif
498 gbeauche 1.1 return (sxp->ieee.exponent == 0)
499     && (sxp->ieee.mantissa0 == 0)
500     && (sxp->ieee.mantissa1 == 0)
501     #ifdef USE_QUAD_DOUBLE
502     && (sxp->ieee.mantissa2 == 0)
503     && (sxp->ieee.mantissa3 == 0)
504     #endif
505     ;
506     }
507    
508     PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
509     {
510     fl_dest.negative = isneg(r);
511     fl_dest.zero = iszero(r);
512     fl_dest.infinity = isinf(r);
513     fl_dest.nan = isnan(r);
514     fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
515     }
516    
517     PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
518     {
519     fl_source.negative = isneg(r);
520     fl_source.zero = iszero(r);
521     fl_source.infinity = isinf(r);
522     fl_source.nan = isnan(r);
523     fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
524     }
525    
526     PRIVATE inline void FFPU make_nan(fpu_register & r)
527     {
528     // FIXME: is that correct ?
529 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
530     fp_declare_init_shape(sxp, r, extended);
531     sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
532     sxp->ieee.mantissa0 = 0xffffffff;
533     #else
534 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
535     sxp->ieee.exponent = FP_DOUBLE_EXP_MAX;
536     sxp->ieee.mantissa0 = 0xfffff;
537     #endif
538 gbeauche 1.1 sxp->ieee.mantissa1 = 0xffffffff;
539     #ifdef USE_QUAD_DOUBLE
540     sxp->ieee.mantissa2 = 0xffffffff;
541     sxp->ieee.mantissa3 = 0xffffffff;
542     #endif
543     }
544    
545     PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
546     {
547     #if 1
548     r = +0.0;
549     #else
550 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
551     fp_declare_init_shape(sxp, r, extended);
552     #else
553 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
554     #endif
555 gbeauche 1.1 sxp->ieee.negative = 0;
556     sxp->ieee.exponent = 0;
557     sxp->ieee.mantissa0 = 0;
558     sxp->ieee.mantissa1 = 0;
559     #ifdef USE_QUAD_DOUBLE
560     sxp->ieee.mantissa2 = 0;
561     sxp->ieee.mantissa3 = 0;
562     #endif
563     #endif
564     }
565    
566     PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
567     {
568     #if 1
569     r = -0.0;
570     #else
571 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
572     fp_declare_init_shape(sxp, r, extended);
573     #else
574 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
575     #endif
576 gbeauche 1.1 sxp->ieee.negative = 1;
577     sxp->ieee.exponent = 0;
578     sxp->ieee.mantissa0 = 0;
579     sxp->ieee.mantissa1 = 0;
580     #ifdef USE_QUAD_DOUBLE
581     sxp->ieee.mantissa2 = 0;
582     sxp->ieee.mantissa3 = 0;
583     #endif
584     #endif
585     }
586    
587     PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
588     {
589 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
590     fp_declare_init_shape(sxp, r, extended);
591     sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
592     #else
593 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
594     sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
595     #endif
596 gbeauche 1.1 sxp->ieee_nan.negative = 0;
597     sxp->ieee_nan.mantissa0 = 0;
598     sxp->ieee_nan.mantissa1 = 0;
599     #ifdef USE_QUAD_DOUBLE
600     sxp->ieee_nan.mantissa2 = 0;
601     sxp->ieee_nan.mantissa3 = 0;
602     #endif
603     }
604    
605     PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
606     {
607 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
608     fp_declare_init_shape(sxp, r, extended);
609     sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
610     #else
611 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
612     sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
613     #endif
614 gbeauche 1.1 sxp->ieee_nan.negative = 1;
615     sxp->ieee_nan.mantissa0 = 0;
616     sxp->ieee_nan.mantissa1 = 0;
617     #ifdef USE_QUAD_DOUBLE
618     sxp->ieee_nan.mantissa2 = 0;
619     sxp->ieee_nan.mantissa3 = 0;
620     #endif
621     }
622    
623     PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
624     {
625 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
626     fp_declare_init_shape(sxp, r, extended);
627     return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
628     #else
629 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
630     return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS);
631     #endif
632 gbeauche 1.1 }
633    
634     // Normalize to range 1..2
635     PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
636     {
637 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
638     fp_declare_init_shape(sxp, r, extended);
639     sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
640     #else
641 gbeauche 1.2 fp_declare_init_shape(sxp, r, double);
642     sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS;
643     #endif
644 gbeauche 1.1 }
645    
646     // The sign of the quotient is the exclusive-OR of the sign bits
647     // of the source and destination operands.
648     PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
649     {
650 gbeauche 1.5 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
651     fp_declare_init_shape(sap, ra, extended);
652     fp_declare_init_shape(sbp, rb, extended);
653     #else
654 gbeauche 1.2 fp_declare_init_shape(sap, ra, double);
655     fp_declare_init_shape(sbp, rb, double);
656     #endif
657     return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
658 gbeauche 1.1 }
659    
660     /* -------------------------------------------------------------------------- */
661     /* --- Math functions --- */
662     /* -------------------------------------------------------------------------- */
663    
664 gbeauche 1.5 #if FPU_USE_ISO_C99 && (USE_LONG_DOUBLE || USE_QUAD_DOUBLE)
665 gbeauche 1.4 # ifdef HAVE_LOGL
666     # define fp_log logl
667     # endif
668     # ifdef HAVE_LOG10L
669     # define fp_log10 log10l
670     # endif
671     # ifdef HAVE_EXPL
672     # define fp_exp expl
673     # endif
674     # ifdef HAVE_POWL
675     # define fp_pow powl
676     # endif
677     # ifdef HAVE_FABSL
678     # define fp_fabs fabsl
679     # endif
680     # ifdef HAVE_SQRTL
681     # define fp_sqrt sqrtl
682     # endif
683     # ifdef HAVE_SINL
684     # define fp_sin sinl
685     # endif
686     # ifdef HAVE_COSL
687     # define fp_cos cosl
688     # endif
689     # ifdef HAVE_TANL
690     # define fp_tan tanl
691     # endif
692     # ifdef HAVE_SINHL
693     # define fp_sinh sinhl
694     # endif
695     # ifdef HAVE_COSHL
696     # define fp_cosh coshl
697     # endif
698     # ifdef HAVE_TANHL
699     # define fp_tanh tanhl
700     # endif
701     # ifdef HAVE_ASINL
702     # define fp_asin asinl
703     # endif
704     # ifdef HAVE_ACOSL
705     # define fp_acos acosl
706     # endif
707     # ifdef HAVE_ATANL
708     # define fp_atan atanl
709     # endif
710     # ifdef HAVE_ASINHL
711     # define fp_asinh asinhl
712     # endif
713     # ifdef HAVE_ACOSHL
714     # define fp_acosh acoshl
715     # endif
716     # ifdef HAVE_ATANHL
717     # define fp_atanh atanhl
718     # endif
719     # ifdef HAVE_FLOORL
720     # define fp_floor floorl
721     # endif
722     # ifdef HAVE_CEILL
723     # define fp_ceil ceill
724     # endif
725     #endif
726    
727     #ifndef fp_log
728 gbeauche 1.1 # define fp_log log
729 gbeauche 1.4 #endif
730     #ifndef fp_log10
731 gbeauche 1.1 # define fp_log10 log10
732 gbeauche 1.4 #endif
733     #ifndef fp_exp
734 gbeauche 1.1 # define fp_exp exp
735 gbeauche 1.4 #endif
736     #ifndef fp_pow
737 gbeauche 1.1 # define fp_pow pow
738 gbeauche 1.4 #endif
739     #ifndef fp_fabs
740 gbeauche 1.1 # define fp_fabs fabs
741 gbeauche 1.4 #endif
742     #ifndef fp_sqrt
743 gbeauche 1.1 # define fp_sqrt sqrt
744 gbeauche 1.4 #endif
745     #ifndef fp_sin
746 gbeauche 1.1 # define fp_sin sin
747 gbeauche 1.4 #endif
748     #ifndef fp_cos
749 gbeauche 1.1 # define fp_cos cos
750 gbeauche 1.4 #endif
751     #ifndef fp_tan
752 gbeauche 1.1 # define fp_tan tan
753 gbeauche 1.4 #endif
754     #ifndef fp_sinh
755 gbeauche 1.1 # define fp_sinh sinh
756 gbeauche 1.4 #endif
757     #ifndef fp_cosh
758 gbeauche 1.1 # define fp_cosh cosh
759 gbeauche 1.4 #endif
760     #ifndef fp_tanh
761 gbeauche 1.1 # define fp_tanh tanh
762 gbeauche 1.4 #endif
763     #ifndef fp_asin
764 gbeauche 1.1 # define fp_asin asin
765 gbeauche 1.4 #endif
766     #ifndef fp_acos
767 gbeauche 1.1 # define fp_acos acos
768 gbeauche 1.4 #endif
769     #ifndef fp_atan
770 gbeauche 1.1 # define fp_atan atan
771 gbeauche 1.4 #endif
772     #ifndef fp_asinh
773 gbeauche 1.1 # define fp_asinh asinh
774 gbeauche 1.4 #endif
775     #ifndef fp_acosh
776 gbeauche 1.1 # define fp_acosh acosh
777 gbeauche 1.4 #endif
778     #ifndef fp_atanh
779 gbeauche 1.1 # define fp_atanh atanh
780 gbeauche 1.4 #endif
781     #ifndef fp_floor
782 gbeauche 1.1 # define fp_floor floor
783 gbeauche 1.4 #endif
784     #ifndef fp_ceil
785 gbeauche 1.1 # define fp_ceil ceil
786     #endif
787    
788 gbeauche 1.6 #if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
789 gbeauche 1.1 // Assembly optimized support functions. Taken from glibc 2.2.2
790    
791     #undef fp_log
792     #define fp_log fp_do_log
793    
794     #ifndef FPU_FAST_MATH
795     // FIXME: unimplemented
796     PRIVATE fpu_extended fp_do_log(fpu_extended x);
797     #else
798     PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
799     {
800     fpu_extended value;
801     __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
802     return value;
803     }
804     #endif
805    
806     #undef fp_log10
807     #define fp_log10 fp_do_log10
808    
809     #ifndef FPU_FAST_MATH
810     // FIXME: unimplemented
811     PRIVATE fpu_extended fp_do_log10(fpu_extended x);
812     #else
813     PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
814     {
815     fpu_extended value;
816     __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
817     return value;
818     }
819     #endif
820    
821     #undef fp_exp
822     #define fp_exp fp_do_exp
823    
824     #ifndef FPU_FAST_MATH
825     // FIXME: unimplemented
826     PRIVATE fpu_extended fp_do_exp(fpu_extended x);
827     #else
828     PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
829     {
830     fpu_extended value, exponent;
831     __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
832     "fmul %%st(1) # x * log2(e)\n\t"
833     "fst %%st(1)\n\t"
834     "frndint # int(x * log2(e))\n\t"
835     "fxch\n\t"
836     "fsub %%st(1) # fract(x * log2(e))\n\t"
837     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
838     : "=t" (value), "=u" (exponent) : "0" (x));
839     value += 1.0;
840     __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
841     return value;
842     }
843     #endif
844    
845     #undef fp_pow
846     #define fp_pow fp_do_pow
847    
848     PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
849    
850     #undef fp_fabs
851     #define fp_fabs fp_do_fabs
852    
853     PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
854     {
855     fpu_extended value;
856     __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
857     return value;
858     }
859    
860     #undef fp_sqrt
861     #define fp_sqrt fp_do_sqrt
862    
863     PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
864     {
865     fpu_extended value;
866     __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
867     return value;
868     }
869    
870     #undef fp_sin
871     #define fp_sin fp_do_sin
872    
873     PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
874     {
875     fpu_extended value;
876     __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
877     return value;
878     }
879    
880     #undef fp_cos
881     #define fp_cos fp_do_cos
882    
883     PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
884     {
885     fpu_extended value;
886     __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
887     return value;
888     }
889    
890     #undef fp_tan
891     #define fp_tan fp_do_tan
892    
893     PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
894     {
895     fpu_extended value;
896     __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
897     return value;
898     }
899    
900     #undef fp_expm1
901     #define fp_expm1 fp_do_expm1
902    
903     // Returns: exp(X) - 1.0
904     PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
905     {
906     fpu_extended value, exponent, temp;
907     __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
908     "fmul %%st(1) # x * log2(e)\n\t"
909     "fst %%st(1)\n\t"
910     "frndint # int(x * log2(e))\n\t"
911     "fxch\n\t"
912     "fsub %%st(1) # fract(x * log2(e))\n\t"
913     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
914     "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
915     : "=t" (value), "=u" (exponent) : "0" (x));
916     __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
917     temp -= 1.0;
918     return temp + value ? temp + value : x;
919     }
920    
921     #undef fp_sgn1
922     #define fp_sgn1 fp_do_sgn1
923    
924     PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
925     {
926 gbeauche 1.7 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
927 gbeauche 1.1 fp_declare_init_shape(sxp, x, extended);
928     sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
929     sxp->ieee_nan.one = 1;
930 gbeauche 1.7 #else
931     fp_declare_init_shape(sxp, x, double);
932     sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
933     #endif
934 gbeauche 1.1 sxp->ieee_nan.quiet_nan = 0;
935     sxp->ieee_nan.mantissa0 = 0;
936     sxp->ieee_nan.mantissa1 = 0;
937     return x;
938     }
939    
940     #undef fp_sinh
941     #define fp_sinh fp_do_sinh
942    
943     #ifndef FPU_FAST_MATH
944     // FIXME: unimplemented
945     PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
946     #else
947     PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
948     {
949     fpu_extended exm1 = fp_expm1(fp_fabs(x));
950     return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
951     }
952     #endif
953    
954     #undef fp_cosh
955     #define fp_cosh fp_do_cosh
956    
957     #ifndef FPU_FAST_MATH
958     // FIXME: unimplemented
959     PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
960     #else
961     PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
962     {
963     fpu_extended ex = fp_exp(x);
964     return 0.5 * (ex + 1.0 / ex);
965     }
966     #endif
967    
968     #undef fp_tanh
969     #define fp_tanh fp_do_tanh
970    
971     #ifndef FPU_FAST_MATH
972     // FIXME: unimplemented
973     PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
974     #else
975     PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
976     {
977     fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
978     return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
979     }
980     #endif
981    
982     #undef fp_atan2
983     #define fp_atan2 fp_do_atan2
984    
985     PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
986     {
987     fpu_extended value;
988     __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
989     return value;
990     }
991    
992     #undef fp_asin
993     #define fp_asin fp_do_asin
994    
995     PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
996     {
997     return fp_atan2(x, fp_sqrt(1.0 - x * x));
998     }
999    
1000     #undef fp_acos
1001     #define fp_acos fp_do_acos
1002    
1003     PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
1004     {
1005     return fp_atan2(fp_sqrt(1.0 - x * x), x);
1006     }
1007    
1008     #undef fp_atan
1009     #define fp_atan fp_do_atan
1010    
1011     PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
1012     {
1013     fpu_extended value;
1014     __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
1015     return value;
1016     }
1017    
1018     #undef fp_log1p
1019     #define fp_log1p fp_do_log1p
1020    
1021     // Returns: ln(1.0 + X)
1022     PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
1023    
1024     #undef fp_asinh
1025     #define fp_asinh fp_do_asinh
1026    
1027     PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
1028     {
1029     fpu_extended y = fp_fabs(x);
1030     return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
1031     }
1032    
1033     #undef fp_acosh
1034     #define fp_acosh fp_do_acosh
1035    
1036     PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
1037     {
1038     return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
1039     }
1040    
1041     #undef fp_atanh
1042     #define fp_atanh fp_do_atanh
1043    
1044     PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
1045     {
1046     fpu_extended y = fp_fabs(x);
1047     return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
1048     }
1049    
1050     #undef fp_floor
1051     #define fp_floor fp_do_floor
1052    
1053     PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
1054     {
1055     volatile unsigned int cw;
1056     __asm__ __volatile__("fnstcw %0" : "=m" (cw));
1057     volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
1058     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
1059     fpu_extended value;
1060     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
1061     __asm__ __volatile__("fldcw %0" : : "m" (cw));
1062     return value;
1063     }
1064    
1065     #undef fp_ceil
1066     #define fp_ceil fp_do_ceil
1067    
1068     PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
1069     {
1070     volatile unsigned int cw;
1071     __asm__ __volatile__("fnstcw %0" : "=m" (cw));
1072     volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
1073     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
1074     fpu_extended value;
1075     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
1076     __asm__ __volatile__("fldcw %0" : : "m" (cw));
1077     return value;
1078     }
1079    
1080     #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
1081     PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
1082     { \
1083     volatile unsigned int cw; \
1084     __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
1085     volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
1086     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
1087     fpu_extended value; \
1088     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
1089     __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
1090     return value; \
1091     }
1092    
1093     #undef fp_round_to_minus_infinity
1094     #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1095    
1096     DEFINE_ROUND_FUNC(minus_infinity, 0x400)
1097    
1098     #undef fp_round_to_plus_infinity
1099     #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1100    
1101     DEFINE_ROUND_FUNC(plus_infinity, 0x800)
1102    
1103     #undef fp_round_to_zero
1104     #define fp_round_to_zero fp_do_round_to_zero
1105    
1106     DEFINE_ROUND_FUNC(zero, 0xc00)
1107    
1108     #undef fp_round_to_nearest
1109     #define fp_round_to_nearest fp_do_round_to_nearest
1110    
1111     DEFINE_ROUND_FUNC(nearest, 0x000)
1112    
1113 gbeauche 1.6 #endif /* USE_X87_ASSEMBLY */
1114 gbeauche 1.1
1115     #ifndef fp_round_to_minus_infinity
1116     #define fp_round_to_minus_infinity(x) fp_floor(x)
1117     #endif
1118    
1119     #ifndef fp_round_to_plus_infinity
1120     #define fp_round_to_plus_infinity(x) fp_ceil(x)
1121     #endif
1122    
1123     #ifndef fp_round_to_zero
1124     #define fp_round_to_zero(x) ((int)(x))
1125     #endif
1126    
1127     #ifndef fp_round_to_nearest
1128     #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1129     #endif
1130    
1131     #endif /* FPU_MATHLIB_H */