ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.3
Committed: 2002-09-16T12:01:38Z (22 years, 2 months ago) by gbeauche
Content type: text/plain
Branch: MAIN
Changes since 1.2: +23 -28 lines
Log Message:
- FP endianness is now testing at configure time
- Fix junk introduced in previous rev for extract_extended()

File Contents

# User Rev Content
1 gbeauche 1.1 /*
2     * fpu/mathlib.h - Floating-point math support library
3     *
4     * Basilisk II (C) 1997-2001 Christian Bauer
5     *
6     * MC68881/68040 fpu emulation
7     *
8     * Original UAE FPU, copyright 1996 Herman ten Brugge
9     * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10     * New framework, copyright 2000-2001 Gwenole Beauchesne
11     * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12     *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28     #ifndef FPU_MATHLIB_H
29     #define FPU_MATHLIB_H
30    
31     /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32     #undef PUBLIC
33     #define PUBLIC extern
34    
35     #undef PRIVATE
36     #define PRIVATE static
37    
38     #undef FFPU
39     #define FFPU /**/
40    
41     #undef FPU
42     #define FPU fpu.
43    
44     // Define the following macro if branches are expensive. If so,
45     // integer-based isnan() and isinf() functions are implemented.
46     // TODO: move to Makefile.in
47     #define BRANCHES_ARE_EXPENSIVE 1
48    
49     // Use ISO C99 extended-precision math functions (glibc 2.1+)
50     #define FPU_USE_ISO_C99 1
51    
52     // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53     // don't support extended-precision floating-point computations
54     #ifdef WIN32
55     #undef FPU_USE_ISO_C99
56     #endif
57    
58     // Use faster implementation of math functions, but this could cause
59     // some incorrect results (?)
60     // TODO: actually implement the slower but safer versions
61     #define FPU_FAST_MATH 1
62    
63     #if FPU_USE_ISO_C99
64     // NOTE: no prior <math.h> shall be included at this point
65     #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66     #define __USE_ISOC9X 1 // for glibc 2.1.X
67     #include <math.h>
68     #else
69     #include <cmath>
70     using namespace std;
71     #endif
72    
73     /* -------------------------------------------------------------------------- */
74     /* --- Floating-point register types --- */
75     /* -------------------------------------------------------------------------- */
76    
77     // Single : S 8*E 23*F
78     #define FP_SINGLE_EXP_MAX 0xff
79     #define FP_SINGLE_EXP_BIAS 0x7f
80    
81     // Double : S 11*E 52*F
82     #define FP_DOUBLE_EXP_MAX 0x7ff
83     #define FP_DOUBLE_EXP_BIAS 0x3ff
84    
85     // Extended : S 15*E 64*F
86     #define FP_EXTENDED_EXP_MAX 0x7fff
87     #define FP_EXTENDED_EXP_BIAS 0x3fff
88    
89     // Zeroes : E = 0 & F = 0
90     // Infinities : E = MAX & F = 0
91     // Not-A-Number : E = MAX & F # 0
92    
93     /* -------------------------------------------------------------------------- */
94     /* --- Floating-point type shapes (IEEE-compliant) --- */
95     /* -------------------------------------------------------------------------- */
96    
97     // Taken from glibc 2.2.x: ieee754.h
98    
99     // IEEE-754 float format
100     union fpu_single_shape {
101    
102     fpu_single value;
103    
104     /* This is the IEEE 754 single-precision format. */
105     struct {
106 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
107 gbeauche 1.1 unsigned int negative:1;
108     unsigned int exponent:8;
109     unsigned int mantissa:23;
110 gbeauche 1.3 #else
111 gbeauche 1.1 unsigned int mantissa:23;
112     unsigned int exponent:8;
113     unsigned int negative:1;
114 gbeauche 1.3 #endif
115 gbeauche 1.1 } ieee;
116    
117     /* This format makes it easier to see if a NaN is a signalling NaN. */
118     struct {
119 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
120 gbeauche 1.1 unsigned int negative:1;
121     unsigned int exponent:8;
122     unsigned int quiet_nan:1;
123     unsigned int mantissa:22;
124 gbeauche 1.3 #else
125 gbeauche 1.1 unsigned int mantissa:22;
126     unsigned int quiet_nan:1;
127     unsigned int exponent:8;
128     unsigned int negative:1;
129 gbeauche 1.3 #endif
130 gbeauche 1.1 } ieee_nan;
131     };
132    
133     // IEEE-754 double format
134     union fpu_double_shape {
135     fpu_double value;
136    
137     /* This is the IEEE 754 double-precision format. */
138     struct {
139 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
140 gbeauche 1.1 unsigned int negative:1;
141     unsigned int exponent:11;
142     /* Together these comprise the mantissa. */
143     unsigned int mantissa0:20;
144     unsigned int mantissa1:32;
145 gbeauche 1.3 #else
146     # if HOST_FLOAT_WORDS_BIG_ENDIAN
147 gbeauche 1.1 unsigned int mantissa0:20;
148     unsigned int exponent:11;
149     unsigned int negative:1;
150     unsigned int mantissa1:32;
151     # else
152     /* Together these comprise the mantissa. */
153     unsigned int mantissa1:32;
154     unsigned int mantissa0:20;
155     unsigned int exponent:11;
156     unsigned int negative:1;
157     # endif
158 gbeauche 1.3 #endif
159 gbeauche 1.1 } ieee;
160    
161     /* This format makes it easier to see if a NaN is a signalling NaN. */
162     struct {
163 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
164 gbeauche 1.1 unsigned int negative:1;
165     unsigned int exponent:11;
166     unsigned int quiet_nan:1;
167     /* Together these comprise the mantissa. */
168     unsigned int mantissa0:19;
169     unsigned int mantissa1:32;
170     #else
171 gbeauche 1.3 # if HOST_FLOAT_WORDS_BIG_ENDIAN
172 gbeauche 1.1 unsigned int mantissa0:19;
173     unsigned int quiet_nan:1;
174     unsigned int exponent:11;
175     unsigned int negative:1;
176     unsigned int mantissa1:32;
177     # else
178     /* Together these comprise the mantissa. */
179     unsigned int mantissa1:32;
180     unsigned int mantissa0:19;
181     unsigned int quiet_nan:1;
182     unsigned int exponent:11;
183     unsigned int negative:1;
184     # endif
185     #endif
186     } ieee_nan;
187    
188 gbeauche 1.2 /* This format is used to extract the sign_exponent and mantissa parts only */
189     struct {
190 gbeauche 1.3 #if HOST_FLOAT_WORDS_BIG_ENDIAN
191 gbeauche 1.2 unsigned int msw:32;
192     unsigned int lsw:32;
193 gbeauche 1.1 #else
194 gbeauche 1.2 unsigned int lsw:32;
195     unsigned int msw:32;
196 gbeauche 1.1 #endif
197 gbeauche 1.2 } parts;
198     };
199 gbeauche 1.1
200 gbeauche 1.2 #ifdef USE_LONG_DOUBLE
201 gbeauche 1.1 // IEEE-854 long double format
202     union fpu_extended_shape {
203     fpu_extended value;
204    
205     /* This is the IEEE 854 double-extended-precision format. */
206     struct {
207 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
208 gbeauche 1.1 unsigned int negative:1;
209     unsigned int exponent:15;
210     unsigned int empty:16;
211     unsigned int mantissa0:32;
212     unsigned int mantissa1:32;
213 gbeauche 1.3 #else
214     # if HOST_FLOAT_WORDS_BIG_ENDIAN
215 gbeauche 1.1 unsigned int exponent:15;
216     unsigned int negative:1;
217     unsigned int empty:16;
218     unsigned int mantissa0:32;
219     unsigned int mantissa1:32;
220     # else
221     unsigned int mantissa1:32;
222     unsigned int mantissa0:32;
223     unsigned int exponent:15;
224     unsigned int negative:1;
225     unsigned int empty:16;
226     # endif
227     #endif
228     } ieee;
229    
230     /* This is for NaNs in the IEEE 854 double-extended-precision format. */
231     struct {
232 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
233 gbeauche 1.1 unsigned int negative:1;
234     unsigned int exponent:15;
235     unsigned int empty:16;
236     unsigned int one:1;
237     unsigned int quiet_nan:1;
238     unsigned int mantissa0:30;
239     unsigned int mantissa1:32;
240 gbeauche 1.3 #else
241     # if HOST_FLOAT_WORDS_BIG_ENDIAN
242 gbeauche 1.1 unsigned int exponent:15;
243     unsigned int negative:1;
244     unsigned int empty:16;
245     unsigned int mantissa0:30;
246     unsigned int quiet_nan:1;
247     unsigned int one:1;
248     unsigned int mantissa1:32;
249     # else
250     unsigned int mantissa1:32;
251     unsigned int mantissa0:30;
252     unsigned int quiet_nan:1;
253     unsigned int one:1;
254     unsigned int exponent:15;
255     unsigned int negative:1;
256     unsigned int empty:16;
257     # endif
258     #endif
259     } ieee_nan;
260    
261     /* This format is used to extract the sign_exponent and mantissa parts only */
262     struct {
263 gbeauche 1.3 #if HOST_FLOAT_WORDS_BIG_ENDIAN
264 gbeauche 1.1 unsigned int sign_exponent:16;
265     unsigned int empty:16;
266     unsigned int msw:32;
267     unsigned int lsw:32;
268     #else
269     unsigned int lsw:32;
270     unsigned int msw:32;
271     unsigned int sign_exponent:16;
272     unsigned int empty:16;
273     #endif
274     } parts;
275     };
276 gbeauche 1.2 #endif
277    
278     #ifdef USE_QUAD_DOUBLE
279 gbeauche 1.1 // IEEE-854 quad double format
280     union fpu_extended_shape {
281     fpu_extended value;
282    
283     /* This is the IEEE 854 quad-precision format. */
284     struct {
285 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
286 gbeauche 1.1 unsigned int negative:1;
287     unsigned int exponent:15;
288     unsigned int mantissa0:16;
289     unsigned int mantissa1:32;
290     unsigned int mantissa2:32;
291     unsigned int mantissa3:32;
292     #else
293     unsigned int mantissa3:32;
294     unsigned int mantissa2:32;
295     unsigned int mantissa1:32;
296     unsigned int mantissa0:16;
297     unsigned int exponent:15;
298     unsigned int negative:1;
299     #endif
300     } ieee;
301    
302     /* This is for NaNs in the IEEE 854 quad-precision format. */
303     struct {
304 gbeauche 1.3 #ifdef WORDS_BIGENDIAN
305 gbeauche 1.1 unsigned int negative:1;
306     unsigned int exponent:15;
307     unsigned int quiet_nan:1;
308     unsigned int mantissa0:15;
309     unsigned int mantissa1:30;
310     unsigned int mantissa2:32;
311     unsigned int mantissa3:32;
312     #else
313     unsigned int mantissa3:32;
314     unsigned int mantissa2:32;
315     unsigned int mantissa1:32;
316     unsigned int mantissa0:15;
317     unsigned int quiet_nan:1;
318     unsigned int exponent:15;
319     unsigned int negative:1;
320     #endif
321     } ieee_nan;
322    
323     /* This format is used to extract the sign_exponent and mantissa parts only */
324 gbeauche 1.3 #if HOST_FLOAT_WORDS_BIG_ENDIAN
325 gbeauche 1.1 struct {
326     uae_u64 msw;
327     uae_u64 lsw;
328     } parts64;
329     struct {
330     uae_u32 w0;
331     uae_u32 w1;
332     uae_u32 w2;
333     uae_u32 w3;
334     } parts32;
335     #else
336     struct {
337     uae_u64 lsw;
338     uae_u64 msw;
339     } parts64;
340     struct {
341     uae_u32 w3;
342     uae_u32 w2;
343     uae_u32 w1;
344     uae_u32 w0;
345     } parts32;
346     #endif
347     };
348 gbeauche 1.2 #endif
349 gbeauche 1.1
350     // Declare and initialize a pointer to a shape of the requested FP type
351     #define fp_declare_init_shape(psvar, rfvar, ftype) \
352     fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
353    
354     /* -------------------------------------------------------------------------- */
355     /* --- Extra Math Functions --- */
356     /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
357     /* -------------------------------------------------------------------------- */
358    
359     #undef isnan
360     #if 0 && defined(HAVE_ISNANL)
361     # define isnan(x) isnanl((x))
362     #else
363     # define isnan(x) fp_do_isnan((x))
364     #endif
365    
366     PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
367     {
368 gbeauche 1.2 #ifdef BRANCHES_ARE_EXPENSIVE
369     #ifndef USE_LONG_DOUBLE
370     fp_declare_init_shape(sxp, r, double);
371     uae_s32 hx = sxp->parts.msw;
372     uae_s32 lx = sxp->parts.lsw;
373     hx &= 0x7fffffff;
374     hx |= (uae_u32)(lx | (-lx)) >> 31;
375     hx = 0x7ff00000 - hx;
376     return (int)(((uae_u32)hx) >> 31);
377     #elif USE_QUAD_DOUBLE
378 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
379     uae_s64 hx = sxp->parts64.msw;
380     uae_s64 lx = sxp->parts64.lsw;
381     hx &= 0x7fffffffffffffffLL;
382     hx |= (uae_u64)(lx | (-lx)) >> 63;
383     hx = 0x7fff000000000000LL - hx;
384     return (int)((uae_u64)hx >> 63);
385     #else
386 gbeauche 1.2 fp_declare_init_shape(sxp, r, extended);
387 gbeauche 1.1 uae_s32 se = sxp->parts.sign_exponent;
388     uae_s32 hx = sxp->parts.msw;
389     uae_s32 lx = sxp->parts.lsw;
390     se = (se & 0x7fff) << 1;
391     lx |= hx & 0x7fffffff;
392     se |= (uae_u32)(lx | (-lx)) >> 31;
393     se = 0xfffe - se;
394     // TODO: check whether rshift count is 16 or 31
395     return (int)(((uae_u32)(se)) >> 16);
396     #endif
397     #else
398 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
399     fp_declare_init_shape(sxp, r, double);
400     return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
401     #else
402     fp_declare_init_shape(sxp, r, extended);
403 gbeauche 1.1 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
404 gbeauche 1.2 #endif
405 gbeauche 1.1 && (sxp->ieee_nan.mantissa0 != 0)
406     && (sxp->ieee_nan.mantissa1 != 0)
407     #ifdef USE_QUAD_DOUBLE
408     && (sxp->ieee_nan.mantissa2 != 0)
409     && (sxp->ieee_nan.mantissa3 != 0)
410     #endif
411     ;
412     #endif
413     }
414    
415     #undef isinf
416     #if 0 && defined(HAVE_ISINFL)
417     # define isinf(x) isinfl((x))
418     #else
419     # define isinf(x) fp_do_isinf((x))
420     #endif
421    
422     PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
423     {
424 gbeauche 1.2 #ifdef BRANCHES_ARE_EXPENSIVE
425     #ifndef USE_LONG_DOUBLE
426     fp_declare_init_shape(sxp, r, double);
427     uae_s32 hx = sxp->parts.msw;
428     uae_s32 lx = sxp->parts.lsw;
429     lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
430     lx |= -lx;
431     return ~(lx >> 31) & (hx >> 30);
432     #elif USE_QUAD_DOUBLE
433 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
434     uae_s64 hx = sxp->parts64.msw;
435     uae_s64 lx = sxp->parts64.lsw;
436     lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
437     lx |= -lx;
438     return ~(lx >> 63) & (hx >> 62);
439     #else
440 gbeauche 1.2 fp_declare_init_shape(sxp, r, extended);
441 gbeauche 1.1 uae_s32 se = sxp->parts.sign_exponent;
442     uae_s32 hx = sxp->parts.msw;
443     uae_s32 lx = sxp->parts.lsw;
444     /* This additional ^ 0x80000000 is necessary because in Intel's
445     internal representation of the implicit one is explicit.
446     NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
447     #ifdef __i386__
448     lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
449     #else
450     lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
451     #endif
452     lx |= -lx;
453     se &= 0x8000;
454     return ~(lx >> 31) & (1 - (se >> 14));
455     #endif
456     #else
457 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
458     fp_declare_init_shape(sxp, r, double);
459     return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
460     #else
461     fp_declare_init_shape(sxp, r, extended);
462 gbeauche 1.1 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
463 gbeauche 1.2 #endif
464 gbeauche 1.1 && (sxp->ieee_nan.mantissa0 == 0)
465     && (sxp->ieee_nan.mantissa1 == 0)
466     #ifdef USE_QUAD_DOUBLE
467     && (sxp->ieee_nan.mantissa2 == 0)
468     && (sxp->ieee_nan.mantissa3 == 0)
469     #endif
470     ;
471     #endif
472     }
473    
474     #undef isneg
475     #define isneg(x) fp_do_isneg((x))
476    
477     PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
478     {
479 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
480     fp_declare_init_shape(sxp, r, double);
481     #else
482 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
483 gbeauche 1.2 #endif
484     return sxp->ieee.negative;
485 gbeauche 1.1 }
486    
487     #undef iszero
488     #define iszero(x) fp_do_iszero((x))
489    
490     PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
491     {
492     // TODO: BRANCHES_ARE_EXPENSIVE
493 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
494     fp_declare_init_shape(sxp, r, double);
495     #else
496 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
497 gbeauche 1.2 #endif
498 gbeauche 1.1 return (sxp->ieee.exponent == 0)
499     && (sxp->ieee.mantissa0 == 0)
500     && (sxp->ieee.mantissa1 == 0)
501     #ifdef USE_QUAD_DOUBLE
502     && (sxp->ieee.mantissa2 == 0)
503     && (sxp->ieee.mantissa3 == 0)
504     #endif
505     ;
506     }
507    
508     PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
509     {
510     fl_dest.negative = isneg(r);
511     fl_dest.zero = iszero(r);
512     fl_dest.infinity = isinf(r);
513     fl_dest.nan = isnan(r);
514     fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
515     }
516    
517     PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
518     {
519     fl_source.negative = isneg(r);
520     fl_source.zero = iszero(r);
521     fl_source.infinity = isinf(r);
522     fl_source.nan = isnan(r);
523     fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
524     }
525    
526     PRIVATE inline void FFPU make_nan(fpu_register & r)
527     {
528     // FIXME: is that correct ?
529 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
530     fp_declare_init_shape(sxp, r, double);
531     sxp->ieee.exponent = FP_DOUBLE_EXP_MAX;
532     sxp->ieee.mantissa0 = 0xfffff;
533     #else
534 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
535     sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
536     sxp->ieee.mantissa0 = 0xffffffff;
537 gbeauche 1.2 #endif
538 gbeauche 1.1 sxp->ieee.mantissa1 = 0xffffffff;
539     #ifdef USE_QUAD_DOUBLE
540     sxp->ieee.mantissa2 = 0xffffffff;
541     sxp->ieee.mantissa3 = 0xffffffff;
542     #endif
543     }
544    
545     PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
546     {
547     #if 1
548     r = +0.0;
549     #else
550 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
551     fp_declare_init_shape(sxp, r, double);
552     #else
553 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
554 gbeauche 1.2 #endif
555 gbeauche 1.1 sxp->ieee.negative = 0;
556     sxp->ieee.exponent = 0;
557     sxp->ieee.mantissa0 = 0;
558     sxp->ieee.mantissa1 = 0;
559     #ifdef USE_QUAD_DOUBLE
560     sxp->ieee.mantissa2 = 0;
561     sxp->ieee.mantissa3 = 0;
562     #endif
563     #endif
564     }
565    
566     PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
567     {
568     #if 1
569     r = -0.0;
570     #else
571 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
572     fp_declare_init_shape(sxp, r, double);
573     #else
574 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
575 gbeauche 1.2 #endif
576 gbeauche 1.1 sxp->ieee.negative = 1;
577     sxp->ieee.exponent = 0;
578     sxp->ieee.mantissa0 = 0;
579     sxp->ieee.mantissa1 = 0;
580     #ifdef USE_QUAD_DOUBLE
581     sxp->ieee.mantissa2 = 0;
582     sxp->ieee.mantissa3 = 0;
583     #endif
584     #endif
585     }
586    
587     PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
588     {
589 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
590     fp_declare_init_shape(sxp, r, double);
591     sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
592     #else
593 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
594 gbeauche 1.2 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
595     #endif
596 gbeauche 1.1 sxp->ieee_nan.negative = 0;
597     sxp->ieee_nan.mantissa0 = 0;
598     sxp->ieee_nan.mantissa1 = 0;
599     #ifdef USE_QUAD_DOUBLE
600     sxp->ieee_nan.mantissa2 = 0;
601     sxp->ieee_nan.mantissa3 = 0;
602     #endif
603     }
604    
605     PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
606     {
607 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
608     fp_declare_init_shape(sxp, r, double);
609     sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
610     #else
611 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
612 gbeauche 1.2 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
613     #endif
614 gbeauche 1.1 sxp->ieee_nan.negative = 1;
615     sxp->ieee_nan.mantissa0 = 0;
616     sxp->ieee_nan.mantissa1 = 0;
617     #ifdef USE_QUAD_DOUBLE
618     sxp->ieee_nan.mantissa2 = 0;
619     sxp->ieee_nan.mantissa3 = 0;
620     #endif
621     }
622    
623     PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
624     {
625 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
626     fp_declare_init_shape(sxp, r, double);
627     return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS);
628     #else
629 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
630     return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
631 gbeauche 1.2 #endif
632 gbeauche 1.1 }
633    
634     // Normalize to range 1..2
635     PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
636     {
637 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
638     fp_declare_init_shape(sxp, r, double);
639     sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS;
640     #else
641 gbeauche 1.1 fp_declare_init_shape(sxp, r, extended);
642     sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
643 gbeauche 1.2 #endif
644 gbeauche 1.1 }
645    
646     // The sign of the quotient is the exclusive-OR of the sign bits
647     // of the source and destination operands.
648     PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
649     {
650 gbeauche 1.2 #ifndef USE_LONG_DOUBLE
651     fp_declare_init_shape(sap, ra, double);
652     fp_declare_init_shape(sbp, rb, double);
653     #else
654 gbeauche 1.1 fp_declare_init_shape(sap, ra, extended);
655     fp_declare_init_shape(sbp, rb, extended);
656 gbeauche 1.2 #endif
657     return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
658 gbeauche 1.1 }
659    
660     /* -------------------------------------------------------------------------- */
661     /* --- Math functions --- */
662     /* -------------------------------------------------------------------------- */
663    
664 gbeauche 1.2 #if FPU_USE_ISO_C99 && USE_LONG_DOUBLE
665 gbeauche 1.1 # define fp_log logl
666     # define fp_log10 log10l
667     # define fp_exp expl
668     # define fp_pow powl
669     # define fp_fabs fabsl
670     # define fp_sqrt sqrtl
671     # define fp_sin sinl
672     # define fp_cos cosl
673     # define fp_tan tanl
674     # define fp_sinh sinhl
675     # define fp_cosh coshl
676     # define fp_tanh tanhl
677     # define fp_asin asinl
678     # define fp_acos acosl
679     # define fp_atan atanl
680     # define fp_asinh asinhl
681     # define fp_acosh acoshl
682     # define fp_atanh atanhl
683     # define fp_floor floorl
684     # define fp_ceil ceill
685     #else
686     # define fp_log log
687     # define fp_log10 log10
688     # define fp_exp exp
689     # define fp_pow pow
690     # define fp_fabs fabs
691     # define fp_sqrt sqrt
692     # define fp_sin sin
693     # define fp_cos cos
694     # define fp_tan tan
695     # define fp_sinh sinh
696     # define fp_cosh cosh
697     # define fp_tanh tanh
698     # define fp_asin asin
699     # define fp_acos acos
700     # define fp_atan atan
701     # define fp_asinh asinh
702     # define fp_acosh acosh
703     # define fp_atanh atanh
704     # define fp_floor floor
705     # define fp_ceil ceil
706     #endif
707    
708     #if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
709     // Assembly optimized support functions. Taken from glibc 2.2.2
710    
711     #undef fp_log
712     #define fp_log fp_do_log
713    
714     #ifndef FPU_FAST_MATH
715     // FIXME: unimplemented
716     PRIVATE fpu_extended fp_do_log(fpu_extended x);
717     #else
718     PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
719     {
720     fpu_extended value;
721     __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
722     return value;
723     }
724     #endif
725    
726     #undef fp_log10
727     #define fp_log10 fp_do_log10
728    
729     #ifndef FPU_FAST_MATH
730     // FIXME: unimplemented
731     PRIVATE fpu_extended fp_do_log10(fpu_extended x);
732     #else
733     PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
734     {
735     fpu_extended value;
736     __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
737     return value;
738     }
739     #endif
740    
741     #undef fp_exp
742     #define fp_exp fp_do_exp
743    
744     #ifndef FPU_FAST_MATH
745     // FIXME: unimplemented
746     PRIVATE fpu_extended fp_do_exp(fpu_extended x);
747     #else
748     PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
749     {
750     fpu_extended value, exponent;
751     __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
752     "fmul %%st(1) # x * log2(e)\n\t"
753     "fst %%st(1)\n\t"
754     "frndint # int(x * log2(e))\n\t"
755     "fxch\n\t"
756     "fsub %%st(1) # fract(x * log2(e))\n\t"
757     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
758     : "=t" (value), "=u" (exponent) : "0" (x));
759     value += 1.0;
760     __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
761     return value;
762     }
763     #endif
764    
765     #undef fp_pow
766     #define fp_pow fp_do_pow
767    
768     PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
769    
770     #undef fp_fabs
771     #define fp_fabs fp_do_fabs
772    
773     PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
774     {
775     fpu_extended value;
776     __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
777     return value;
778     }
779    
780     #undef fp_sqrt
781     #define fp_sqrt fp_do_sqrt
782    
783     PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
784     {
785     fpu_extended value;
786     __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
787     return value;
788     }
789    
790     #undef fp_sin
791     #define fp_sin fp_do_sin
792    
793     PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
794     {
795     fpu_extended value;
796     __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
797     return value;
798     }
799    
800     #undef fp_cos
801     #define fp_cos fp_do_cos
802    
803     PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
804     {
805     fpu_extended value;
806     __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
807     return value;
808     }
809    
810     #undef fp_tan
811     #define fp_tan fp_do_tan
812    
813     PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
814     {
815     fpu_extended value;
816     __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
817     return value;
818     }
819    
820     #undef fp_expm1
821     #define fp_expm1 fp_do_expm1
822    
823     // Returns: exp(X) - 1.0
824     PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
825     {
826     fpu_extended value, exponent, temp;
827     __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
828     "fmul %%st(1) # x * log2(e)\n\t"
829     "fst %%st(1)\n\t"
830     "frndint # int(x * log2(e))\n\t"
831     "fxch\n\t"
832     "fsub %%st(1) # fract(x * log2(e))\n\t"
833     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
834     "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
835     : "=t" (value), "=u" (exponent) : "0" (x));
836     __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
837     temp -= 1.0;
838     return temp + value ? temp + value : x;
839     }
840    
841     #undef fp_sgn1
842     #define fp_sgn1 fp_do_sgn1
843    
844     PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
845     {
846     fp_declare_init_shape(sxp, x, extended);
847     sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
848     sxp->ieee_nan.one = 1;
849     sxp->ieee_nan.quiet_nan = 0;
850     sxp->ieee_nan.mantissa0 = 0;
851     sxp->ieee_nan.mantissa1 = 0;
852     return x;
853     }
854    
855     #undef fp_sinh
856     #define fp_sinh fp_do_sinh
857    
858     #ifndef FPU_FAST_MATH
859     // FIXME: unimplemented
860     PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
861     #else
862     PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
863     {
864     fpu_extended exm1 = fp_expm1(fp_fabs(x));
865     return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
866     }
867     #endif
868    
869     #undef fp_cosh
870     #define fp_cosh fp_do_cosh
871    
872     #ifndef FPU_FAST_MATH
873     // FIXME: unimplemented
874     PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
875     #else
876     PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
877     {
878     fpu_extended ex = fp_exp(x);
879     return 0.5 * (ex + 1.0 / ex);
880     }
881     #endif
882    
883     #undef fp_tanh
884     #define fp_tanh fp_do_tanh
885    
886     #ifndef FPU_FAST_MATH
887     // FIXME: unimplemented
888     PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
889     #else
890     PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
891     {
892     fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
893     return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
894     }
895     #endif
896    
897     #undef fp_atan2
898     #define fp_atan2 fp_do_atan2
899    
900     PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
901     {
902     fpu_extended value;
903     __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
904     return value;
905     }
906    
907     #undef fp_asin
908     #define fp_asin fp_do_asin
909    
910     PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
911     {
912     return fp_atan2(x, fp_sqrt(1.0 - x * x));
913     }
914    
915     #undef fp_acos
916     #define fp_acos fp_do_acos
917    
918     PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
919     {
920     return fp_atan2(fp_sqrt(1.0 - x * x), x);
921     }
922    
923     #undef fp_atan
924     #define fp_atan fp_do_atan
925    
926     PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
927     {
928     fpu_extended value;
929     __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
930     return value;
931     }
932    
933     #undef fp_log1p
934     #define fp_log1p fp_do_log1p
935    
936     // Returns: ln(1.0 + X)
937     PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
938    
939     #undef fp_asinh
940     #define fp_asinh fp_do_asinh
941    
942     PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
943     {
944     fpu_extended y = fp_fabs(x);
945     return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
946     }
947    
948     #undef fp_acosh
949     #define fp_acosh fp_do_acosh
950    
951     PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
952     {
953     return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
954     }
955    
956     #undef fp_atanh
957     #define fp_atanh fp_do_atanh
958    
959     PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
960     {
961     fpu_extended y = fp_fabs(x);
962     return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
963     }
964    
965     #undef fp_floor
966     #define fp_floor fp_do_floor
967    
968     PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
969     {
970     volatile unsigned int cw;
971     __asm__ __volatile__("fnstcw %0" : "=m" (cw));
972     volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
973     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
974     fpu_extended value;
975     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
976     __asm__ __volatile__("fldcw %0" : : "m" (cw));
977     return value;
978     }
979    
980     #undef fp_ceil
981     #define fp_ceil fp_do_ceil
982    
983     PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
984     {
985     volatile unsigned int cw;
986     __asm__ __volatile__("fnstcw %0" : "=m" (cw));
987     volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
988     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
989     fpu_extended value;
990     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
991     __asm__ __volatile__("fldcw %0" : : "m" (cw));
992     return value;
993     }
994    
995     #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
996     PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
997     { \
998     volatile unsigned int cw; \
999     __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
1000     volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
1001     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
1002     fpu_extended value; \
1003     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
1004     __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
1005     return value; \
1006     }
1007    
1008     #undef fp_round_to_minus_infinity
1009     #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1010    
1011     DEFINE_ROUND_FUNC(minus_infinity, 0x400)
1012    
1013     #undef fp_round_to_plus_infinity
1014     #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1015    
1016     DEFINE_ROUND_FUNC(plus_infinity, 0x800)
1017    
1018     #undef fp_round_to_zero
1019     #define fp_round_to_zero fp_do_round_to_zero
1020    
1021     DEFINE_ROUND_FUNC(zero, 0xc00)
1022    
1023     #undef fp_round_to_nearest
1024     #define fp_round_to_nearest fp_do_round_to_nearest
1025    
1026     DEFINE_ROUND_FUNC(nearest, 0x000)
1027    
1028     #endif /* X86_ASSEMBLY */
1029    
1030     #ifndef fp_round_to_minus_infinity
1031     #define fp_round_to_minus_infinity(x) fp_floor(x)
1032     #endif
1033    
1034     #ifndef fp_round_to_plus_infinity
1035     #define fp_round_to_plus_infinity(x) fp_ceil(x)
1036     #endif
1037    
1038     #ifndef fp_round_to_zero
1039     #define fp_round_to_zero(x) ((int)(x))
1040     #endif
1041    
1042     #ifndef fp_round_to_nearest
1043     #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1044     #endif
1045    
1046     #endif /* FPU_MATHLIB_H */