ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.1
Committed: 2002-09-13T12:50:40Z (21 years, 9 months ago) by gbeauche
Content type: text/plain
Branch: MAIN
Log Message:
* Basilisk II JIT integration, phase 2:
- Add new FPU core architecture
- Clean fpu_x86_asm.h as it is no longer automatically generated

File Contents

# User Rev Content
1 gbeauche 1.1 /*
2     * fpu/mathlib.h - Floating-point math support library
3     *
4     * Basilisk II (C) 1997-2001 Christian Bauer
5     *
6     * MC68881/68040 fpu emulation
7     *
8     * Original UAE FPU, copyright 1996 Herman ten Brugge
9     * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10     * New framework, copyright 2000-2001 Gwenole Beauchesne
11     * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12     *
13     * This program is free software; you can redistribute it and/or modify
14     * it under the terms of the GNU General Public License as published by
15     * the Free Software Foundation; either version 2 of the License, or
16     * (at your option) any later version.
17     *
18     * This program is distributed in the hope that it will be useful,
19     * but WITHOUT ANY WARRANTY; without even the implied warranty of
20     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21     * GNU General Public License for more details.
22     *
23     * You should have received a copy of the GNU General Public License
24     * along with this program; if not, write to the Free Software
25     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26     */
27    
28     #ifndef FPU_MATHLIB_H
29     #define FPU_MATHLIB_H
30    
31     /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32     #undef PUBLIC
33     #define PUBLIC extern
34    
35     #undef PRIVATE
36     #define PRIVATE static
37    
38     #undef FFPU
39     #define FFPU /**/
40    
41     #undef FPU
42     #define FPU fpu.
43    
44     // Define the following macro if branches are expensive. If so,
45     // integer-based isnan() and isinf() functions are implemented.
46     // TODO: move to Makefile.in
47     #define BRANCHES_ARE_EXPENSIVE 1
48    
49     // Use ISO C99 extended-precision math functions (glibc 2.1+)
50     #define FPU_USE_ISO_C99 1
51    
52     // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53     // don't support extended-precision floating-point computations
54     #ifdef WIN32
55     #undef FPU_USE_ISO_C99
56     #endif
57    
58     // Use faster implementation of math functions, but this could cause
59     // some incorrect results (?)
60     // TODO: actually implement the slower but safer versions
61     #define FPU_FAST_MATH 1
62    
63     #if FPU_USE_ISO_C99
64     // NOTE: no prior <math.h> shall be included at this point
65     #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66     #define __USE_ISOC9X 1 // for glibc 2.1.X
67     #include <math.h>
68     #else
69     #include <cmath>
70     using namespace std;
71     #endif
72    
73     /* -------------------------------------------------------------------------- */
74     /* --- Floating-point register types --- */
75     /* -------------------------------------------------------------------------- */
76    
77     // Single : S 8*E 23*F
78     #define FP_SINGLE_EXP_MAX 0xff
79     #define FP_SINGLE_EXP_BIAS 0x7f
80    
81     // Double : S 11*E 52*F
82     #define FP_DOUBLE_EXP_MAX 0x7ff
83     #define FP_DOUBLE_EXP_BIAS 0x3ff
84    
85     // Extended : S 15*E 64*F
86     #define FP_EXTENDED_EXP_MAX 0x7fff
87     #define FP_EXTENDED_EXP_BIAS 0x3fff
88    
89     // Zeroes : E = 0 & F = 0
90     // Infinities : E = MAX & F = 0
91     // Not-A-Number : E = MAX & F # 0
92    
93     /* -------------------------------------------------------------------------- */
94     /* --- Floating-point type shapes (IEEE-compliant) --- */
95     /* -------------------------------------------------------------------------- */
96    
97     // Taken from glibc 2.2.x: ieee754.h
98    
99     // IEEE-754 float format
100     union fpu_single_shape {
101    
102     fpu_single value;
103    
104     /* This is the IEEE 754 single-precision format. */
105     struct {
106     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
107     unsigned int negative:1;
108     unsigned int exponent:8;
109     unsigned int mantissa:23;
110     #endif /* Big endian. */
111     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
112     unsigned int mantissa:23;
113     unsigned int exponent:8;
114     unsigned int negative:1;
115     #endif /* Little endian. */
116     } ieee;
117    
118     /* This format makes it easier to see if a NaN is a signalling NaN. */
119     struct {
120     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
121     unsigned int negative:1;
122     unsigned int exponent:8;
123     unsigned int quiet_nan:1;
124     unsigned int mantissa:22;
125     #endif /* Big endian. */
126     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
127     unsigned int mantissa:22;
128     unsigned int quiet_nan:1;
129     unsigned int exponent:8;
130     unsigned int negative:1;
131     #endif /* Little endian. */
132     } ieee_nan;
133     };
134    
135     // IEEE-754 double format
136     union fpu_double_shape {
137     fpu_double value;
138    
139     /* This is the IEEE 754 double-precision format. */
140     struct {
141     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
142     unsigned int negative:1;
143     unsigned int exponent:11;
144     /* Together these comprise the mantissa. */
145     unsigned int mantissa0:20;
146     unsigned int mantissa1:32;
147     #endif /* Big endian. */
148     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
149     # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
150     unsigned int mantissa0:20;
151     unsigned int exponent:11;
152     unsigned int negative:1;
153     unsigned int mantissa1:32;
154     # else
155     /* Together these comprise the mantissa. */
156     unsigned int mantissa1:32;
157     unsigned int mantissa0:20;
158     unsigned int exponent:11;
159     unsigned int negative:1;
160     # endif
161     #endif /* Little endian. */
162     } ieee;
163    
164     /* This format makes it easier to see if a NaN is a signalling NaN. */
165     struct {
166     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
167     unsigned int negative:1;
168     unsigned int exponent:11;
169     unsigned int quiet_nan:1;
170     /* Together these comprise the mantissa. */
171     unsigned int mantissa0:19;
172     unsigned int mantissa1:32;
173     #else
174     # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
175     unsigned int mantissa0:19;
176     unsigned int quiet_nan:1;
177     unsigned int exponent:11;
178     unsigned int negative:1;
179     unsigned int mantissa1:32;
180     # else
181     /* Together these comprise the mantissa. */
182     unsigned int mantissa1:32;
183     unsigned int mantissa0:19;
184     unsigned int quiet_nan:1;
185     unsigned int exponent:11;
186     unsigned int negative:1;
187     # endif
188     #endif
189     } ieee_nan;
190     };
191    
192     #if SIZEOF_LONG_DOUBLE == 12
193     # undef USE_QUAD_DOUBLE
194     #elif SIZEOF_LONG_DOUBLE == 16
195     # define USE_QUAD_DOUBLE
196     #else
197     # error "unsupported long double format"
198     #endif
199    
200     #ifndef USE_QUAD_DOUBLE
201     // IEEE-854 long double format
202     union fpu_extended_shape {
203     fpu_extended value;
204    
205     /* This is the IEEE 854 double-extended-precision format. */
206     struct {
207     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
208     unsigned int negative:1;
209     unsigned int exponent:15;
210     unsigned int empty:16;
211     unsigned int mantissa0:32;
212     unsigned int mantissa1:32;
213     #endif
214     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
215     # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
216     unsigned int exponent:15;
217     unsigned int negative:1;
218     unsigned int empty:16;
219     unsigned int mantissa0:32;
220     unsigned int mantissa1:32;
221     # else
222     unsigned int mantissa1:32;
223     unsigned int mantissa0:32;
224     unsigned int exponent:15;
225     unsigned int negative:1;
226     unsigned int empty:16;
227     # endif
228     #endif
229     } ieee;
230    
231     /* This is for NaNs in the IEEE 854 double-extended-precision format. */
232     struct {
233     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
234     unsigned int negative:1;
235     unsigned int exponent:15;
236     unsigned int empty:16;
237     unsigned int one:1;
238     unsigned int quiet_nan:1;
239     unsigned int mantissa0:30;
240     unsigned int mantissa1:32;
241     #endif
242     #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
243     # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
244     unsigned int exponent:15;
245     unsigned int negative:1;
246     unsigned int empty:16;
247     unsigned int mantissa0:30;
248     unsigned int quiet_nan:1;
249     unsigned int one:1;
250     unsigned int mantissa1:32;
251     # else
252     unsigned int mantissa1:32;
253     unsigned int mantissa0:30;
254     unsigned int quiet_nan:1;
255     unsigned int one:1;
256     unsigned int exponent:15;
257     unsigned int negative:1;
258     unsigned int empty:16;
259     # endif
260     #endif
261     } ieee_nan;
262    
263     /* This format is used to extract the sign_exponent and mantissa parts only */
264     struct {
265     #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
266     unsigned int sign_exponent:16;
267     unsigned int empty:16;
268     unsigned int msw:32;
269     unsigned int lsw:32;
270     #else
271     unsigned int lsw:32;
272     unsigned int msw:32;
273     unsigned int sign_exponent:16;
274     unsigned int empty:16;
275     #endif
276     } parts;
277     };
278     #else
279     // IEEE-854 quad double format
280     union fpu_extended_shape {
281     fpu_extended value;
282    
283     /* This is the IEEE 854 quad-precision format. */
284     struct {
285     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
286     unsigned int negative:1;
287     unsigned int exponent:15;
288     unsigned int mantissa0:16;
289     unsigned int mantissa1:32;
290     unsigned int mantissa2:32;
291     unsigned int mantissa3:32;
292     #else
293     unsigned int mantissa3:32;
294     unsigned int mantissa2:32;
295     unsigned int mantissa1:32;
296     unsigned int mantissa0:16;
297     unsigned int exponent:15;
298     unsigned int negative:1;
299     #endif
300     } ieee;
301    
302     /* This is for NaNs in the IEEE 854 quad-precision format. */
303     struct {
304     #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
305     unsigned int negative:1;
306     unsigned int exponent:15;
307     unsigned int quiet_nan:1;
308     unsigned int mantissa0:15;
309     unsigned int mantissa1:30;
310     unsigned int mantissa2:32;
311     unsigned int mantissa3:32;
312     #else
313     unsigned int mantissa3:32;
314     unsigned int mantissa2:32;
315     unsigned int mantissa1:32;
316     unsigned int mantissa0:15;
317     unsigned int quiet_nan:1;
318     unsigned int exponent:15;
319     unsigned int negative:1;
320     #endif
321     } ieee_nan;
322    
323     /* This format is used to extract the sign_exponent and mantissa parts only */
324     #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
325     struct {
326     uae_u64 msw;
327     uae_u64 lsw;
328     } parts64;
329     struct {
330     uae_u32 w0;
331     uae_u32 w1;
332     uae_u32 w2;
333     uae_u32 w3;
334     } parts32;
335     #else
336     struct {
337     uae_u64 lsw;
338     uae_u64 msw;
339     } parts64;
340     struct {
341     uae_u32 w3;
342     uae_u32 w2;
343     uae_u32 w1;
344     uae_u32 w0;
345     } parts32;
346     #endif
347     };
348     #endif // !USE_QUAD_DOUBLE
349    
350     // Declare and initialize a pointer to a shape of the requested FP type
351     #define fp_declare_init_shape(psvar, rfvar, ftype) \
352     fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
353    
354     /* -------------------------------------------------------------------------- */
355     /* --- Extra Math Functions --- */
356     /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
357     /* -------------------------------------------------------------------------- */
358    
359     #undef isnan
360     #if 0 && defined(HAVE_ISNANL)
361     # define isnan(x) isnanl((x))
362     #else
363     # define isnan(x) fp_do_isnan((x))
364     #endif
365    
366     PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
367     {
368     fp_declare_init_shape(sxp, r, extended);
369     #ifdef BRANCHES_ARE_EXPENSIVE
370     #ifdef USE_QUAD_DOUBLE
371     uae_s64 hx = sxp->parts64.msw;
372     uae_s64 lx = sxp->parts64.lsw;
373     hx &= 0x7fffffffffffffffLL;
374     hx |= (uae_u64)(lx | (-lx)) >> 63;
375     hx = 0x7fff000000000000LL - hx;
376     return (int)((uae_u64)hx >> 63);
377     #else
378     uae_s32 se = sxp->parts.sign_exponent;
379     uae_s32 hx = sxp->parts.msw;
380     uae_s32 lx = sxp->parts.lsw;
381     se = (se & 0x7fff) << 1;
382     lx |= hx & 0x7fffffff;
383     se |= (uae_u32)(lx | (-lx)) >> 31;
384     se = 0xfffe - se;
385     // TODO: check whether rshift count is 16 or 31
386     return (int)(((uae_u32)(se)) >> 16);
387     #endif
388     #else
389     return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
390     && (sxp->ieee_nan.mantissa0 != 0)
391     && (sxp->ieee_nan.mantissa1 != 0)
392     #ifdef USE_QUAD_DOUBLE
393     && (sxp->ieee_nan.mantissa2 != 0)
394     && (sxp->ieee_nan.mantissa3 != 0)
395     #endif
396     ;
397     #endif
398     }
399    
400     #undef isinf
401     #if 0 && defined(HAVE_ISINFL)
402     # define isinf(x) isinfl((x))
403     #else
404     # define isinf(x) fp_do_isinf((x))
405     #endif
406    
407     PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
408     {
409     fp_declare_init_shape(sxp, r, extended);
410     #ifdef BRANCHES_ARE_EXPENSIVE
411     #ifdef USE_QUAD_DOUBLE
412     uae_s64 hx = sxp->parts64.msw;
413     uae_s64 lx = sxp->parts64.lsw;
414     lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
415     lx |= -lx;
416     return ~(lx >> 63) & (hx >> 62);
417     #else
418     uae_s32 se = sxp->parts.sign_exponent;
419     uae_s32 hx = sxp->parts.msw;
420     uae_s32 lx = sxp->parts.lsw;
421     /* This additional ^ 0x80000000 is necessary because in Intel's
422     internal representation of the implicit one is explicit.
423     NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
424     #ifdef __i386__
425     lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
426     #else
427     lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
428     #endif
429     lx |= -lx;
430     se &= 0x8000;
431     return ~(lx >> 31) & (1 - (se >> 14));
432     #endif
433     #else
434     return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
435     && (sxp->ieee_nan.mantissa0 == 0)
436     && (sxp->ieee_nan.mantissa1 == 0)
437     #ifdef USE_QUAD_DOUBLE
438     && (sxp->ieee_nan.mantissa2 == 0)
439     && (sxp->ieee_nan.mantissa3 == 0)
440     #endif
441     ;
442     #endif
443     }
444    
445     #undef isneg
446     #define isneg(x) fp_do_isneg((x))
447    
448     PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
449     {
450     fp_declare_init_shape(sxp, r, extended);
451     return (sxp->ieee.negative)
452     ;
453     }
454    
455     #undef iszero
456     #define iszero(x) fp_do_iszero((x))
457    
458     PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
459     {
460     // TODO: BRANCHES_ARE_EXPENSIVE
461     fp_declare_init_shape(sxp, r, extended);
462     return (sxp->ieee.exponent == 0)
463     && (sxp->ieee.mantissa0 == 0)
464     && (sxp->ieee.mantissa1 == 0)
465     #ifdef USE_QUAD_DOUBLE
466     && (sxp->ieee.mantissa2 == 0)
467     && (sxp->ieee.mantissa3 == 0)
468     #endif
469     ;
470     }
471    
472     PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
473     {
474     fl_dest.negative = isneg(r);
475     fl_dest.zero = iszero(r);
476     fl_dest.infinity = isinf(r);
477     fl_dest.nan = isnan(r);
478     fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
479     }
480    
481     PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
482     {
483     fl_source.negative = isneg(r);
484     fl_source.zero = iszero(r);
485     fl_source.infinity = isinf(r);
486     fl_source.nan = isnan(r);
487     fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
488     }
489    
490     PRIVATE inline void FFPU make_nan(fpu_register & r)
491     {
492     // FIXME: is that correct ?
493     fp_declare_init_shape(sxp, r, extended);
494     sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
495     sxp->ieee.mantissa0 = 0xffffffff;
496     sxp->ieee.mantissa1 = 0xffffffff;
497     #ifdef USE_QUAD_DOUBLE
498     sxp->ieee.mantissa2 = 0xffffffff;
499     sxp->ieee.mantissa3 = 0xffffffff;
500     #endif
501     }
502    
503     PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
504     {
505     #if 1
506     r = +0.0;
507     #else
508     fp_declare_init_shape(sxp, r, extended);
509     sxp->ieee.negative = 0;
510     sxp->ieee.exponent = 0;
511     sxp->ieee.mantissa0 = 0;
512     sxp->ieee.mantissa1 = 0;
513     #ifdef USE_QUAD_DOUBLE
514     sxp->ieee.mantissa2 = 0;
515     sxp->ieee.mantissa3 = 0;
516     #endif
517     #endif
518     }
519    
520     PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
521     {
522     #if 1
523     r = -0.0;
524     #else
525     fp_declare_init_shape(sxp, r, extended);
526     sxp->ieee.negative = 1;
527     sxp->ieee.exponent = 0;
528     sxp->ieee.mantissa0 = 0;
529     sxp->ieee.mantissa1 = 0;
530     #ifdef USE_QUAD_DOUBLE
531     sxp->ieee.mantissa2 = 0;
532     sxp->ieee.mantissa3 = 0;
533     #endif
534     #endif
535     }
536    
537     PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
538     {
539     fp_declare_init_shape(sxp, r, extended);
540     sxp->ieee_nan.negative = 0;
541     sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
542     sxp->ieee_nan.mantissa0 = 0;
543     sxp->ieee_nan.mantissa1 = 0;
544     #ifdef USE_QUAD_DOUBLE
545     sxp->ieee_nan.mantissa2 = 0;
546     sxp->ieee_nan.mantissa3 = 0;
547     #endif
548     }
549    
550     PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
551     {
552     fp_declare_init_shape(sxp, r, extended);
553     sxp->ieee_nan.negative = 1;
554     sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
555     sxp->ieee_nan.mantissa0 = 0;
556     sxp->ieee_nan.mantissa1 = 0;
557     #ifdef USE_QUAD_DOUBLE
558     sxp->ieee_nan.mantissa2 = 0;
559     sxp->ieee_nan.mantissa3 = 0;
560     #endif
561     }
562    
563     PRIVATE inline void FFPU fast_scale(fpu_register & r, int add)
564     {
565     fp_declare_init_shape(sxp, r, extended);
566     // TODO: overflow flags
567     int exp = sxp->ieee.exponent + add;
568     // FIXME: this test is not correct: see fpuop_fscale()
569     if (exp > FP_EXTENDED_EXP_BIAS) {
570     make_inf_positive(r);
571     } else if (exp < 0) {
572     // keep sign (+/- 0)
573     if (isneg(r))
574     make_inf_negative(r);
575     else
576     make_inf_positive(r);
577     // p[FHI] &= 0x80000000;
578     } else {
579     sxp->ieee.exponent = exp;
580     // p[FHI] = (p[FHI] & 0x800FFFFF) | ((uae_u32)exp << 20);
581     }
582     }
583    
584     PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
585     {
586     fp_declare_init_shape(sxp, r, extended);
587     return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
588     }
589    
590     // Normalize to range 1..2
591     PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
592     {
593     fp_declare_init_shape(sxp, r, extended);
594     sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
595     }
596    
597     // The sign of the quotient is the exclusive-OR of the sign bits
598     // of the source and destination operands.
599     PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
600     {
601     fp_declare_init_shape(sap, ra, extended);
602     fp_declare_init_shape(sbp, rb, extended);
603     return (((sap->ieee.mantissa0 ^ sbp->ieee.mantissa0) & 0x80000000) ? 0x800000 : 0);
604     }
605    
606     /* -------------------------------------------------------------------------- */
607     /* --- Math functions --- */
608     /* -------------------------------------------------------------------------- */
609    
610     #if FPU_USE_ISO_C99
611     # define fp_log logl
612     # define fp_log10 log10l
613     # define fp_exp expl
614     # define fp_pow powl
615     # define fp_fabs fabsl
616     # define fp_sqrt sqrtl
617     # define fp_sin sinl
618     # define fp_cos cosl
619     # define fp_tan tanl
620     # define fp_sinh sinhl
621     # define fp_cosh coshl
622     # define fp_tanh tanhl
623     # define fp_asin asinl
624     # define fp_acos acosl
625     # define fp_atan atanl
626     # define fp_asinh asinhl
627     # define fp_acosh acoshl
628     # define fp_atanh atanhl
629     # define fp_floor floorl
630     # define fp_ceil ceill
631     #else
632     # define fp_log log
633     # define fp_log10 log10
634     # define fp_exp exp
635     # define fp_pow pow
636     # define fp_fabs fabs
637     # define fp_sqrt sqrt
638     # define fp_sin sin
639     # define fp_cos cos
640     # define fp_tan tan
641     # define fp_sinh sinh
642     # define fp_cosh cosh
643     # define fp_tanh tanh
644     # define fp_asin asin
645     # define fp_acos acos
646     # define fp_atan atan
647     # define fp_asinh asinh
648     # define fp_acosh acosh
649     # define fp_atanh atanh
650     # define fp_floor floor
651     # define fp_ceil ceil
652     #endif
653    
654     #if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
655     // Assembly optimized support functions. Taken from glibc 2.2.2
656    
657     #undef fp_log
658     #define fp_log fp_do_log
659    
660     #ifndef FPU_FAST_MATH
661     // FIXME: unimplemented
662     PRIVATE fpu_extended fp_do_log(fpu_extended x);
663     #else
664     PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
665     {
666     fpu_extended value;
667     __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
668     return value;
669     }
670     #endif
671    
672     #undef fp_log10
673     #define fp_log10 fp_do_log10
674    
675     #ifndef FPU_FAST_MATH
676     // FIXME: unimplemented
677     PRIVATE fpu_extended fp_do_log10(fpu_extended x);
678     #else
679     PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
680     {
681     fpu_extended value;
682     __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
683     return value;
684     }
685     #endif
686    
687     #undef fp_exp
688     #define fp_exp fp_do_exp
689    
690     #ifndef FPU_FAST_MATH
691     // FIXME: unimplemented
692     PRIVATE fpu_extended fp_do_exp(fpu_extended x);
693     #else
694     PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
695     {
696     fpu_extended value, exponent;
697     __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
698     "fmul %%st(1) # x * log2(e)\n\t"
699     "fst %%st(1)\n\t"
700     "frndint # int(x * log2(e))\n\t"
701     "fxch\n\t"
702     "fsub %%st(1) # fract(x * log2(e))\n\t"
703     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
704     : "=t" (value), "=u" (exponent) : "0" (x));
705     value += 1.0;
706     __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
707     return value;
708     }
709     #endif
710    
711     #undef fp_pow
712     #define fp_pow fp_do_pow
713    
714     PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
715    
716     #undef fp_fabs
717     #define fp_fabs fp_do_fabs
718    
719     PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
720     {
721     fpu_extended value;
722     __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
723     return value;
724     }
725    
726     #undef fp_sqrt
727     #define fp_sqrt fp_do_sqrt
728    
729     PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
730     {
731     fpu_extended value;
732     __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
733     return value;
734     }
735    
736     #undef fp_sin
737     #define fp_sin fp_do_sin
738    
739     PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
740     {
741     fpu_extended value;
742     __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
743     return value;
744     }
745    
746     #undef fp_cos
747     #define fp_cos fp_do_cos
748    
749     PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
750     {
751     fpu_extended value;
752     __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
753     return value;
754     }
755    
756     #undef fp_tan
757     #define fp_tan fp_do_tan
758    
759     PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
760     {
761     fpu_extended value;
762     __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
763     return value;
764     }
765    
766     #undef fp_expm1
767     #define fp_expm1 fp_do_expm1
768    
769     // Returns: exp(X) - 1.0
770     PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
771     {
772     fpu_extended value, exponent, temp;
773     __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
774     "fmul %%st(1) # x * log2(e)\n\t"
775     "fst %%st(1)\n\t"
776     "frndint # int(x * log2(e))\n\t"
777     "fxch\n\t"
778     "fsub %%st(1) # fract(x * log2(e))\n\t"
779     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
780     "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
781     : "=t" (value), "=u" (exponent) : "0" (x));
782     __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
783     temp -= 1.0;
784     return temp + value ? temp + value : x;
785     }
786    
787     #undef fp_sgn1
788     #define fp_sgn1 fp_do_sgn1
789    
790     PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
791     {
792     fp_declare_init_shape(sxp, x, extended);
793     sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
794     sxp->ieee_nan.one = 1;
795     sxp->ieee_nan.quiet_nan = 0;
796     sxp->ieee_nan.mantissa0 = 0;
797     sxp->ieee_nan.mantissa1 = 0;
798     return x;
799     }
800    
801     #undef fp_sinh
802     #define fp_sinh fp_do_sinh
803    
804     #ifndef FPU_FAST_MATH
805     // FIXME: unimplemented
806     PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
807     #else
808     PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
809     {
810     fpu_extended exm1 = fp_expm1(fp_fabs(x));
811     return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
812     }
813     #endif
814    
815     #undef fp_cosh
816     #define fp_cosh fp_do_cosh
817    
818     #ifndef FPU_FAST_MATH
819     // FIXME: unimplemented
820     PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
821     #else
822     PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
823     {
824     fpu_extended ex = fp_exp(x);
825     return 0.5 * (ex + 1.0 / ex);
826     }
827     #endif
828    
829     #undef fp_tanh
830     #define fp_tanh fp_do_tanh
831    
832     #ifndef FPU_FAST_MATH
833     // FIXME: unimplemented
834     PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
835     #else
836     PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
837     {
838     fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
839     return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
840     }
841     #endif
842    
843     #undef fp_atan2
844     #define fp_atan2 fp_do_atan2
845    
846     PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
847     {
848     fpu_extended value;
849     __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
850     return value;
851     }
852    
853     #undef fp_asin
854     #define fp_asin fp_do_asin
855    
856     PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
857     {
858     return fp_atan2(x, fp_sqrt(1.0 - x * x));
859     }
860    
861     #undef fp_acos
862     #define fp_acos fp_do_acos
863    
864     PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
865     {
866     return fp_atan2(fp_sqrt(1.0 - x * x), x);
867     }
868    
869     #undef fp_atan
870     #define fp_atan fp_do_atan
871    
872     PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
873     {
874     fpu_extended value;
875     __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
876     return value;
877     }
878    
879     #undef fp_log1p
880     #define fp_log1p fp_do_log1p
881    
882     // Returns: ln(1.0 + X)
883     PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
884    
885     #undef fp_asinh
886     #define fp_asinh fp_do_asinh
887    
888     PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
889     {
890     fpu_extended y = fp_fabs(x);
891     return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
892     }
893    
894     #undef fp_acosh
895     #define fp_acosh fp_do_acosh
896    
897     PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
898     {
899     return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
900     }
901    
902     #undef fp_atanh
903     #define fp_atanh fp_do_atanh
904    
905     PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
906     {
907     fpu_extended y = fp_fabs(x);
908     return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
909     }
910    
911     #undef fp_floor
912     #define fp_floor fp_do_floor
913    
914     PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
915     {
916     volatile unsigned int cw;
917     __asm__ __volatile__("fnstcw %0" : "=m" (cw));
918     volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
919     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
920     fpu_extended value;
921     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
922     __asm__ __volatile__("fldcw %0" : : "m" (cw));
923     return value;
924     }
925    
926     #undef fp_ceil
927     #define fp_ceil fp_do_ceil
928    
929     PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
930     {
931     volatile unsigned int cw;
932     __asm__ __volatile__("fnstcw %0" : "=m" (cw));
933     volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
934     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
935     fpu_extended value;
936     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
937     __asm__ __volatile__("fldcw %0" : : "m" (cw));
938     return value;
939     }
940    
941     #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
942     PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
943     { \
944     volatile unsigned int cw; \
945     __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
946     volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
947     __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
948     fpu_extended value; \
949     __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
950     __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
951     return value; \
952     }
953    
954     #undef fp_round_to_minus_infinity
955     #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
956    
957     DEFINE_ROUND_FUNC(minus_infinity, 0x400)
958    
959     #undef fp_round_to_plus_infinity
960     #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
961    
962     DEFINE_ROUND_FUNC(plus_infinity, 0x800)
963    
964     #undef fp_round_to_zero
965     #define fp_round_to_zero fp_do_round_to_zero
966    
967     DEFINE_ROUND_FUNC(zero, 0xc00)
968    
969     #undef fp_round_to_nearest
970     #define fp_round_to_nearest fp_do_round_to_nearest
971    
972     DEFINE_ROUND_FUNC(nearest, 0x000)
973    
974     #endif /* X86_ASSEMBLY */
975    
976     #ifndef fp_round_to_minus_infinity
977     #define fp_round_to_minus_infinity(x) fp_floor(x)
978     #endif
979    
980     #ifndef fp_round_to_plus_infinity
981     #define fp_round_to_plus_infinity(x) fp_ceil(x)
982     #endif
983    
984     #ifndef fp_round_to_zero
985     #define fp_round_to_zero(x) ((int)(x))
986     #endif
987    
988     #ifndef fp_round_to_nearest
989     #define fp_round_to_nearest(x) ((int)((x) + 0.5))
990     #endif
991    
992     #endif /* FPU_MATHLIB_H */