ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.1
Committed: 2002-09-13T12:50:40Z (21 years, 9 months ago) by gbeauche
Content type: text/plain
Branch: MAIN
Log Message:
* Basilisk II JIT integration, phase 2:
- Add new FPU core architecture
- Clean fpu_x86_asm.h as it is no longer automatically generated

File Contents

# Content
1 /*
2 * fpu/mathlib.h - Floating-point math support library
3 *
4 * Basilisk II (C) 1997-2001 Christian Bauer
5 *
6 * MC68881/68040 fpu emulation
7 *
8 * Original UAE FPU, copyright 1996 Herman ten Brugge
9 * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10 * New framework, copyright 2000-2001 Gwenole Beauchesne
11 * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 #ifndef FPU_MATHLIB_H
29 #define FPU_MATHLIB_H
30
31 /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32 #undef PUBLIC
33 #define PUBLIC extern
34
35 #undef PRIVATE
36 #define PRIVATE static
37
38 #undef FFPU
39 #define FFPU /**/
40
41 #undef FPU
42 #define FPU fpu.
43
44 // Define the following macro if branches are expensive. If so,
45 // integer-based isnan() and isinf() functions are implemented.
46 // TODO: move to Makefile.in
47 #define BRANCHES_ARE_EXPENSIVE 1
48
49 // Use ISO C99 extended-precision math functions (glibc 2.1+)
50 #define FPU_USE_ISO_C99 1
51
52 // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53 // don't support extended-precision floating-point computations
54 #ifdef WIN32
55 #undef FPU_USE_ISO_C99
56 #endif
57
58 // Use faster implementation of math functions, but this could cause
59 // some incorrect results (?)
60 // TODO: actually implement the slower but safer versions
61 #define FPU_FAST_MATH 1
62
63 #if FPU_USE_ISO_C99
64 // NOTE: no prior <math.h> shall be included at this point
65 #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66 #define __USE_ISOC9X 1 // for glibc 2.1.X
67 #include <math.h>
68 #else
69 #include <cmath>
70 using namespace std;
71 #endif
72
73 /* -------------------------------------------------------------------------- */
74 /* --- Floating-point register types --- */
75 /* -------------------------------------------------------------------------- */
76
77 // Single : S 8*E 23*F
78 #define FP_SINGLE_EXP_MAX 0xff
79 #define FP_SINGLE_EXP_BIAS 0x7f
80
81 // Double : S 11*E 52*F
82 #define FP_DOUBLE_EXP_MAX 0x7ff
83 #define FP_DOUBLE_EXP_BIAS 0x3ff
84
85 // Extended : S 15*E 64*F
86 #define FP_EXTENDED_EXP_MAX 0x7fff
87 #define FP_EXTENDED_EXP_BIAS 0x3fff
88
89 // Zeroes : E = 0 & F = 0
90 // Infinities : E = MAX & F = 0
91 // Not-A-Number : E = MAX & F # 0
92
93 /* -------------------------------------------------------------------------- */
94 /* --- Floating-point type shapes (IEEE-compliant) --- */
95 /* -------------------------------------------------------------------------- */
96
97 // Taken from glibc 2.2.x: ieee754.h
98
99 // IEEE-754 float format
100 union fpu_single_shape {
101
102 fpu_single value;
103
104 /* This is the IEEE 754 single-precision format. */
105 struct {
106 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
107 unsigned int negative:1;
108 unsigned int exponent:8;
109 unsigned int mantissa:23;
110 #endif /* Big endian. */
111 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
112 unsigned int mantissa:23;
113 unsigned int exponent:8;
114 unsigned int negative:1;
115 #endif /* Little endian. */
116 } ieee;
117
118 /* This format makes it easier to see if a NaN is a signalling NaN. */
119 struct {
120 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
121 unsigned int negative:1;
122 unsigned int exponent:8;
123 unsigned int quiet_nan:1;
124 unsigned int mantissa:22;
125 #endif /* Big endian. */
126 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
127 unsigned int mantissa:22;
128 unsigned int quiet_nan:1;
129 unsigned int exponent:8;
130 unsigned int negative:1;
131 #endif /* Little endian. */
132 } ieee_nan;
133 };
134
135 // IEEE-754 double format
136 union fpu_double_shape {
137 fpu_double value;
138
139 /* This is the IEEE 754 double-precision format. */
140 struct {
141 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
142 unsigned int negative:1;
143 unsigned int exponent:11;
144 /* Together these comprise the mantissa. */
145 unsigned int mantissa0:20;
146 unsigned int mantissa1:32;
147 #endif /* Big endian. */
148 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
149 # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
150 unsigned int mantissa0:20;
151 unsigned int exponent:11;
152 unsigned int negative:1;
153 unsigned int mantissa1:32;
154 # else
155 /* Together these comprise the mantissa. */
156 unsigned int mantissa1:32;
157 unsigned int mantissa0:20;
158 unsigned int exponent:11;
159 unsigned int negative:1;
160 # endif
161 #endif /* Little endian. */
162 } ieee;
163
164 /* This format makes it easier to see if a NaN is a signalling NaN. */
165 struct {
166 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
167 unsigned int negative:1;
168 unsigned int exponent:11;
169 unsigned int quiet_nan:1;
170 /* Together these comprise the mantissa. */
171 unsigned int mantissa0:19;
172 unsigned int mantissa1:32;
173 #else
174 # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
175 unsigned int mantissa0:19;
176 unsigned int quiet_nan:1;
177 unsigned int exponent:11;
178 unsigned int negative:1;
179 unsigned int mantissa1:32;
180 # else
181 /* Together these comprise the mantissa. */
182 unsigned int mantissa1:32;
183 unsigned int mantissa0:19;
184 unsigned int quiet_nan:1;
185 unsigned int exponent:11;
186 unsigned int negative:1;
187 # endif
188 #endif
189 } ieee_nan;
190 };
191
192 #if SIZEOF_LONG_DOUBLE == 12
193 # undef USE_QUAD_DOUBLE
194 #elif SIZEOF_LONG_DOUBLE == 16
195 # define USE_QUAD_DOUBLE
196 #else
197 # error "unsupported long double format"
198 #endif
199
200 #ifndef USE_QUAD_DOUBLE
201 // IEEE-854 long double format
202 union fpu_extended_shape {
203 fpu_extended value;
204
205 /* This is the IEEE 854 double-extended-precision format. */
206 struct {
207 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
208 unsigned int negative:1;
209 unsigned int exponent:15;
210 unsigned int empty:16;
211 unsigned int mantissa0:32;
212 unsigned int mantissa1:32;
213 #endif
214 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
215 # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
216 unsigned int exponent:15;
217 unsigned int negative:1;
218 unsigned int empty:16;
219 unsigned int mantissa0:32;
220 unsigned int mantissa1:32;
221 # else
222 unsigned int mantissa1:32;
223 unsigned int mantissa0:32;
224 unsigned int exponent:15;
225 unsigned int negative:1;
226 unsigned int empty:16;
227 # endif
228 #endif
229 } ieee;
230
231 /* This is for NaNs in the IEEE 854 double-extended-precision format. */
232 struct {
233 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
234 unsigned int negative:1;
235 unsigned int exponent:15;
236 unsigned int empty:16;
237 unsigned int one:1;
238 unsigned int quiet_nan:1;
239 unsigned int mantissa0:30;
240 unsigned int mantissa1:32;
241 #endif
242 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
243 # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
244 unsigned int exponent:15;
245 unsigned int negative:1;
246 unsigned int empty:16;
247 unsigned int mantissa0:30;
248 unsigned int quiet_nan:1;
249 unsigned int one:1;
250 unsigned int mantissa1:32;
251 # else
252 unsigned int mantissa1:32;
253 unsigned int mantissa0:30;
254 unsigned int quiet_nan:1;
255 unsigned int one:1;
256 unsigned int exponent:15;
257 unsigned int negative:1;
258 unsigned int empty:16;
259 # endif
260 #endif
261 } ieee_nan;
262
263 /* This format is used to extract the sign_exponent and mantissa parts only */
264 struct {
265 #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
266 unsigned int sign_exponent:16;
267 unsigned int empty:16;
268 unsigned int msw:32;
269 unsigned int lsw:32;
270 #else
271 unsigned int lsw:32;
272 unsigned int msw:32;
273 unsigned int sign_exponent:16;
274 unsigned int empty:16;
275 #endif
276 } parts;
277 };
278 #else
279 // IEEE-854 quad double format
280 union fpu_extended_shape {
281 fpu_extended value;
282
283 /* This is the IEEE 854 quad-precision format. */
284 struct {
285 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
286 unsigned int negative:1;
287 unsigned int exponent:15;
288 unsigned int mantissa0:16;
289 unsigned int mantissa1:32;
290 unsigned int mantissa2:32;
291 unsigned int mantissa3:32;
292 #else
293 unsigned int mantissa3:32;
294 unsigned int mantissa2:32;
295 unsigned int mantissa1:32;
296 unsigned int mantissa0:16;
297 unsigned int exponent:15;
298 unsigned int negative:1;
299 #endif
300 } ieee;
301
302 /* This is for NaNs in the IEEE 854 quad-precision format. */
303 struct {
304 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
305 unsigned int negative:1;
306 unsigned int exponent:15;
307 unsigned int quiet_nan:1;
308 unsigned int mantissa0:15;
309 unsigned int mantissa1:30;
310 unsigned int mantissa2:32;
311 unsigned int mantissa3:32;
312 #else
313 unsigned int mantissa3:32;
314 unsigned int mantissa2:32;
315 unsigned int mantissa1:32;
316 unsigned int mantissa0:15;
317 unsigned int quiet_nan:1;
318 unsigned int exponent:15;
319 unsigned int negative:1;
320 #endif
321 } ieee_nan;
322
323 /* This format is used to extract the sign_exponent and mantissa parts only */
324 #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
325 struct {
326 uae_u64 msw;
327 uae_u64 lsw;
328 } parts64;
329 struct {
330 uae_u32 w0;
331 uae_u32 w1;
332 uae_u32 w2;
333 uae_u32 w3;
334 } parts32;
335 #else
336 struct {
337 uae_u64 lsw;
338 uae_u64 msw;
339 } parts64;
340 struct {
341 uae_u32 w3;
342 uae_u32 w2;
343 uae_u32 w1;
344 uae_u32 w0;
345 } parts32;
346 #endif
347 };
348 #endif // !USE_QUAD_DOUBLE
349
350 // Declare and initialize a pointer to a shape of the requested FP type
351 #define fp_declare_init_shape(psvar, rfvar, ftype) \
352 fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
353
354 /* -------------------------------------------------------------------------- */
355 /* --- Extra Math Functions --- */
356 /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
357 /* -------------------------------------------------------------------------- */
358
359 #undef isnan
360 #if 0 && defined(HAVE_ISNANL)
361 # define isnan(x) isnanl((x))
362 #else
363 # define isnan(x) fp_do_isnan((x))
364 #endif
365
366 PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
367 {
368 fp_declare_init_shape(sxp, r, extended);
369 #ifdef BRANCHES_ARE_EXPENSIVE
370 #ifdef USE_QUAD_DOUBLE
371 uae_s64 hx = sxp->parts64.msw;
372 uae_s64 lx = sxp->parts64.lsw;
373 hx &= 0x7fffffffffffffffLL;
374 hx |= (uae_u64)(lx | (-lx)) >> 63;
375 hx = 0x7fff000000000000LL - hx;
376 return (int)((uae_u64)hx >> 63);
377 #else
378 uae_s32 se = sxp->parts.sign_exponent;
379 uae_s32 hx = sxp->parts.msw;
380 uae_s32 lx = sxp->parts.lsw;
381 se = (se & 0x7fff) << 1;
382 lx |= hx & 0x7fffffff;
383 se |= (uae_u32)(lx | (-lx)) >> 31;
384 se = 0xfffe - se;
385 // TODO: check whether rshift count is 16 or 31
386 return (int)(((uae_u32)(se)) >> 16);
387 #endif
388 #else
389 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
390 && (sxp->ieee_nan.mantissa0 != 0)
391 && (sxp->ieee_nan.mantissa1 != 0)
392 #ifdef USE_QUAD_DOUBLE
393 && (sxp->ieee_nan.mantissa2 != 0)
394 && (sxp->ieee_nan.mantissa3 != 0)
395 #endif
396 ;
397 #endif
398 }
399
400 #undef isinf
401 #if 0 && defined(HAVE_ISINFL)
402 # define isinf(x) isinfl((x))
403 #else
404 # define isinf(x) fp_do_isinf((x))
405 #endif
406
407 PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
408 {
409 fp_declare_init_shape(sxp, r, extended);
410 #ifdef BRANCHES_ARE_EXPENSIVE
411 #ifdef USE_QUAD_DOUBLE
412 uae_s64 hx = sxp->parts64.msw;
413 uae_s64 lx = sxp->parts64.lsw;
414 lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
415 lx |= -lx;
416 return ~(lx >> 63) & (hx >> 62);
417 #else
418 uae_s32 se = sxp->parts.sign_exponent;
419 uae_s32 hx = sxp->parts.msw;
420 uae_s32 lx = sxp->parts.lsw;
421 /* This additional ^ 0x80000000 is necessary because in Intel's
422 internal representation of the implicit one is explicit.
423 NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
424 #ifdef __i386__
425 lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
426 #else
427 lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
428 #endif
429 lx |= -lx;
430 se &= 0x8000;
431 return ~(lx >> 31) & (1 - (se >> 14));
432 #endif
433 #else
434 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
435 && (sxp->ieee_nan.mantissa0 == 0)
436 && (sxp->ieee_nan.mantissa1 == 0)
437 #ifdef USE_QUAD_DOUBLE
438 && (sxp->ieee_nan.mantissa2 == 0)
439 && (sxp->ieee_nan.mantissa3 == 0)
440 #endif
441 ;
442 #endif
443 }
444
445 #undef isneg
446 #define isneg(x) fp_do_isneg((x))
447
448 PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
449 {
450 fp_declare_init_shape(sxp, r, extended);
451 return (sxp->ieee.negative)
452 ;
453 }
454
455 #undef iszero
456 #define iszero(x) fp_do_iszero((x))
457
458 PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
459 {
460 // TODO: BRANCHES_ARE_EXPENSIVE
461 fp_declare_init_shape(sxp, r, extended);
462 return (sxp->ieee.exponent == 0)
463 && (sxp->ieee.mantissa0 == 0)
464 && (sxp->ieee.mantissa1 == 0)
465 #ifdef USE_QUAD_DOUBLE
466 && (sxp->ieee.mantissa2 == 0)
467 && (sxp->ieee.mantissa3 == 0)
468 #endif
469 ;
470 }
471
472 PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
473 {
474 fl_dest.negative = isneg(r);
475 fl_dest.zero = iszero(r);
476 fl_dest.infinity = isinf(r);
477 fl_dest.nan = isnan(r);
478 fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
479 }
480
481 PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
482 {
483 fl_source.negative = isneg(r);
484 fl_source.zero = iszero(r);
485 fl_source.infinity = isinf(r);
486 fl_source.nan = isnan(r);
487 fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
488 }
489
490 PRIVATE inline void FFPU make_nan(fpu_register & r)
491 {
492 // FIXME: is that correct ?
493 fp_declare_init_shape(sxp, r, extended);
494 sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
495 sxp->ieee.mantissa0 = 0xffffffff;
496 sxp->ieee.mantissa1 = 0xffffffff;
497 #ifdef USE_QUAD_DOUBLE
498 sxp->ieee.mantissa2 = 0xffffffff;
499 sxp->ieee.mantissa3 = 0xffffffff;
500 #endif
501 }
502
503 PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
504 {
505 #if 1
506 r = +0.0;
507 #else
508 fp_declare_init_shape(sxp, r, extended);
509 sxp->ieee.negative = 0;
510 sxp->ieee.exponent = 0;
511 sxp->ieee.mantissa0 = 0;
512 sxp->ieee.mantissa1 = 0;
513 #ifdef USE_QUAD_DOUBLE
514 sxp->ieee.mantissa2 = 0;
515 sxp->ieee.mantissa3 = 0;
516 #endif
517 #endif
518 }
519
520 PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
521 {
522 #if 1
523 r = -0.0;
524 #else
525 fp_declare_init_shape(sxp, r, extended);
526 sxp->ieee.negative = 1;
527 sxp->ieee.exponent = 0;
528 sxp->ieee.mantissa0 = 0;
529 sxp->ieee.mantissa1 = 0;
530 #ifdef USE_QUAD_DOUBLE
531 sxp->ieee.mantissa2 = 0;
532 sxp->ieee.mantissa3 = 0;
533 #endif
534 #endif
535 }
536
537 PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
538 {
539 fp_declare_init_shape(sxp, r, extended);
540 sxp->ieee_nan.negative = 0;
541 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
542 sxp->ieee_nan.mantissa0 = 0;
543 sxp->ieee_nan.mantissa1 = 0;
544 #ifdef USE_QUAD_DOUBLE
545 sxp->ieee_nan.mantissa2 = 0;
546 sxp->ieee_nan.mantissa3 = 0;
547 #endif
548 }
549
550 PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
551 {
552 fp_declare_init_shape(sxp, r, extended);
553 sxp->ieee_nan.negative = 1;
554 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
555 sxp->ieee_nan.mantissa0 = 0;
556 sxp->ieee_nan.mantissa1 = 0;
557 #ifdef USE_QUAD_DOUBLE
558 sxp->ieee_nan.mantissa2 = 0;
559 sxp->ieee_nan.mantissa3 = 0;
560 #endif
561 }
562
563 PRIVATE inline void FFPU fast_scale(fpu_register & r, int add)
564 {
565 fp_declare_init_shape(sxp, r, extended);
566 // TODO: overflow flags
567 int exp = sxp->ieee.exponent + add;
568 // FIXME: this test is not correct: see fpuop_fscale()
569 if (exp > FP_EXTENDED_EXP_BIAS) {
570 make_inf_positive(r);
571 } else if (exp < 0) {
572 // keep sign (+/- 0)
573 if (isneg(r))
574 make_inf_negative(r);
575 else
576 make_inf_positive(r);
577 // p[FHI] &= 0x80000000;
578 } else {
579 sxp->ieee.exponent = exp;
580 // p[FHI] = (p[FHI] & 0x800FFFFF) | ((uae_u32)exp << 20);
581 }
582 }
583
584 PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
585 {
586 fp_declare_init_shape(sxp, r, extended);
587 return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
588 }
589
590 // Normalize to range 1..2
591 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
592 {
593 fp_declare_init_shape(sxp, r, extended);
594 sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
595 }
596
597 // The sign of the quotient is the exclusive-OR of the sign bits
598 // of the source and destination operands.
599 PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
600 {
601 fp_declare_init_shape(sap, ra, extended);
602 fp_declare_init_shape(sbp, rb, extended);
603 return (((sap->ieee.mantissa0 ^ sbp->ieee.mantissa0) & 0x80000000) ? 0x800000 : 0);
604 }
605
606 /* -------------------------------------------------------------------------- */
607 /* --- Math functions --- */
608 /* -------------------------------------------------------------------------- */
609
610 #if FPU_USE_ISO_C99
611 # define fp_log logl
612 # define fp_log10 log10l
613 # define fp_exp expl
614 # define fp_pow powl
615 # define fp_fabs fabsl
616 # define fp_sqrt sqrtl
617 # define fp_sin sinl
618 # define fp_cos cosl
619 # define fp_tan tanl
620 # define fp_sinh sinhl
621 # define fp_cosh coshl
622 # define fp_tanh tanhl
623 # define fp_asin asinl
624 # define fp_acos acosl
625 # define fp_atan atanl
626 # define fp_asinh asinhl
627 # define fp_acosh acoshl
628 # define fp_atanh atanhl
629 # define fp_floor floorl
630 # define fp_ceil ceill
631 #else
632 # define fp_log log
633 # define fp_log10 log10
634 # define fp_exp exp
635 # define fp_pow pow
636 # define fp_fabs fabs
637 # define fp_sqrt sqrt
638 # define fp_sin sin
639 # define fp_cos cos
640 # define fp_tan tan
641 # define fp_sinh sinh
642 # define fp_cosh cosh
643 # define fp_tanh tanh
644 # define fp_asin asin
645 # define fp_acos acos
646 # define fp_atan atan
647 # define fp_asinh asinh
648 # define fp_acosh acosh
649 # define fp_atanh atanh
650 # define fp_floor floor
651 # define fp_ceil ceil
652 #endif
653
654 #if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
655 // Assembly optimized support functions. Taken from glibc 2.2.2
656
657 #undef fp_log
658 #define fp_log fp_do_log
659
660 #ifndef FPU_FAST_MATH
661 // FIXME: unimplemented
662 PRIVATE fpu_extended fp_do_log(fpu_extended x);
663 #else
664 PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
665 {
666 fpu_extended value;
667 __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
668 return value;
669 }
670 #endif
671
672 #undef fp_log10
673 #define fp_log10 fp_do_log10
674
675 #ifndef FPU_FAST_MATH
676 // FIXME: unimplemented
677 PRIVATE fpu_extended fp_do_log10(fpu_extended x);
678 #else
679 PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
680 {
681 fpu_extended value;
682 __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
683 return value;
684 }
685 #endif
686
687 #undef fp_exp
688 #define fp_exp fp_do_exp
689
690 #ifndef FPU_FAST_MATH
691 // FIXME: unimplemented
692 PRIVATE fpu_extended fp_do_exp(fpu_extended x);
693 #else
694 PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
695 {
696 fpu_extended value, exponent;
697 __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
698 "fmul %%st(1) # x * log2(e)\n\t"
699 "fst %%st(1)\n\t"
700 "frndint # int(x * log2(e))\n\t"
701 "fxch\n\t"
702 "fsub %%st(1) # fract(x * log2(e))\n\t"
703 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
704 : "=t" (value), "=u" (exponent) : "0" (x));
705 value += 1.0;
706 __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
707 return value;
708 }
709 #endif
710
711 #undef fp_pow
712 #define fp_pow fp_do_pow
713
714 PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
715
716 #undef fp_fabs
717 #define fp_fabs fp_do_fabs
718
719 PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
720 {
721 fpu_extended value;
722 __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
723 return value;
724 }
725
726 #undef fp_sqrt
727 #define fp_sqrt fp_do_sqrt
728
729 PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
730 {
731 fpu_extended value;
732 __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
733 return value;
734 }
735
736 #undef fp_sin
737 #define fp_sin fp_do_sin
738
739 PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
740 {
741 fpu_extended value;
742 __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
743 return value;
744 }
745
746 #undef fp_cos
747 #define fp_cos fp_do_cos
748
749 PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
750 {
751 fpu_extended value;
752 __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
753 return value;
754 }
755
756 #undef fp_tan
757 #define fp_tan fp_do_tan
758
759 PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
760 {
761 fpu_extended value;
762 __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
763 return value;
764 }
765
766 #undef fp_expm1
767 #define fp_expm1 fp_do_expm1
768
769 // Returns: exp(X) - 1.0
770 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
771 {
772 fpu_extended value, exponent, temp;
773 __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
774 "fmul %%st(1) # x * log2(e)\n\t"
775 "fst %%st(1)\n\t"
776 "frndint # int(x * log2(e))\n\t"
777 "fxch\n\t"
778 "fsub %%st(1) # fract(x * log2(e))\n\t"
779 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
780 "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
781 : "=t" (value), "=u" (exponent) : "0" (x));
782 __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
783 temp -= 1.0;
784 return temp + value ? temp + value : x;
785 }
786
787 #undef fp_sgn1
788 #define fp_sgn1 fp_do_sgn1
789
790 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
791 {
792 fp_declare_init_shape(sxp, x, extended);
793 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
794 sxp->ieee_nan.one = 1;
795 sxp->ieee_nan.quiet_nan = 0;
796 sxp->ieee_nan.mantissa0 = 0;
797 sxp->ieee_nan.mantissa1 = 0;
798 return x;
799 }
800
801 #undef fp_sinh
802 #define fp_sinh fp_do_sinh
803
804 #ifndef FPU_FAST_MATH
805 // FIXME: unimplemented
806 PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
807 #else
808 PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
809 {
810 fpu_extended exm1 = fp_expm1(fp_fabs(x));
811 return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
812 }
813 #endif
814
815 #undef fp_cosh
816 #define fp_cosh fp_do_cosh
817
818 #ifndef FPU_FAST_MATH
819 // FIXME: unimplemented
820 PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
821 #else
822 PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
823 {
824 fpu_extended ex = fp_exp(x);
825 return 0.5 * (ex + 1.0 / ex);
826 }
827 #endif
828
829 #undef fp_tanh
830 #define fp_tanh fp_do_tanh
831
832 #ifndef FPU_FAST_MATH
833 // FIXME: unimplemented
834 PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
835 #else
836 PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
837 {
838 fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
839 return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
840 }
841 #endif
842
843 #undef fp_atan2
844 #define fp_atan2 fp_do_atan2
845
846 PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
847 {
848 fpu_extended value;
849 __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
850 return value;
851 }
852
853 #undef fp_asin
854 #define fp_asin fp_do_asin
855
856 PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
857 {
858 return fp_atan2(x, fp_sqrt(1.0 - x * x));
859 }
860
861 #undef fp_acos
862 #define fp_acos fp_do_acos
863
864 PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
865 {
866 return fp_atan2(fp_sqrt(1.0 - x * x), x);
867 }
868
869 #undef fp_atan
870 #define fp_atan fp_do_atan
871
872 PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
873 {
874 fpu_extended value;
875 __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
876 return value;
877 }
878
879 #undef fp_log1p
880 #define fp_log1p fp_do_log1p
881
882 // Returns: ln(1.0 + X)
883 PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
884
885 #undef fp_asinh
886 #define fp_asinh fp_do_asinh
887
888 PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
889 {
890 fpu_extended y = fp_fabs(x);
891 return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
892 }
893
894 #undef fp_acosh
895 #define fp_acosh fp_do_acosh
896
897 PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
898 {
899 return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
900 }
901
902 #undef fp_atanh
903 #define fp_atanh fp_do_atanh
904
905 PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
906 {
907 fpu_extended y = fp_fabs(x);
908 return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
909 }
910
911 #undef fp_floor
912 #define fp_floor fp_do_floor
913
914 PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
915 {
916 volatile unsigned int cw;
917 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
918 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
919 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
920 fpu_extended value;
921 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
922 __asm__ __volatile__("fldcw %0" : : "m" (cw));
923 return value;
924 }
925
926 #undef fp_ceil
927 #define fp_ceil fp_do_ceil
928
929 PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
930 {
931 volatile unsigned int cw;
932 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
933 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
934 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
935 fpu_extended value;
936 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
937 __asm__ __volatile__("fldcw %0" : : "m" (cw));
938 return value;
939 }
940
941 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
942 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
943 { \
944 volatile unsigned int cw; \
945 __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
946 volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
947 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
948 fpu_extended value; \
949 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
950 __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
951 return value; \
952 }
953
954 #undef fp_round_to_minus_infinity
955 #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
956
957 DEFINE_ROUND_FUNC(minus_infinity, 0x400)
958
959 #undef fp_round_to_plus_infinity
960 #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
961
962 DEFINE_ROUND_FUNC(plus_infinity, 0x800)
963
964 #undef fp_round_to_zero
965 #define fp_round_to_zero fp_do_round_to_zero
966
967 DEFINE_ROUND_FUNC(zero, 0xc00)
968
969 #undef fp_round_to_nearest
970 #define fp_round_to_nearest fp_do_round_to_nearest
971
972 DEFINE_ROUND_FUNC(nearest, 0x000)
973
974 #endif /* X86_ASSEMBLY */
975
976 #ifndef fp_round_to_minus_infinity
977 #define fp_round_to_minus_infinity(x) fp_floor(x)
978 #endif
979
980 #ifndef fp_round_to_plus_infinity
981 #define fp_round_to_plus_infinity(x) fp_ceil(x)
982 #endif
983
984 #ifndef fp_round_to_zero
985 #define fp_round_to_zero(x) ((int)(x))
986 #endif
987
988 #ifndef fp_round_to_nearest
989 #define fp_round_to_nearest(x) ((int)((x) + 0.5))
990 #endif
991
992 #endif /* FPU_MATHLIB_H */