ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.3
Committed: 2002-09-16T12:01:38Z (22 years, 2 months ago) by gbeauche
Content type: text/plain
Branch: MAIN
Changes since 1.2: +23 -28 lines
Log Message:
- FP endianness is now testing at configure time
- Fix junk introduced in previous rev for extract_extended()

File Contents

# Content
1 /*
2 * fpu/mathlib.h - Floating-point math support library
3 *
4 * Basilisk II (C) 1997-2001 Christian Bauer
5 *
6 * MC68881/68040 fpu emulation
7 *
8 * Original UAE FPU, copyright 1996 Herman ten Brugge
9 * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10 * New framework, copyright 2000-2001 Gwenole Beauchesne
11 * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 #ifndef FPU_MATHLIB_H
29 #define FPU_MATHLIB_H
30
31 /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32 #undef PUBLIC
33 #define PUBLIC extern
34
35 #undef PRIVATE
36 #define PRIVATE static
37
38 #undef FFPU
39 #define FFPU /**/
40
41 #undef FPU
42 #define FPU fpu.
43
44 // Define the following macro if branches are expensive. If so,
45 // integer-based isnan() and isinf() functions are implemented.
46 // TODO: move to Makefile.in
47 #define BRANCHES_ARE_EXPENSIVE 1
48
49 // Use ISO C99 extended-precision math functions (glibc 2.1+)
50 #define FPU_USE_ISO_C99 1
51
52 // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53 // don't support extended-precision floating-point computations
54 #ifdef WIN32
55 #undef FPU_USE_ISO_C99
56 #endif
57
58 // Use faster implementation of math functions, but this could cause
59 // some incorrect results (?)
60 // TODO: actually implement the slower but safer versions
61 #define FPU_FAST_MATH 1
62
63 #if FPU_USE_ISO_C99
64 // NOTE: no prior <math.h> shall be included at this point
65 #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66 #define __USE_ISOC9X 1 // for glibc 2.1.X
67 #include <math.h>
68 #else
69 #include <cmath>
70 using namespace std;
71 #endif
72
73 /* -------------------------------------------------------------------------- */
74 /* --- Floating-point register types --- */
75 /* -------------------------------------------------------------------------- */
76
77 // Single : S 8*E 23*F
78 #define FP_SINGLE_EXP_MAX 0xff
79 #define FP_SINGLE_EXP_BIAS 0x7f
80
81 // Double : S 11*E 52*F
82 #define FP_DOUBLE_EXP_MAX 0x7ff
83 #define FP_DOUBLE_EXP_BIAS 0x3ff
84
85 // Extended : S 15*E 64*F
86 #define FP_EXTENDED_EXP_MAX 0x7fff
87 #define FP_EXTENDED_EXP_BIAS 0x3fff
88
89 // Zeroes : E = 0 & F = 0
90 // Infinities : E = MAX & F = 0
91 // Not-A-Number : E = MAX & F # 0
92
93 /* -------------------------------------------------------------------------- */
94 /* --- Floating-point type shapes (IEEE-compliant) --- */
95 /* -------------------------------------------------------------------------- */
96
97 // Taken from glibc 2.2.x: ieee754.h
98
99 // IEEE-754 float format
100 union fpu_single_shape {
101
102 fpu_single value;
103
104 /* This is the IEEE 754 single-precision format. */
105 struct {
106 #ifdef WORDS_BIGENDIAN
107 unsigned int negative:1;
108 unsigned int exponent:8;
109 unsigned int mantissa:23;
110 #else
111 unsigned int mantissa:23;
112 unsigned int exponent:8;
113 unsigned int negative:1;
114 #endif
115 } ieee;
116
117 /* This format makes it easier to see if a NaN is a signalling NaN. */
118 struct {
119 #ifdef WORDS_BIGENDIAN
120 unsigned int negative:1;
121 unsigned int exponent:8;
122 unsigned int quiet_nan:1;
123 unsigned int mantissa:22;
124 #else
125 unsigned int mantissa:22;
126 unsigned int quiet_nan:1;
127 unsigned int exponent:8;
128 unsigned int negative:1;
129 #endif
130 } ieee_nan;
131 };
132
133 // IEEE-754 double format
134 union fpu_double_shape {
135 fpu_double value;
136
137 /* This is the IEEE 754 double-precision format. */
138 struct {
139 #ifdef WORDS_BIGENDIAN
140 unsigned int negative:1;
141 unsigned int exponent:11;
142 /* Together these comprise the mantissa. */
143 unsigned int mantissa0:20;
144 unsigned int mantissa1:32;
145 #else
146 # if HOST_FLOAT_WORDS_BIG_ENDIAN
147 unsigned int mantissa0:20;
148 unsigned int exponent:11;
149 unsigned int negative:1;
150 unsigned int mantissa1:32;
151 # else
152 /* Together these comprise the mantissa. */
153 unsigned int mantissa1:32;
154 unsigned int mantissa0:20;
155 unsigned int exponent:11;
156 unsigned int negative:1;
157 # endif
158 #endif
159 } ieee;
160
161 /* This format makes it easier to see if a NaN is a signalling NaN. */
162 struct {
163 #ifdef WORDS_BIGENDIAN
164 unsigned int negative:1;
165 unsigned int exponent:11;
166 unsigned int quiet_nan:1;
167 /* Together these comprise the mantissa. */
168 unsigned int mantissa0:19;
169 unsigned int mantissa1:32;
170 #else
171 # if HOST_FLOAT_WORDS_BIG_ENDIAN
172 unsigned int mantissa0:19;
173 unsigned int quiet_nan:1;
174 unsigned int exponent:11;
175 unsigned int negative:1;
176 unsigned int mantissa1:32;
177 # else
178 /* Together these comprise the mantissa. */
179 unsigned int mantissa1:32;
180 unsigned int mantissa0:19;
181 unsigned int quiet_nan:1;
182 unsigned int exponent:11;
183 unsigned int negative:1;
184 # endif
185 #endif
186 } ieee_nan;
187
188 /* This format is used to extract the sign_exponent and mantissa parts only */
189 struct {
190 #if HOST_FLOAT_WORDS_BIG_ENDIAN
191 unsigned int msw:32;
192 unsigned int lsw:32;
193 #else
194 unsigned int lsw:32;
195 unsigned int msw:32;
196 #endif
197 } parts;
198 };
199
200 #ifdef USE_LONG_DOUBLE
201 // IEEE-854 long double format
202 union fpu_extended_shape {
203 fpu_extended value;
204
205 /* This is the IEEE 854 double-extended-precision format. */
206 struct {
207 #ifdef WORDS_BIGENDIAN
208 unsigned int negative:1;
209 unsigned int exponent:15;
210 unsigned int empty:16;
211 unsigned int mantissa0:32;
212 unsigned int mantissa1:32;
213 #else
214 # if HOST_FLOAT_WORDS_BIG_ENDIAN
215 unsigned int exponent:15;
216 unsigned int negative:1;
217 unsigned int empty:16;
218 unsigned int mantissa0:32;
219 unsigned int mantissa1:32;
220 # else
221 unsigned int mantissa1:32;
222 unsigned int mantissa0:32;
223 unsigned int exponent:15;
224 unsigned int negative:1;
225 unsigned int empty:16;
226 # endif
227 #endif
228 } ieee;
229
230 /* This is for NaNs in the IEEE 854 double-extended-precision format. */
231 struct {
232 #ifdef WORDS_BIGENDIAN
233 unsigned int negative:1;
234 unsigned int exponent:15;
235 unsigned int empty:16;
236 unsigned int one:1;
237 unsigned int quiet_nan:1;
238 unsigned int mantissa0:30;
239 unsigned int mantissa1:32;
240 #else
241 # if HOST_FLOAT_WORDS_BIG_ENDIAN
242 unsigned int exponent:15;
243 unsigned int negative:1;
244 unsigned int empty:16;
245 unsigned int mantissa0:30;
246 unsigned int quiet_nan:1;
247 unsigned int one:1;
248 unsigned int mantissa1:32;
249 # else
250 unsigned int mantissa1:32;
251 unsigned int mantissa0:30;
252 unsigned int quiet_nan:1;
253 unsigned int one:1;
254 unsigned int exponent:15;
255 unsigned int negative:1;
256 unsigned int empty:16;
257 # endif
258 #endif
259 } ieee_nan;
260
261 /* This format is used to extract the sign_exponent and mantissa parts only */
262 struct {
263 #if HOST_FLOAT_WORDS_BIG_ENDIAN
264 unsigned int sign_exponent:16;
265 unsigned int empty:16;
266 unsigned int msw:32;
267 unsigned int lsw:32;
268 #else
269 unsigned int lsw:32;
270 unsigned int msw:32;
271 unsigned int sign_exponent:16;
272 unsigned int empty:16;
273 #endif
274 } parts;
275 };
276 #endif
277
278 #ifdef USE_QUAD_DOUBLE
279 // IEEE-854 quad double format
280 union fpu_extended_shape {
281 fpu_extended value;
282
283 /* This is the IEEE 854 quad-precision format. */
284 struct {
285 #ifdef WORDS_BIGENDIAN
286 unsigned int negative:1;
287 unsigned int exponent:15;
288 unsigned int mantissa0:16;
289 unsigned int mantissa1:32;
290 unsigned int mantissa2:32;
291 unsigned int mantissa3:32;
292 #else
293 unsigned int mantissa3:32;
294 unsigned int mantissa2:32;
295 unsigned int mantissa1:32;
296 unsigned int mantissa0:16;
297 unsigned int exponent:15;
298 unsigned int negative:1;
299 #endif
300 } ieee;
301
302 /* This is for NaNs in the IEEE 854 quad-precision format. */
303 struct {
304 #ifdef WORDS_BIGENDIAN
305 unsigned int negative:1;
306 unsigned int exponent:15;
307 unsigned int quiet_nan:1;
308 unsigned int mantissa0:15;
309 unsigned int mantissa1:30;
310 unsigned int mantissa2:32;
311 unsigned int mantissa3:32;
312 #else
313 unsigned int mantissa3:32;
314 unsigned int mantissa2:32;
315 unsigned int mantissa1:32;
316 unsigned int mantissa0:15;
317 unsigned int quiet_nan:1;
318 unsigned int exponent:15;
319 unsigned int negative:1;
320 #endif
321 } ieee_nan;
322
323 /* This format is used to extract the sign_exponent and mantissa parts only */
324 #if HOST_FLOAT_WORDS_BIG_ENDIAN
325 struct {
326 uae_u64 msw;
327 uae_u64 lsw;
328 } parts64;
329 struct {
330 uae_u32 w0;
331 uae_u32 w1;
332 uae_u32 w2;
333 uae_u32 w3;
334 } parts32;
335 #else
336 struct {
337 uae_u64 lsw;
338 uae_u64 msw;
339 } parts64;
340 struct {
341 uae_u32 w3;
342 uae_u32 w2;
343 uae_u32 w1;
344 uae_u32 w0;
345 } parts32;
346 #endif
347 };
348 #endif
349
350 // Declare and initialize a pointer to a shape of the requested FP type
351 #define fp_declare_init_shape(psvar, rfvar, ftype) \
352 fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
353
354 /* -------------------------------------------------------------------------- */
355 /* --- Extra Math Functions --- */
356 /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
357 /* -------------------------------------------------------------------------- */
358
359 #undef isnan
360 #if 0 && defined(HAVE_ISNANL)
361 # define isnan(x) isnanl((x))
362 #else
363 # define isnan(x) fp_do_isnan((x))
364 #endif
365
366 PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
367 {
368 #ifdef BRANCHES_ARE_EXPENSIVE
369 #ifndef USE_LONG_DOUBLE
370 fp_declare_init_shape(sxp, r, double);
371 uae_s32 hx = sxp->parts.msw;
372 uae_s32 lx = sxp->parts.lsw;
373 hx &= 0x7fffffff;
374 hx |= (uae_u32)(lx | (-lx)) >> 31;
375 hx = 0x7ff00000 - hx;
376 return (int)(((uae_u32)hx) >> 31);
377 #elif USE_QUAD_DOUBLE
378 fp_declare_init_shape(sxp, r, extended);
379 uae_s64 hx = sxp->parts64.msw;
380 uae_s64 lx = sxp->parts64.lsw;
381 hx &= 0x7fffffffffffffffLL;
382 hx |= (uae_u64)(lx | (-lx)) >> 63;
383 hx = 0x7fff000000000000LL - hx;
384 return (int)((uae_u64)hx >> 63);
385 #else
386 fp_declare_init_shape(sxp, r, extended);
387 uae_s32 se = sxp->parts.sign_exponent;
388 uae_s32 hx = sxp->parts.msw;
389 uae_s32 lx = sxp->parts.lsw;
390 se = (se & 0x7fff) << 1;
391 lx |= hx & 0x7fffffff;
392 se |= (uae_u32)(lx | (-lx)) >> 31;
393 se = 0xfffe - se;
394 // TODO: check whether rshift count is 16 or 31
395 return (int)(((uae_u32)(se)) >> 16);
396 #endif
397 #else
398 #ifndef USE_LONG_DOUBLE
399 fp_declare_init_shape(sxp, r, double);
400 return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
401 #else
402 fp_declare_init_shape(sxp, r, extended);
403 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
404 #endif
405 && (sxp->ieee_nan.mantissa0 != 0)
406 && (sxp->ieee_nan.mantissa1 != 0)
407 #ifdef USE_QUAD_DOUBLE
408 && (sxp->ieee_nan.mantissa2 != 0)
409 && (sxp->ieee_nan.mantissa3 != 0)
410 #endif
411 ;
412 #endif
413 }
414
415 #undef isinf
416 #if 0 && defined(HAVE_ISINFL)
417 # define isinf(x) isinfl((x))
418 #else
419 # define isinf(x) fp_do_isinf((x))
420 #endif
421
422 PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
423 {
424 #ifdef BRANCHES_ARE_EXPENSIVE
425 #ifndef USE_LONG_DOUBLE
426 fp_declare_init_shape(sxp, r, double);
427 uae_s32 hx = sxp->parts.msw;
428 uae_s32 lx = sxp->parts.lsw;
429 lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
430 lx |= -lx;
431 return ~(lx >> 31) & (hx >> 30);
432 #elif USE_QUAD_DOUBLE
433 fp_declare_init_shape(sxp, r, extended);
434 uae_s64 hx = sxp->parts64.msw;
435 uae_s64 lx = sxp->parts64.lsw;
436 lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
437 lx |= -lx;
438 return ~(lx >> 63) & (hx >> 62);
439 #else
440 fp_declare_init_shape(sxp, r, extended);
441 uae_s32 se = sxp->parts.sign_exponent;
442 uae_s32 hx = sxp->parts.msw;
443 uae_s32 lx = sxp->parts.lsw;
444 /* This additional ^ 0x80000000 is necessary because in Intel's
445 internal representation of the implicit one is explicit.
446 NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
447 #ifdef __i386__
448 lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
449 #else
450 lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
451 #endif
452 lx |= -lx;
453 se &= 0x8000;
454 return ~(lx >> 31) & (1 - (se >> 14));
455 #endif
456 #else
457 #ifndef USE_LONG_DOUBLE
458 fp_declare_init_shape(sxp, r, double);
459 return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
460 #else
461 fp_declare_init_shape(sxp, r, extended);
462 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
463 #endif
464 && (sxp->ieee_nan.mantissa0 == 0)
465 && (sxp->ieee_nan.mantissa1 == 0)
466 #ifdef USE_QUAD_DOUBLE
467 && (sxp->ieee_nan.mantissa2 == 0)
468 && (sxp->ieee_nan.mantissa3 == 0)
469 #endif
470 ;
471 #endif
472 }
473
474 #undef isneg
475 #define isneg(x) fp_do_isneg((x))
476
477 PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
478 {
479 #ifndef USE_LONG_DOUBLE
480 fp_declare_init_shape(sxp, r, double);
481 #else
482 fp_declare_init_shape(sxp, r, extended);
483 #endif
484 return sxp->ieee.negative;
485 }
486
487 #undef iszero
488 #define iszero(x) fp_do_iszero((x))
489
490 PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
491 {
492 // TODO: BRANCHES_ARE_EXPENSIVE
493 #ifndef USE_LONG_DOUBLE
494 fp_declare_init_shape(sxp, r, double);
495 #else
496 fp_declare_init_shape(sxp, r, extended);
497 #endif
498 return (sxp->ieee.exponent == 0)
499 && (sxp->ieee.mantissa0 == 0)
500 && (sxp->ieee.mantissa1 == 0)
501 #ifdef USE_QUAD_DOUBLE
502 && (sxp->ieee.mantissa2 == 0)
503 && (sxp->ieee.mantissa3 == 0)
504 #endif
505 ;
506 }
507
508 PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
509 {
510 fl_dest.negative = isneg(r);
511 fl_dest.zero = iszero(r);
512 fl_dest.infinity = isinf(r);
513 fl_dest.nan = isnan(r);
514 fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
515 }
516
517 PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
518 {
519 fl_source.negative = isneg(r);
520 fl_source.zero = iszero(r);
521 fl_source.infinity = isinf(r);
522 fl_source.nan = isnan(r);
523 fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
524 }
525
526 PRIVATE inline void FFPU make_nan(fpu_register & r)
527 {
528 // FIXME: is that correct ?
529 #ifndef USE_LONG_DOUBLE
530 fp_declare_init_shape(sxp, r, double);
531 sxp->ieee.exponent = FP_DOUBLE_EXP_MAX;
532 sxp->ieee.mantissa0 = 0xfffff;
533 #else
534 fp_declare_init_shape(sxp, r, extended);
535 sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
536 sxp->ieee.mantissa0 = 0xffffffff;
537 #endif
538 sxp->ieee.mantissa1 = 0xffffffff;
539 #ifdef USE_QUAD_DOUBLE
540 sxp->ieee.mantissa2 = 0xffffffff;
541 sxp->ieee.mantissa3 = 0xffffffff;
542 #endif
543 }
544
545 PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
546 {
547 #if 1
548 r = +0.0;
549 #else
550 #ifndef USE_LONG_DOUBLE
551 fp_declare_init_shape(sxp, r, double);
552 #else
553 fp_declare_init_shape(sxp, r, extended);
554 #endif
555 sxp->ieee.negative = 0;
556 sxp->ieee.exponent = 0;
557 sxp->ieee.mantissa0 = 0;
558 sxp->ieee.mantissa1 = 0;
559 #ifdef USE_QUAD_DOUBLE
560 sxp->ieee.mantissa2 = 0;
561 sxp->ieee.mantissa3 = 0;
562 #endif
563 #endif
564 }
565
566 PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
567 {
568 #if 1
569 r = -0.0;
570 #else
571 #ifndef USE_LONG_DOUBLE
572 fp_declare_init_shape(sxp, r, double);
573 #else
574 fp_declare_init_shape(sxp, r, extended);
575 #endif
576 sxp->ieee.negative = 1;
577 sxp->ieee.exponent = 0;
578 sxp->ieee.mantissa0 = 0;
579 sxp->ieee.mantissa1 = 0;
580 #ifdef USE_QUAD_DOUBLE
581 sxp->ieee.mantissa2 = 0;
582 sxp->ieee.mantissa3 = 0;
583 #endif
584 #endif
585 }
586
587 PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
588 {
589 #ifndef USE_LONG_DOUBLE
590 fp_declare_init_shape(sxp, r, double);
591 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
592 #else
593 fp_declare_init_shape(sxp, r, extended);
594 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
595 #endif
596 sxp->ieee_nan.negative = 0;
597 sxp->ieee_nan.mantissa0 = 0;
598 sxp->ieee_nan.mantissa1 = 0;
599 #ifdef USE_QUAD_DOUBLE
600 sxp->ieee_nan.mantissa2 = 0;
601 sxp->ieee_nan.mantissa3 = 0;
602 #endif
603 }
604
605 PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
606 {
607 #ifndef USE_LONG_DOUBLE
608 fp_declare_init_shape(sxp, r, double);
609 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
610 #else
611 fp_declare_init_shape(sxp, r, extended);
612 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
613 #endif
614 sxp->ieee_nan.negative = 1;
615 sxp->ieee_nan.mantissa0 = 0;
616 sxp->ieee_nan.mantissa1 = 0;
617 #ifdef USE_QUAD_DOUBLE
618 sxp->ieee_nan.mantissa2 = 0;
619 sxp->ieee_nan.mantissa3 = 0;
620 #endif
621 }
622
623 PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
624 {
625 #ifndef USE_LONG_DOUBLE
626 fp_declare_init_shape(sxp, r, double);
627 return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS);
628 #else
629 fp_declare_init_shape(sxp, r, extended);
630 return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
631 #endif
632 }
633
634 // Normalize to range 1..2
635 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
636 {
637 #ifndef USE_LONG_DOUBLE
638 fp_declare_init_shape(sxp, r, double);
639 sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS;
640 #else
641 fp_declare_init_shape(sxp, r, extended);
642 sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
643 #endif
644 }
645
646 // The sign of the quotient is the exclusive-OR of the sign bits
647 // of the source and destination operands.
648 PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
649 {
650 #ifndef USE_LONG_DOUBLE
651 fp_declare_init_shape(sap, ra, double);
652 fp_declare_init_shape(sbp, rb, double);
653 #else
654 fp_declare_init_shape(sap, ra, extended);
655 fp_declare_init_shape(sbp, rb, extended);
656 #endif
657 return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
658 }
659
660 /* -------------------------------------------------------------------------- */
661 /* --- Math functions --- */
662 /* -------------------------------------------------------------------------- */
663
664 #if FPU_USE_ISO_C99 && USE_LONG_DOUBLE
665 # define fp_log logl
666 # define fp_log10 log10l
667 # define fp_exp expl
668 # define fp_pow powl
669 # define fp_fabs fabsl
670 # define fp_sqrt sqrtl
671 # define fp_sin sinl
672 # define fp_cos cosl
673 # define fp_tan tanl
674 # define fp_sinh sinhl
675 # define fp_cosh coshl
676 # define fp_tanh tanhl
677 # define fp_asin asinl
678 # define fp_acos acosl
679 # define fp_atan atanl
680 # define fp_asinh asinhl
681 # define fp_acosh acoshl
682 # define fp_atanh atanhl
683 # define fp_floor floorl
684 # define fp_ceil ceill
685 #else
686 # define fp_log log
687 # define fp_log10 log10
688 # define fp_exp exp
689 # define fp_pow pow
690 # define fp_fabs fabs
691 # define fp_sqrt sqrt
692 # define fp_sin sin
693 # define fp_cos cos
694 # define fp_tan tan
695 # define fp_sinh sinh
696 # define fp_cosh cosh
697 # define fp_tanh tanh
698 # define fp_asin asin
699 # define fp_acos acos
700 # define fp_atan atan
701 # define fp_asinh asinh
702 # define fp_acosh acosh
703 # define fp_atanh atanh
704 # define fp_floor floor
705 # define fp_ceil ceil
706 #endif
707
708 #if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
709 // Assembly optimized support functions. Taken from glibc 2.2.2
710
711 #undef fp_log
712 #define fp_log fp_do_log
713
714 #ifndef FPU_FAST_MATH
715 // FIXME: unimplemented
716 PRIVATE fpu_extended fp_do_log(fpu_extended x);
717 #else
718 PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
719 {
720 fpu_extended value;
721 __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
722 return value;
723 }
724 #endif
725
726 #undef fp_log10
727 #define fp_log10 fp_do_log10
728
729 #ifndef FPU_FAST_MATH
730 // FIXME: unimplemented
731 PRIVATE fpu_extended fp_do_log10(fpu_extended x);
732 #else
733 PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
734 {
735 fpu_extended value;
736 __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
737 return value;
738 }
739 #endif
740
741 #undef fp_exp
742 #define fp_exp fp_do_exp
743
744 #ifndef FPU_FAST_MATH
745 // FIXME: unimplemented
746 PRIVATE fpu_extended fp_do_exp(fpu_extended x);
747 #else
748 PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
749 {
750 fpu_extended value, exponent;
751 __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
752 "fmul %%st(1) # x * log2(e)\n\t"
753 "fst %%st(1)\n\t"
754 "frndint # int(x * log2(e))\n\t"
755 "fxch\n\t"
756 "fsub %%st(1) # fract(x * log2(e))\n\t"
757 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
758 : "=t" (value), "=u" (exponent) : "0" (x));
759 value += 1.0;
760 __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
761 return value;
762 }
763 #endif
764
765 #undef fp_pow
766 #define fp_pow fp_do_pow
767
768 PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
769
770 #undef fp_fabs
771 #define fp_fabs fp_do_fabs
772
773 PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
774 {
775 fpu_extended value;
776 __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
777 return value;
778 }
779
780 #undef fp_sqrt
781 #define fp_sqrt fp_do_sqrt
782
783 PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
784 {
785 fpu_extended value;
786 __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
787 return value;
788 }
789
790 #undef fp_sin
791 #define fp_sin fp_do_sin
792
793 PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
794 {
795 fpu_extended value;
796 __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
797 return value;
798 }
799
800 #undef fp_cos
801 #define fp_cos fp_do_cos
802
803 PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
804 {
805 fpu_extended value;
806 __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
807 return value;
808 }
809
810 #undef fp_tan
811 #define fp_tan fp_do_tan
812
813 PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
814 {
815 fpu_extended value;
816 __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
817 return value;
818 }
819
820 #undef fp_expm1
821 #define fp_expm1 fp_do_expm1
822
823 // Returns: exp(X) - 1.0
824 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
825 {
826 fpu_extended value, exponent, temp;
827 __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
828 "fmul %%st(1) # x * log2(e)\n\t"
829 "fst %%st(1)\n\t"
830 "frndint # int(x * log2(e))\n\t"
831 "fxch\n\t"
832 "fsub %%st(1) # fract(x * log2(e))\n\t"
833 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
834 "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
835 : "=t" (value), "=u" (exponent) : "0" (x));
836 __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
837 temp -= 1.0;
838 return temp + value ? temp + value : x;
839 }
840
841 #undef fp_sgn1
842 #define fp_sgn1 fp_do_sgn1
843
844 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
845 {
846 fp_declare_init_shape(sxp, x, extended);
847 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
848 sxp->ieee_nan.one = 1;
849 sxp->ieee_nan.quiet_nan = 0;
850 sxp->ieee_nan.mantissa0 = 0;
851 sxp->ieee_nan.mantissa1 = 0;
852 return x;
853 }
854
855 #undef fp_sinh
856 #define fp_sinh fp_do_sinh
857
858 #ifndef FPU_FAST_MATH
859 // FIXME: unimplemented
860 PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
861 #else
862 PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
863 {
864 fpu_extended exm1 = fp_expm1(fp_fabs(x));
865 return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
866 }
867 #endif
868
869 #undef fp_cosh
870 #define fp_cosh fp_do_cosh
871
872 #ifndef FPU_FAST_MATH
873 // FIXME: unimplemented
874 PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
875 #else
876 PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
877 {
878 fpu_extended ex = fp_exp(x);
879 return 0.5 * (ex + 1.0 / ex);
880 }
881 #endif
882
883 #undef fp_tanh
884 #define fp_tanh fp_do_tanh
885
886 #ifndef FPU_FAST_MATH
887 // FIXME: unimplemented
888 PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
889 #else
890 PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
891 {
892 fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
893 return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
894 }
895 #endif
896
897 #undef fp_atan2
898 #define fp_atan2 fp_do_atan2
899
900 PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
901 {
902 fpu_extended value;
903 __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
904 return value;
905 }
906
907 #undef fp_asin
908 #define fp_asin fp_do_asin
909
910 PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
911 {
912 return fp_atan2(x, fp_sqrt(1.0 - x * x));
913 }
914
915 #undef fp_acos
916 #define fp_acos fp_do_acos
917
918 PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
919 {
920 return fp_atan2(fp_sqrt(1.0 - x * x), x);
921 }
922
923 #undef fp_atan
924 #define fp_atan fp_do_atan
925
926 PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
927 {
928 fpu_extended value;
929 __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
930 return value;
931 }
932
933 #undef fp_log1p
934 #define fp_log1p fp_do_log1p
935
936 // Returns: ln(1.0 + X)
937 PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
938
939 #undef fp_asinh
940 #define fp_asinh fp_do_asinh
941
942 PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
943 {
944 fpu_extended y = fp_fabs(x);
945 return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
946 }
947
948 #undef fp_acosh
949 #define fp_acosh fp_do_acosh
950
951 PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
952 {
953 return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
954 }
955
956 #undef fp_atanh
957 #define fp_atanh fp_do_atanh
958
959 PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
960 {
961 fpu_extended y = fp_fabs(x);
962 return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
963 }
964
965 #undef fp_floor
966 #define fp_floor fp_do_floor
967
968 PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
969 {
970 volatile unsigned int cw;
971 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
972 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
973 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
974 fpu_extended value;
975 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
976 __asm__ __volatile__("fldcw %0" : : "m" (cw));
977 return value;
978 }
979
980 #undef fp_ceil
981 #define fp_ceil fp_do_ceil
982
983 PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
984 {
985 volatile unsigned int cw;
986 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
987 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
988 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
989 fpu_extended value;
990 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
991 __asm__ __volatile__("fldcw %0" : : "m" (cw));
992 return value;
993 }
994
995 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
996 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
997 { \
998 volatile unsigned int cw; \
999 __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
1000 volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
1001 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
1002 fpu_extended value; \
1003 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
1004 __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
1005 return value; \
1006 }
1007
1008 #undef fp_round_to_minus_infinity
1009 #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1010
1011 DEFINE_ROUND_FUNC(minus_infinity, 0x400)
1012
1013 #undef fp_round_to_plus_infinity
1014 #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1015
1016 DEFINE_ROUND_FUNC(plus_infinity, 0x800)
1017
1018 #undef fp_round_to_zero
1019 #define fp_round_to_zero fp_do_round_to_zero
1020
1021 DEFINE_ROUND_FUNC(zero, 0xc00)
1022
1023 #undef fp_round_to_nearest
1024 #define fp_round_to_nearest fp_do_round_to_nearest
1025
1026 DEFINE_ROUND_FUNC(nearest, 0x000)
1027
1028 #endif /* X86_ASSEMBLY */
1029
1030 #ifndef fp_round_to_minus_infinity
1031 #define fp_round_to_minus_infinity(x) fp_floor(x)
1032 #endif
1033
1034 #ifndef fp_round_to_plus_infinity
1035 #define fp_round_to_plus_infinity(x) fp_ceil(x)
1036 #endif
1037
1038 #ifndef fp_round_to_zero
1039 #define fp_round_to_zero(x) ((int)(x))
1040 #endif
1041
1042 #ifndef fp_round_to_nearest
1043 #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1044 #endif
1045
1046 #endif /* FPU_MATHLIB_H */