ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.10
Committed: 2008-01-01T09:40:36Z (16 years, 10 months ago) by gbeauche
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Changes since 1.9: +1 -1 lines
Log Message:
Happy New Year!

File Contents

# Content
1 /*
2 * fpu/mathlib.h - Floating-point math support library
3 *
4 * Basilisk II (C) 1997-2008 Christian Bauer
5 *
6 * MC68881/68040 fpu emulation
7 *
8 * Original UAE FPU, copyright 1996 Herman ten Brugge
9 * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10 * New framework, copyright 2000-2001 Gwenole Beauchesne
11 * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 #ifndef FPU_MATHLIB_H
29 #define FPU_MATHLIB_H
30
31 /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32 #undef PUBLIC
33 #define PUBLIC extern
34
35 #undef PRIVATE
36 #define PRIVATE static
37
38 #undef FFPU
39 #define FFPU /**/
40
41 #undef FPU
42 #define FPU fpu.
43
44 // Define the following macro if branches are expensive. If so,
45 // integer-based isnan() and isinf() functions are implemented.
46 // TODO: move to Makefile.in
47 #define BRANCHES_ARE_EXPENSIVE 1
48
49 // Use ISO C99 extended-precision math functions (glibc 2.1+)
50 #define FPU_USE_ISO_C99 1
51
52 // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53 // don't support extended-precision floating-point computations
54 #ifdef WIN32
55 #undef FPU_USE_ISO_C99
56 #endif
57
58 // Use faster implementation of math functions, but this could cause
59 // some incorrect results (?)
60 // TODO: actually implement the slower but safer versions
61 #define FPU_FAST_MATH 1
62
63 #if FPU_USE_ISO_C99
64 // NOTE: no prior <math.h> shall be included at this point
65 #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66 #define __USE_ISOC9X 1 // for glibc 2.1.X
67 #include <math.h>
68 #else
69 #include <cmath>
70 using namespace std;
71 #endif
72
73 /* -------------------------------------------------------------------------- */
74 /* --- Floating-point register types --- */
75 /* -------------------------------------------------------------------------- */
76
77 // Single : S 8*E 23*F
78 #define FP_SINGLE_EXP_MAX 0xff
79 #define FP_SINGLE_EXP_BIAS 0x7f
80
81 // Double : S 11*E 52*F
82 #define FP_DOUBLE_EXP_MAX 0x7ff
83 #define FP_DOUBLE_EXP_BIAS 0x3ff
84
85 // Extended : S 15*E 64*F
86 #define FP_EXTENDED_EXP_MAX 0x7fff
87 #define FP_EXTENDED_EXP_BIAS 0x3fff
88
89 // Zeroes : E = 0 & F = 0
90 // Infinities : E = MAX & F = 0
91 // Not-A-Number : E = MAX & F # 0
92
93 /* -------------------------------------------------------------------------- */
94 /* --- Floating-point type shapes (IEEE-compliant) --- */
95 /* -------------------------------------------------------------------------- */
96
97 // Taken from glibc 2.2.x: ieee754.h
98
99 // IEEE-754 float format
100 union fpu_single_shape {
101
102 fpu_single value;
103
104 /* This is the IEEE 754 single-precision format. */
105 struct {
106 #ifdef WORDS_BIGENDIAN
107 unsigned int negative:1;
108 unsigned int exponent:8;
109 unsigned int mantissa:23;
110 #else
111 unsigned int mantissa:23;
112 unsigned int exponent:8;
113 unsigned int negative:1;
114 #endif
115 } ieee;
116
117 /* This format makes it easier to see if a NaN is a signalling NaN. */
118 struct {
119 #ifdef WORDS_BIGENDIAN
120 unsigned int negative:1;
121 unsigned int exponent:8;
122 unsigned int quiet_nan:1;
123 unsigned int mantissa:22;
124 #else
125 unsigned int mantissa:22;
126 unsigned int quiet_nan:1;
127 unsigned int exponent:8;
128 unsigned int negative:1;
129 #endif
130 } ieee_nan;
131 };
132
133 // IEEE-754 double format
134 union fpu_double_shape {
135 fpu_double value;
136
137 /* This is the IEEE 754 double-precision format. */
138 struct {
139 #ifdef WORDS_BIGENDIAN
140 unsigned int negative:1;
141 unsigned int exponent:11;
142 /* Together these comprise the mantissa. */
143 unsigned int mantissa0:20;
144 unsigned int mantissa1:32;
145 #else
146 # if HOST_FLOAT_WORDS_BIG_ENDIAN
147 unsigned int mantissa0:20;
148 unsigned int exponent:11;
149 unsigned int negative:1;
150 unsigned int mantissa1:32;
151 # else
152 /* Together these comprise the mantissa. */
153 unsigned int mantissa1:32;
154 unsigned int mantissa0:20;
155 unsigned int exponent:11;
156 unsigned int negative:1;
157 # endif
158 #endif
159 } ieee;
160
161 /* This format makes it easier to see if a NaN is a signalling NaN. */
162 struct {
163 #ifdef WORDS_BIGENDIAN
164 unsigned int negative:1;
165 unsigned int exponent:11;
166 unsigned int quiet_nan:1;
167 /* Together these comprise the mantissa. */
168 unsigned int mantissa0:19;
169 unsigned int mantissa1:32;
170 #else
171 # if HOST_FLOAT_WORDS_BIG_ENDIAN
172 unsigned int mantissa0:19;
173 unsigned int quiet_nan:1;
174 unsigned int exponent:11;
175 unsigned int negative:1;
176 unsigned int mantissa1:32;
177 # else
178 /* Together these comprise the mantissa. */
179 unsigned int mantissa1:32;
180 unsigned int mantissa0:19;
181 unsigned int quiet_nan:1;
182 unsigned int exponent:11;
183 unsigned int negative:1;
184 # endif
185 #endif
186 } ieee_nan;
187
188 /* This format is used to extract the sign_exponent and mantissa parts only */
189 struct {
190 #if HOST_FLOAT_WORDS_BIG_ENDIAN
191 unsigned int msw:32;
192 unsigned int lsw:32;
193 #else
194 unsigned int lsw:32;
195 unsigned int msw:32;
196 #endif
197 } parts;
198 };
199
200 #ifdef USE_LONG_DOUBLE
201 // IEEE-854 long double format
202 union fpu_extended_shape {
203 fpu_extended value;
204
205 /* This is the IEEE 854 double-extended-precision format. */
206 struct {
207 #ifdef WORDS_BIGENDIAN
208 unsigned int negative:1;
209 unsigned int exponent:15;
210 unsigned int empty:16;
211 unsigned int mantissa0:32;
212 unsigned int mantissa1:32;
213 #else
214 # if HOST_FLOAT_WORDS_BIG_ENDIAN
215 unsigned int exponent:15;
216 unsigned int negative:1;
217 unsigned int empty:16;
218 unsigned int mantissa0:32;
219 unsigned int mantissa1:32;
220 # else
221 unsigned int mantissa1:32;
222 unsigned int mantissa0:32;
223 unsigned int exponent:15;
224 unsigned int negative:1;
225 unsigned int empty:16;
226 # endif
227 #endif
228 } ieee;
229
230 /* This is for NaNs in the IEEE 854 double-extended-precision format. */
231 struct {
232 #ifdef WORDS_BIGENDIAN
233 unsigned int negative:1;
234 unsigned int exponent:15;
235 unsigned int empty:16;
236 unsigned int one:1;
237 unsigned int quiet_nan:1;
238 unsigned int mantissa0:30;
239 unsigned int mantissa1:32;
240 #else
241 # if HOST_FLOAT_WORDS_BIG_ENDIAN
242 unsigned int exponent:15;
243 unsigned int negative:1;
244 unsigned int empty:16;
245 unsigned int mantissa0:30;
246 unsigned int quiet_nan:1;
247 unsigned int one:1;
248 unsigned int mantissa1:32;
249 # else
250 unsigned int mantissa1:32;
251 unsigned int mantissa0:30;
252 unsigned int quiet_nan:1;
253 unsigned int one:1;
254 unsigned int exponent:15;
255 unsigned int negative:1;
256 unsigned int empty:16;
257 # endif
258 #endif
259 } ieee_nan;
260
261 /* This format is used to extract the sign_exponent and mantissa parts only */
262 struct {
263 #if HOST_FLOAT_WORDS_BIG_ENDIAN
264 unsigned int sign_exponent:16;
265 unsigned int empty:16;
266 unsigned int msw:32;
267 unsigned int lsw:32;
268 #else
269 unsigned int lsw:32;
270 unsigned int msw:32;
271 unsigned int sign_exponent:16;
272 unsigned int empty:16;
273 #endif
274 } parts;
275 };
276 #endif
277
278 #ifdef USE_QUAD_DOUBLE
279 // IEEE-854 quad double format
280 union fpu_extended_shape {
281 fpu_extended value;
282
283 /* This is the IEEE 854 quad-precision format. */
284 struct {
285 #ifdef WORDS_BIGENDIAN
286 unsigned int negative:1;
287 unsigned int exponent:15;
288 unsigned int mantissa0:16;
289 unsigned int mantissa1:32;
290 unsigned int mantissa2:32;
291 unsigned int mantissa3:32;
292 #else
293 unsigned int mantissa3:32;
294 unsigned int mantissa2:32;
295 unsigned int mantissa1:32;
296 unsigned int mantissa0:16;
297 unsigned int exponent:15;
298 unsigned int negative:1;
299 #endif
300 } ieee;
301
302 /* This is for NaNs in the IEEE 854 quad-precision format. */
303 struct {
304 #ifdef WORDS_BIGENDIAN
305 unsigned int negative:1;
306 unsigned int exponent:15;
307 unsigned int quiet_nan:1;
308 unsigned int mantissa0:15;
309 unsigned int mantissa1:30;
310 unsigned int mantissa2:32;
311 unsigned int mantissa3:32;
312 #else
313 unsigned int mantissa3:32;
314 unsigned int mantissa2:32;
315 unsigned int mantissa1:32;
316 unsigned int mantissa0:15;
317 unsigned int quiet_nan:1;
318 unsigned int exponent:15;
319 unsigned int negative:1;
320 #endif
321 } ieee_nan;
322
323 /* This format is used to extract the sign_exponent and mantissa parts only */
324 #if HOST_FLOAT_WORDS_BIG_ENDIAN
325 struct {
326 uae_u64 msw;
327 uae_u64 lsw;
328 } parts64;
329 struct {
330 uae_u32 w0;
331 uae_u32 w1;
332 uae_u32 w2;
333 uae_u32 w3;
334 } parts32;
335 #else
336 struct {
337 uae_u64 lsw;
338 uae_u64 msw;
339 } parts64;
340 struct {
341 uae_u32 w3;
342 uae_u32 w2;
343 uae_u32 w1;
344 uae_u32 w0;
345 } parts32;
346 #endif
347 };
348 #endif
349
350 // Declare and initialize a pointer to a shape of the requested FP type
351 #define fp_declare_init_shape(psvar, rfvar, ftype) \
352 fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
353
354 /* -------------------------------------------------------------------------- */
355 /* --- Extra Math Functions --- */
356 /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
357 /* -------------------------------------------------------------------------- */
358
359 #undef isnan
360 #if 0 && defined(HAVE_ISNANL)
361 # define isnan(x) isnanl((x))
362 #else
363 # define isnan(x) fp_do_isnan((x))
364 #endif
365
366 PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
367 {
368 #ifdef BRANCHES_ARE_EXPENSIVE
369 #ifndef USE_LONG_DOUBLE
370 fp_declare_init_shape(sxp, r, double);
371 uae_s32 hx = sxp->parts.msw;
372 uae_s32 lx = sxp->parts.lsw;
373 hx &= 0x7fffffff;
374 hx |= (uae_u32)(lx | (-lx)) >> 31;
375 hx = 0x7ff00000 - hx;
376 return (int)(((uae_u32)hx) >> 31);
377 #elif USE_QUAD_DOUBLE
378 fp_declare_init_shape(sxp, r, extended);
379 uae_s64 hx = sxp->parts64.msw;
380 uae_s64 lx = sxp->parts64.lsw;
381 hx &= 0x7fffffffffffffffLL;
382 hx |= (uae_u64)(lx | (-lx)) >> 63;
383 hx = 0x7fff000000000000LL - hx;
384 return (int)((uae_u64)hx >> 63);
385 #else
386 fp_declare_init_shape(sxp, r, extended);
387 uae_s32 se = sxp->parts.sign_exponent;
388 uae_s32 hx = sxp->parts.msw;
389 uae_s32 lx = sxp->parts.lsw;
390 se = (se & 0x7fff) << 1;
391 lx |= hx & 0x7fffffff;
392 se |= (uae_u32)(lx | (-lx)) >> 31;
393 se = 0xfffe - se;
394 // TODO: check whether rshift count is 16 or 31
395 return (int)(((uae_u32)(se)) >> 16);
396 #endif
397 #else
398 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
399 fp_declare_init_shape(sxp, r, extended);
400 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
401 #else
402 fp_declare_init_shape(sxp, r, double);
403 return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
404 #endif
405 && (sxp->ieee_nan.mantissa0 != 0)
406 && (sxp->ieee_nan.mantissa1 != 0)
407 #ifdef USE_QUAD_DOUBLE
408 && (sxp->ieee_nan.mantissa2 != 0)
409 && (sxp->ieee_nan.mantissa3 != 0)
410 #endif
411 ;
412 #endif
413 }
414
415 #undef isinf
416 #if 0 && defined(HAVE_ISINFL)
417 # define isinf(x) isinfl((x))
418 #else
419 # define isinf(x) fp_do_isinf((x))
420 #endif
421
422 PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
423 {
424 #ifdef BRANCHES_ARE_EXPENSIVE
425 #ifndef USE_LONG_DOUBLE
426 fp_declare_init_shape(sxp, r, double);
427 uae_s32 hx = sxp->parts.msw;
428 uae_s32 lx = sxp->parts.lsw;
429 lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
430 lx |= -lx;
431 return ~(lx >> 31) & (hx >> 30);
432 #elif USE_QUAD_DOUBLE
433 fp_declare_init_shape(sxp, r, extended);
434 uae_s64 hx = sxp->parts64.msw;
435 uae_s64 lx = sxp->parts64.lsw;
436 lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
437 lx |= -lx;
438 return ~(lx >> 63) & (hx >> 62);
439 #else
440 fp_declare_init_shape(sxp, r, extended);
441 uae_s32 se = sxp->parts.sign_exponent;
442 uae_s32 hx = sxp->parts.msw;
443 uae_s32 lx = sxp->parts.lsw;
444 /* This additional ^ 0x80000000 is necessary because in Intel's
445 internal representation of the implicit one is explicit.
446 NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
447 #ifdef __i386__
448 lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
449 #else
450 lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
451 #endif
452 lx |= -lx;
453 se &= 0x8000;
454 return ~(lx >> 31) & (1 - (se >> 14));
455 #endif
456 #else
457 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
458 fp_declare_init_shape(sxp, r, extended);
459 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
460 #else
461 fp_declare_init_shape(sxp, r, double);
462 return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
463 #endif
464 && (sxp->ieee_nan.mantissa0 == 0)
465 && (sxp->ieee_nan.mantissa1 == 0)
466 #ifdef USE_QUAD_DOUBLE
467 && (sxp->ieee_nan.mantissa2 == 0)
468 && (sxp->ieee_nan.mantissa3 == 0)
469 #endif
470 ;
471 #endif
472 }
473
474 #undef isneg
475 #define isneg(x) fp_do_isneg((x))
476
477 PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
478 {
479 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
480 fp_declare_init_shape(sxp, r, extended);
481 #else
482 fp_declare_init_shape(sxp, r, double);
483 #endif
484 return sxp->ieee.negative;
485 }
486
487 #undef iszero
488 #define iszero(x) fp_do_iszero((x))
489
490 PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
491 {
492 // TODO: BRANCHES_ARE_EXPENSIVE
493 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
494 fp_declare_init_shape(sxp, r, extended);
495 #else
496 fp_declare_init_shape(sxp, r, double);
497 #endif
498 return (sxp->ieee.exponent == 0)
499 && (sxp->ieee.mantissa0 == 0)
500 && (sxp->ieee.mantissa1 == 0)
501 #ifdef USE_QUAD_DOUBLE
502 && (sxp->ieee.mantissa2 == 0)
503 && (sxp->ieee.mantissa3 == 0)
504 #endif
505 ;
506 }
507
508 PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
509 {
510 fl_dest.negative = isneg(r);
511 fl_dest.zero = iszero(r);
512 fl_dest.infinity = isinf(r);
513 fl_dest.nan = isnan(r);
514 fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
515 }
516
517 PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
518 {
519 fl_source.negative = isneg(r);
520 fl_source.zero = iszero(r);
521 fl_source.infinity = isinf(r);
522 fl_source.nan = isnan(r);
523 fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
524 }
525
526 PRIVATE inline void FFPU make_nan(fpu_register & r)
527 {
528 // FIXME: is that correct ?
529 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
530 fp_declare_init_shape(sxp, r, extended);
531 sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
532 sxp->ieee.mantissa0 = 0xffffffff;
533 #else
534 fp_declare_init_shape(sxp, r, double);
535 sxp->ieee.exponent = FP_DOUBLE_EXP_MAX;
536 sxp->ieee.mantissa0 = 0xfffff;
537 #endif
538 sxp->ieee.mantissa1 = 0xffffffff;
539 #ifdef USE_QUAD_DOUBLE
540 sxp->ieee.mantissa2 = 0xffffffff;
541 sxp->ieee.mantissa3 = 0xffffffff;
542 #endif
543 }
544
545 PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
546 {
547 #if 1
548 r = +0.0;
549 #else
550 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
551 fp_declare_init_shape(sxp, r, extended);
552 #else
553 fp_declare_init_shape(sxp, r, double);
554 #endif
555 sxp->ieee.negative = 0;
556 sxp->ieee.exponent = 0;
557 sxp->ieee.mantissa0 = 0;
558 sxp->ieee.mantissa1 = 0;
559 #ifdef USE_QUAD_DOUBLE
560 sxp->ieee.mantissa2 = 0;
561 sxp->ieee.mantissa3 = 0;
562 #endif
563 #endif
564 }
565
566 PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
567 {
568 #if 1
569 r = -0.0;
570 #else
571 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
572 fp_declare_init_shape(sxp, r, extended);
573 #else
574 fp_declare_init_shape(sxp, r, double);
575 #endif
576 sxp->ieee.negative = 1;
577 sxp->ieee.exponent = 0;
578 sxp->ieee.mantissa0 = 0;
579 sxp->ieee.mantissa1 = 0;
580 #ifdef USE_QUAD_DOUBLE
581 sxp->ieee.mantissa2 = 0;
582 sxp->ieee.mantissa3 = 0;
583 #endif
584 #endif
585 }
586
587 PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
588 {
589 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
590 fp_declare_init_shape(sxp, r, extended);
591 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
592 #else
593 fp_declare_init_shape(sxp, r, double);
594 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
595 #endif
596 sxp->ieee_nan.negative = 0;
597 sxp->ieee_nan.mantissa0 = 0;
598 sxp->ieee_nan.mantissa1 = 0;
599 #ifdef USE_QUAD_DOUBLE
600 sxp->ieee_nan.mantissa2 = 0;
601 sxp->ieee_nan.mantissa3 = 0;
602 #endif
603 }
604
605 PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
606 {
607 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
608 fp_declare_init_shape(sxp, r, extended);
609 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
610 #else
611 fp_declare_init_shape(sxp, r, double);
612 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
613 #endif
614 sxp->ieee_nan.negative = 1;
615 sxp->ieee_nan.mantissa0 = 0;
616 sxp->ieee_nan.mantissa1 = 0;
617 #ifdef USE_QUAD_DOUBLE
618 sxp->ieee_nan.mantissa2 = 0;
619 sxp->ieee_nan.mantissa3 = 0;
620 #endif
621 }
622
623 PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
624 {
625 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
626 fp_declare_init_shape(sxp, r, extended);
627 return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
628 #else
629 fp_declare_init_shape(sxp, r, double);
630 return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS);
631 #endif
632 }
633
634 // Normalize to range 1..2
635 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
636 {
637 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
638 fp_declare_init_shape(sxp, r, extended);
639 sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
640 #else
641 fp_declare_init_shape(sxp, r, double);
642 sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS;
643 #endif
644 }
645
646 // The sign of the quotient is the exclusive-OR of the sign bits
647 // of the source and destination operands.
648 PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
649 {
650 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
651 fp_declare_init_shape(sap, ra, extended);
652 fp_declare_init_shape(sbp, rb, extended);
653 #else
654 fp_declare_init_shape(sap, ra, double);
655 fp_declare_init_shape(sbp, rb, double);
656 #endif
657 return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
658 }
659
660 /* -------------------------------------------------------------------------- */
661 /* --- Math functions --- */
662 /* -------------------------------------------------------------------------- */
663
664 #if FPU_USE_ISO_C99 && (USE_LONG_DOUBLE || USE_QUAD_DOUBLE)
665 # ifdef HAVE_LOGL
666 # define fp_log logl
667 # endif
668 # ifdef HAVE_LOG10L
669 # define fp_log10 log10l
670 # endif
671 # ifdef HAVE_EXPL
672 # define fp_exp expl
673 # endif
674 # ifdef HAVE_POWL
675 # define fp_pow powl
676 # endif
677 # ifdef HAVE_FABSL
678 # define fp_fabs fabsl
679 # endif
680 # ifdef HAVE_SQRTL
681 # define fp_sqrt sqrtl
682 # endif
683 # ifdef HAVE_SINL
684 # define fp_sin sinl
685 # endif
686 # ifdef HAVE_COSL
687 # define fp_cos cosl
688 # endif
689 # ifdef HAVE_TANL
690 # define fp_tan tanl
691 # endif
692 # ifdef HAVE_SINHL
693 # define fp_sinh sinhl
694 # endif
695 # ifdef HAVE_COSHL
696 # define fp_cosh coshl
697 # endif
698 # ifdef HAVE_TANHL
699 # define fp_tanh tanhl
700 # endif
701 # ifdef HAVE_ASINL
702 # define fp_asin asinl
703 # endif
704 # ifdef HAVE_ACOSL
705 # define fp_acos acosl
706 # endif
707 # ifdef HAVE_ATANL
708 # define fp_atan atanl
709 # endif
710 # ifdef HAVE_ASINHL
711 # define fp_asinh asinhl
712 # endif
713 # ifdef HAVE_ACOSHL
714 # define fp_acosh acoshl
715 # endif
716 # ifdef HAVE_ATANHL
717 # define fp_atanh atanhl
718 # endif
719 # ifdef HAVE_FLOORL
720 # define fp_floor floorl
721 # endif
722 # ifdef HAVE_CEILL
723 # define fp_ceil ceill
724 # endif
725 #endif
726
727 #ifndef fp_log
728 # define fp_log log
729 #endif
730 #ifndef fp_log10
731 # define fp_log10 log10
732 #endif
733 #ifndef fp_exp
734 # define fp_exp exp
735 #endif
736 #ifndef fp_pow
737 # define fp_pow pow
738 #endif
739 #ifndef fp_fabs
740 # define fp_fabs fabs
741 #endif
742 #ifndef fp_sqrt
743 # define fp_sqrt sqrt
744 #endif
745 #ifndef fp_sin
746 # define fp_sin sin
747 #endif
748 #ifndef fp_cos
749 # define fp_cos cos
750 #endif
751 #ifndef fp_tan
752 # define fp_tan tan
753 #endif
754 #ifndef fp_sinh
755 # define fp_sinh sinh
756 #endif
757 #ifndef fp_cosh
758 # define fp_cosh cosh
759 #endif
760 #ifndef fp_tanh
761 # define fp_tanh tanh
762 #endif
763 #ifndef fp_asin
764 # define fp_asin asin
765 #endif
766 #ifndef fp_acos
767 # define fp_acos acos
768 #endif
769 #ifndef fp_atan
770 # define fp_atan atan
771 #endif
772 #ifndef fp_asinh
773 # define fp_asinh asinh
774 #endif
775 #ifndef fp_acosh
776 # define fp_acosh acosh
777 #endif
778 #ifndef fp_atanh
779 # define fp_atanh atanh
780 #endif
781 #ifndef fp_floor
782 # define fp_floor floor
783 #endif
784 #ifndef fp_ceil
785 # define fp_ceil ceil
786 #endif
787
788 #if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
789 // Assembly optimized support functions. Taken from glibc 2.2.2
790
791 #undef fp_log
792 #define fp_log fp_do_log
793
794 #ifndef FPU_FAST_MATH
795 // FIXME: unimplemented
796 PRIVATE fpu_extended fp_do_log(fpu_extended x);
797 #else
798 PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
799 {
800 fpu_extended value;
801 __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
802 return value;
803 }
804 #endif
805
806 #undef fp_log10
807 #define fp_log10 fp_do_log10
808
809 #ifndef FPU_FAST_MATH
810 // FIXME: unimplemented
811 PRIVATE fpu_extended fp_do_log10(fpu_extended x);
812 #else
813 PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
814 {
815 fpu_extended value;
816 __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
817 return value;
818 }
819 #endif
820
821 #undef fp_exp
822 #define fp_exp fp_do_exp
823
824 #ifndef FPU_FAST_MATH
825 // FIXME: unimplemented
826 PRIVATE fpu_extended fp_do_exp(fpu_extended x);
827 #else
828 PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
829 {
830 fpu_extended value, exponent;
831 __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
832 "fmul %%st(1) # x * log2(e)\n\t"
833 "fst %%st(1)\n\t"
834 "frndint # int(x * log2(e))\n\t"
835 "fxch\n\t"
836 "fsub %%st(1) # fract(x * log2(e))\n\t"
837 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
838 : "=t" (value), "=u" (exponent) : "0" (x));
839 value += 1.0;
840 __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
841 return value;
842 }
843 #endif
844
845 #undef fp_pow
846 #define fp_pow fp_do_pow
847
848 PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
849
850 #undef fp_fabs
851 #define fp_fabs fp_do_fabs
852
853 PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
854 {
855 fpu_extended value;
856 __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
857 return value;
858 }
859
860 #undef fp_sqrt
861 #define fp_sqrt fp_do_sqrt
862
863 PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
864 {
865 fpu_extended value;
866 __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
867 return value;
868 }
869
870 #undef fp_sin
871 #define fp_sin fp_do_sin
872
873 PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
874 {
875 fpu_extended value;
876 __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
877 return value;
878 }
879
880 #undef fp_cos
881 #define fp_cos fp_do_cos
882
883 PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
884 {
885 fpu_extended value;
886 __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
887 return value;
888 }
889
890 #undef fp_tan
891 #define fp_tan fp_do_tan
892
893 PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
894 {
895 fpu_extended value;
896 __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
897 return value;
898 }
899
900 #undef fp_expm1
901 #define fp_expm1 fp_do_expm1
902
903 // Returns: exp(X) - 1.0
904 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
905 {
906 fpu_extended value, exponent, temp;
907 __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
908 "fmul %%st(1) # x * log2(e)\n\t"
909 "fst %%st(1)\n\t"
910 "frndint # int(x * log2(e))\n\t"
911 "fxch\n\t"
912 "fsub %%st(1) # fract(x * log2(e))\n\t"
913 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
914 "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
915 : "=t" (value), "=u" (exponent) : "0" (x));
916 __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
917 temp -= 1.0;
918 return temp + value ? temp + value : x;
919 }
920
921 #undef fp_sgn1
922 #define fp_sgn1 fp_do_sgn1
923
924 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
925 {
926 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
927 fp_declare_init_shape(sxp, x, extended);
928 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
929 sxp->ieee_nan.one = 1;
930 #else
931 fp_declare_init_shape(sxp, x, double);
932 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
933 #endif
934 sxp->ieee_nan.quiet_nan = 0;
935 sxp->ieee_nan.mantissa0 = 0;
936 sxp->ieee_nan.mantissa1 = 0;
937 return x;
938 }
939
940 #undef fp_sinh
941 #define fp_sinh fp_do_sinh
942
943 #ifndef FPU_FAST_MATH
944 // FIXME: unimplemented
945 PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
946 #else
947 PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
948 {
949 fpu_extended exm1 = fp_expm1(fp_fabs(x));
950 return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
951 }
952 #endif
953
954 #undef fp_cosh
955 #define fp_cosh fp_do_cosh
956
957 #ifndef FPU_FAST_MATH
958 // FIXME: unimplemented
959 PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
960 #else
961 PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
962 {
963 fpu_extended ex = fp_exp(x);
964 return 0.5 * (ex + 1.0 / ex);
965 }
966 #endif
967
968 #undef fp_tanh
969 #define fp_tanh fp_do_tanh
970
971 #ifndef FPU_FAST_MATH
972 // FIXME: unimplemented
973 PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
974 #else
975 PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
976 {
977 fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
978 return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
979 }
980 #endif
981
982 #undef fp_atan2
983 #define fp_atan2 fp_do_atan2
984
985 PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
986 {
987 fpu_extended value;
988 __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
989 return value;
990 }
991
992 #undef fp_asin
993 #define fp_asin fp_do_asin
994
995 PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
996 {
997 return fp_atan2(x, fp_sqrt(1.0 - x * x));
998 }
999
1000 #undef fp_acos
1001 #define fp_acos fp_do_acos
1002
1003 PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
1004 {
1005 return fp_atan2(fp_sqrt(1.0 - x * x), x);
1006 }
1007
1008 #undef fp_atan
1009 #define fp_atan fp_do_atan
1010
1011 PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
1012 {
1013 fpu_extended value;
1014 __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
1015 return value;
1016 }
1017
1018 #undef fp_log1p
1019 #define fp_log1p fp_do_log1p
1020
1021 // Returns: ln(1.0 + X)
1022 PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
1023
1024 #undef fp_asinh
1025 #define fp_asinh fp_do_asinh
1026
1027 PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
1028 {
1029 fpu_extended y = fp_fabs(x);
1030 return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
1031 }
1032
1033 #undef fp_acosh
1034 #define fp_acosh fp_do_acosh
1035
1036 PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
1037 {
1038 return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
1039 }
1040
1041 #undef fp_atanh
1042 #define fp_atanh fp_do_atanh
1043
1044 PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
1045 {
1046 fpu_extended y = fp_fabs(x);
1047 return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
1048 }
1049
1050 #undef fp_floor
1051 #define fp_floor fp_do_floor
1052
1053 PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
1054 {
1055 volatile unsigned int cw;
1056 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
1057 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
1058 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
1059 fpu_extended value;
1060 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
1061 __asm__ __volatile__("fldcw %0" : : "m" (cw));
1062 return value;
1063 }
1064
1065 #undef fp_ceil
1066 #define fp_ceil fp_do_ceil
1067
1068 PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
1069 {
1070 volatile unsigned int cw;
1071 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
1072 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
1073 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
1074 fpu_extended value;
1075 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
1076 __asm__ __volatile__("fldcw %0" : : "m" (cw));
1077 return value;
1078 }
1079
1080 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
1081 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
1082 { \
1083 volatile unsigned int cw; \
1084 __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
1085 volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
1086 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
1087 fpu_extended value; \
1088 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
1089 __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
1090 return value; \
1091 }
1092
1093 #undef fp_round_to_minus_infinity
1094 #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1095
1096 DEFINE_ROUND_FUNC(minus_infinity, 0x400)
1097
1098 #undef fp_round_to_plus_infinity
1099 #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1100
1101 DEFINE_ROUND_FUNC(plus_infinity, 0x800)
1102
1103 #undef fp_round_to_zero
1104 #define fp_round_to_zero fp_do_round_to_zero
1105
1106 DEFINE_ROUND_FUNC(zero, 0xc00)
1107
1108 #undef fp_round_to_nearest
1109 #define fp_round_to_nearest fp_do_round_to_nearest
1110
1111 DEFINE_ROUND_FUNC(nearest, 0x000)
1112
1113 #endif /* USE_X87_ASSEMBLY */
1114
1115 #ifndef fp_round_to_minus_infinity
1116 #define fp_round_to_minus_infinity(x) fp_floor(x)
1117 #endif
1118
1119 #ifndef fp_round_to_plus_infinity
1120 #define fp_round_to_plus_infinity(x) fp_ceil(x)
1121 #endif
1122
1123 #ifndef fp_round_to_zero
1124 #define fp_round_to_zero(x) ((int)(x))
1125 #endif
1126
1127 #ifndef fp_round_to_nearest
1128 #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1129 #endif
1130
1131 #endif /* FPU_MATHLIB_H */