ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.6
Committed: 2002-11-16T15:28:25Z (22 years ago) by gbeauche
Content type: text/plain
Branch: MAIN
CVS Tags: nigel-build-12, nigel-build-13
Changes since 1.5: +2 -2 lines
Log Message:
Use old x87 FPU stack on x86-64 too because we now use long doubles there for
better accuracy. Aka. prefer compatibility over speed.

File Contents

# Content
1 /*
2 * fpu/mathlib.h - Floating-point math support library
3 *
4 * Basilisk II (C) 1997-2001 Christian Bauer
5 *
6 * MC68881/68040 fpu emulation
7 *
8 * Original UAE FPU, copyright 1996 Herman ten Brugge
9 * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10 * New framework, copyright 2000-2001 Gwenole Beauchesne
11 * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 #ifndef FPU_MATHLIB_H
29 #define FPU_MATHLIB_H
30
31 /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32 #undef PUBLIC
33 #define PUBLIC extern
34
35 #undef PRIVATE
36 #define PRIVATE static
37
38 #undef FFPU
39 #define FFPU /**/
40
41 #undef FPU
42 #define FPU fpu.
43
44 // Define the following macro if branches are expensive. If so,
45 // integer-based isnan() and isinf() functions are implemented.
46 // TODO: move to Makefile.in
47 #define BRANCHES_ARE_EXPENSIVE 1
48
49 // Use ISO C99 extended-precision math functions (glibc 2.1+)
50 #define FPU_USE_ISO_C99 1
51
52 // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53 // don't support extended-precision floating-point computations
54 #ifdef WIN32
55 #undef FPU_USE_ISO_C99
56 #endif
57
58 // Use faster implementation of math functions, but this could cause
59 // some incorrect results (?)
60 // TODO: actually implement the slower but safer versions
61 #define FPU_FAST_MATH 1
62
63 #if FPU_USE_ISO_C99
64 // NOTE: no prior <math.h> shall be included at this point
65 #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66 #define __USE_ISOC9X 1 // for glibc 2.1.X
67 #include <math.h>
68 #else
69 #include <cmath>
70 using namespace std;
71 #endif
72
73 /* -------------------------------------------------------------------------- */
74 /* --- Floating-point register types --- */
75 /* -------------------------------------------------------------------------- */
76
77 // Single : S 8*E 23*F
78 #define FP_SINGLE_EXP_MAX 0xff
79 #define FP_SINGLE_EXP_BIAS 0x7f
80
81 // Double : S 11*E 52*F
82 #define FP_DOUBLE_EXP_MAX 0x7ff
83 #define FP_DOUBLE_EXP_BIAS 0x3ff
84
85 // Extended : S 15*E 64*F
86 #define FP_EXTENDED_EXP_MAX 0x7fff
87 #define FP_EXTENDED_EXP_BIAS 0x3fff
88
89 // Zeroes : E = 0 & F = 0
90 // Infinities : E = MAX & F = 0
91 // Not-A-Number : E = MAX & F # 0
92
93 /* -------------------------------------------------------------------------- */
94 /* --- Floating-point type shapes (IEEE-compliant) --- */
95 /* -------------------------------------------------------------------------- */
96
97 // Taken from glibc 2.2.x: ieee754.h
98
99 // IEEE-754 float format
100 union fpu_single_shape {
101
102 fpu_single value;
103
104 /* This is the IEEE 754 single-precision format. */
105 struct {
106 #ifdef WORDS_BIGENDIAN
107 unsigned int negative:1;
108 unsigned int exponent:8;
109 unsigned int mantissa:23;
110 #else
111 unsigned int mantissa:23;
112 unsigned int exponent:8;
113 unsigned int negative:1;
114 #endif
115 } ieee;
116
117 /* This format makes it easier to see if a NaN is a signalling NaN. */
118 struct {
119 #ifdef WORDS_BIGENDIAN
120 unsigned int negative:1;
121 unsigned int exponent:8;
122 unsigned int quiet_nan:1;
123 unsigned int mantissa:22;
124 #else
125 unsigned int mantissa:22;
126 unsigned int quiet_nan:1;
127 unsigned int exponent:8;
128 unsigned int negative:1;
129 #endif
130 } ieee_nan;
131 };
132
133 // IEEE-754 double format
134 union fpu_double_shape {
135 fpu_double value;
136
137 /* This is the IEEE 754 double-precision format. */
138 struct {
139 #ifdef WORDS_BIGENDIAN
140 unsigned int negative:1;
141 unsigned int exponent:11;
142 /* Together these comprise the mantissa. */
143 unsigned int mantissa0:20;
144 unsigned int mantissa1:32;
145 #else
146 # if HOST_FLOAT_WORDS_BIG_ENDIAN
147 unsigned int mantissa0:20;
148 unsigned int exponent:11;
149 unsigned int negative:1;
150 unsigned int mantissa1:32;
151 # else
152 /* Together these comprise the mantissa. */
153 unsigned int mantissa1:32;
154 unsigned int mantissa0:20;
155 unsigned int exponent:11;
156 unsigned int negative:1;
157 # endif
158 #endif
159 } ieee;
160
161 /* This format makes it easier to see if a NaN is a signalling NaN. */
162 struct {
163 #ifdef WORDS_BIGENDIAN
164 unsigned int negative:1;
165 unsigned int exponent:11;
166 unsigned int quiet_nan:1;
167 /* Together these comprise the mantissa. */
168 unsigned int mantissa0:19;
169 unsigned int mantissa1:32;
170 #else
171 # if HOST_FLOAT_WORDS_BIG_ENDIAN
172 unsigned int mantissa0:19;
173 unsigned int quiet_nan:1;
174 unsigned int exponent:11;
175 unsigned int negative:1;
176 unsigned int mantissa1:32;
177 # else
178 /* Together these comprise the mantissa. */
179 unsigned int mantissa1:32;
180 unsigned int mantissa0:19;
181 unsigned int quiet_nan:1;
182 unsigned int exponent:11;
183 unsigned int negative:1;
184 # endif
185 #endif
186 } ieee_nan;
187
188 /* This format is used to extract the sign_exponent and mantissa parts only */
189 struct {
190 #if HOST_FLOAT_WORDS_BIG_ENDIAN
191 unsigned int msw:32;
192 unsigned int lsw:32;
193 #else
194 unsigned int lsw:32;
195 unsigned int msw:32;
196 #endif
197 } parts;
198 };
199
200 #ifdef USE_LONG_DOUBLE
201 // IEEE-854 long double format
202 union fpu_extended_shape {
203 fpu_extended value;
204
205 /* This is the IEEE 854 double-extended-precision format. */
206 struct {
207 #ifdef WORDS_BIGENDIAN
208 unsigned int negative:1;
209 unsigned int exponent:15;
210 unsigned int empty:16;
211 unsigned int mantissa0:32;
212 unsigned int mantissa1:32;
213 #else
214 # if HOST_FLOAT_WORDS_BIG_ENDIAN
215 unsigned int exponent:15;
216 unsigned int negative:1;
217 unsigned int empty:16;
218 unsigned int mantissa0:32;
219 unsigned int mantissa1:32;
220 # else
221 unsigned int mantissa1:32;
222 unsigned int mantissa0:32;
223 unsigned int exponent:15;
224 unsigned int negative:1;
225 unsigned int empty:16;
226 # endif
227 #endif
228 } ieee;
229
230 /* This is for NaNs in the IEEE 854 double-extended-precision format. */
231 struct {
232 #ifdef WORDS_BIGENDIAN
233 unsigned int negative:1;
234 unsigned int exponent:15;
235 unsigned int empty:16;
236 unsigned int one:1;
237 unsigned int quiet_nan:1;
238 unsigned int mantissa0:30;
239 unsigned int mantissa1:32;
240 #else
241 # if HOST_FLOAT_WORDS_BIG_ENDIAN
242 unsigned int exponent:15;
243 unsigned int negative:1;
244 unsigned int empty:16;
245 unsigned int mantissa0:30;
246 unsigned int quiet_nan:1;
247 unsigned int one:1;
248 unsigned int mantissa1:32;
249 # else
250 unsigned int mantissa1:32;
251 unsigned int mantissa0:30;
252 unsigned int quiet_nan:1;
253 unsigned int one:1;
254 unsigned int exponent:15;
255 unsigned int negative:1;
256 unsigned int empty:16;
257 # endif
258 #endif
259 } ieee_nan;
260
261 /* This format is used to extract the sign_exponent and mantissa parts only */
262 struct {
263 #if HOST_FLOAT_WORDS_BIG_ENDIAN
264 unsigned int sign_exponent:16;
265 unsigned int empty:16;
266 unsigned int msw:32;
267 unsigned int lsw:32;
268 #else
269 unsigned int lsw:32;
270 unsigned int msw:32;
271 unsigned int sign_exponent:16;
272 unsigned int empty:16;
273 #endif
274 } parts;
275 };
276 #endif
277
278 #ifdef USE_QUAD_DOUBLE
279 // IEEE-854 quad double format
280 union fpu_extended_shape {
281 fpu_extended value;
282
283 /* This is the IEEE 854 quad-precision format. */
284 struct {
285 #ifdef WORDS_BIGENDIAN
286 unsigned int negative:1;
287 unsigned int exponent:15;
288 unsigned int mantissa0:16;
289 unsigned int mantissa1:32;
290 unsigned int mantissa2:32;
291 unsigned int mantissa3:32;
292 #else
293 unsigned int mantissa3:32;
294 unsigned int mantissa2:32;
295 unsigned int mantissa1:32;
296 unsigned int mantissa0:16;
297 unsigned int exponent:15;
298 unsigned int negative:1;
299 #endif
300 } ieee;
301
302 /* This is for NaNs in the IEEE 854 quad-precision format. */
303 struct {
304 #ifdef WORDS_BIGENDIAN
305 unsigned int negative:1;
306 unsigned int exponent:15;
307 unsigned int quiet_nan:1;
308 unsigned int mantissa0:15;
309 unsigned int mantissa1:30;
310 unsigned int mantissa2:32;
311 unsigned int mantissa3:32;
312 #else
313 unsigned int mantissa3:32;
314 unsigned int mantissa2:32;
315 unsigned int mantissa1:32;
316 unsigned int mantissa0:15;
317 unsigned int quiet_nan:1;
318 unsigned int exponent:15;
319 unsigned int negative:1;
320 #endif
321 } ieee_nan;
322
323 /* This format is used to extract the sign_exponent and mantissa parts only */
324 #if HOST_FLOAT_WORDS_BIG_ENDIAN
325 struct {
326 uae_u64 msw;
327 uae_u64 lsw;
328 } parts64;
329 struct {
330 uae_u32 w0;
331 uae_u32 w1;
332 uae_u32 w2;
333 uae_u32 w3;
334 } parts32;
335 #else
336 struct {
337 uae_u64 lsw;
338 uae_u64 msw;
339 } parts64;
340 struct {
341 uae_u32 w3;
342 uae_u32 w2;
343 uae_u32 w1;
344 uae_u32 w0;
345 } parts32;
346 #endif
347 };
348 #endif
349
350 // Declare and initialize a pointer to a shape of the requested FP type
351 #define fp_declare_init_shape(psvar, rfvar, ftype) \
352 fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
353
354 /* -------------------------------------------------------------------------- */
355 /* --- Extra Math Functions --- */
356 /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
357 /* -------------------------------------------------------------------------- */
358
359 #undef isnan
360 #if 0 && defined(HAVE_ISNANL)
361 # define isnan(x) isnanl((x))
362 #else
363 # define isnan(x) fp_do_isnan((x))
364 #endif
365
366 PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
367 {
368 #ifdef BRANCHES_ARE_EXPENSIVE
369 #ifndef USE_LONG_DOUBLE
370 fp_declare_init_shape(sxp, r, double);
371 uae_s32 hx = sxp->parts.msw;
372 uae_s32 lx = sxp->parts.lsw;
373 hx &= 0x7fffffff;
374 hx |= (uae_u32)(lx | (-lx)) >> 31;
375 hx = 0x7ff00000 - hx;
376 return (int)(((uae_u32)hx) >> 31);
377 #elif USE_QUAD_DOUBLE
378 fp_declare_init_shape(sxp, r, extended);
379 uae_s64 hx = sxp->parts64.msw;
380 uae_s64 lx = sxp->parts64.lsw;
381 hx &= 0x7fffffffffffffffLL;
382 hx |= (uae_u64)(lx | (-lx)) >> 63;
383 hx = 0x7fff000000000000LL - hx;
384 return (int)((uae_u64)hx >> 63);
385 #else
386 fp_declare_init_shape(sxp, r, extended);
387 uae_s32 se = sxp->parts.sign_exponent;
388 uae_s32 hx = sxp->parts.msw;
389 uae_s32 lx = sxp->parts.lsw;
390 se = (se & 0x7fff) << 1;
391 lx |= hx & 0x7fffffff;
392 se |= (uae_u32)(lx | (-lx)) >> 31;
393 se = 0xfffe - se;
394 // TODO: check whether rshift count is 16 or 31
395 return (int)(((uae_u32)(se)) >> 16);
396 #endif
397 #else
398 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
399 fp_declare_init_shape(sxp, r, extended);
400 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
401 #else
402 fp_declare_init_shape(sxp, r, double);
403 return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
404 #endif
405 && (sxp->ieee_nan.mantissa0 != 0)
406 && (sxp->ieee_nan.mantissa1 != 0)
407 #ifdef USE_QUAD_DOUBLE
408 && (sxp->ieee_nan.mantissa2 != 0)
409 && (sxp->ieee_nan.mantissa3 != 0)
410 #endif
411 ;
412 #endif
413 }
414
415 #undef isinf
416 #if 0 && defined(HAVE_ISINFL)
417 # define isinf(x) isinfl((x))
418 #else
419 # define isinf(x) fp_do_isinf((x))
420 #endif
421
422 PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
423 {
424 #ifdef BRANCHES_ARE_EXPENSIVE
425 #ifndef USE_LONG_DOUBLE
426 fp_declare_init_shape(sxp, r, double);
427 uae_s32 hx = sxp->parts.msw;
428 uae_s32 lx = sxp->parts.lsw;
429 lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
430 lx |= -lx;
431 return ~(lx >> 31) & (hx >> 30);
432 #elif USE_QUAD_DOUBLE
433 fp_declare_init_shape(sxp, r, extended);
434 uae_s64 hx = sxp->parts64.msw;
435 uae_s64 lx = sxp->parts64.lsw;
436 lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
437 lx |= -lx;
438 return ~(lx >> 63) & (hx >> 62);
439 #else
440 fp_declare_init_shape(sxp, r, extended);
441 uae_s32 se = sxp->parts.sign_exponent;
442 uae_s32 hx = sxp->parts.msw;
443 uae_s32 lx = sxp->parts.lsw;
444 /* This additional ^ 0x80000000 is necessary because in Intel's
445 internal representation of the implicit one is explicit.
446 NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
447 #ifdef __i386__
448 lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
449 #else
450 lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
451 #endif
452 lx |= -lx;
453 se &= 0x8000;
454 return ~(lx >> 31) & (1 - (se >> 14));
455 #endif
456 #else
457 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
458 fp_declare_init_shape(sxp, r, extended);
459 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
460 #else
461 fp_declare_init_shape(sxp, r, double);
462 return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
463 #endif
464 && (sxp->ieee_nan.mantissa0 == 0)
465 && (sxp->ieee_nan.mantissa1 == 0)
466 #ifdef USE_QUAD_DOUBLE
467 && (sxp->ieee_nan.mantissa2 == 0)
468 && (sxp->ieee_nan.mantissa3 == 0)
469 #endif
470 ;
471 #endif
472 }
473
474 #undef isneg
475 #define isneg(x) fp_do_isneg((x))
476
477 PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
478 {
479 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
480 fp_declare_init_shape(sxp, r, extended);
481 #else
482 fp_declare_init_shape(sxp, r, double);
483 #endif
484 return sxp->ieee.negative;
485 }
486
487 #undef iszero
488 #define iszero(x) fp_do_iszero((x))
489
490 PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
491 {
492 // TODO: BRANCHES_ARE_EXPENSIVE
493 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
494 fp_declare_init_shape(sxp, r, extended);
495 #else
496 fp_declare_init_shape(sxp, r, double);
497 #endif
498 return (sxp->ieee.exponent == 0)
499 && (sxp->ieee.mantissa0 == 0)
500 && (sxp->ieee.mantissa1 == 0)
501 #ifdef USE_QUAD_DOUBLE
502 && (sxp->ieee.mantissa2 == 0)
503 && (sxp->ieee.mantissa3 == 0)
504 #endif
505 ;
506 }
507
508 PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
509 {
510 fl_dest.negative = isneg(r);
511 fl_dest.zero = iszero(r);
512 fl_dest.infinity = isinf(r);
513 fl_dest.nan = isnan(r);
514 fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
515 }
516
517 PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
518 {
519 fl_source.negative = isneg(r);
520 fl_source.zero = iszero(r);
521 fl_source.infinity = isinf(r);
522 fl_source.nan = isnan(r);
523 fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
524 }
525
526 PRIVATE inline void FFPU make_nan(fpu_register & r)
527 {
528 // FIXME: is that correct ?
529 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
530 fp_declare_init_shape(sxp, r, extended);
531 sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
532 sxp->ieee.mantissa0 = 0xffffffff;
533 #else
534 fp_declare_init_shape(sxp, r, double);
535 sxp->ieee.exponent = FP_DOUBLE_EXP_MAX;
536 sxp->ieee.mantissa0 = 0xfffff;
537 #endif
538 sxp->ieee.mantissa1 = 0xffffffff;
539 #ifdef USE_QUAD_DOUBLE
540 sxp->ieee.mantissa2 = 0xffffffff;
541 sxp->ieee.mantissa3 = 0xffffffff;
542 #endif
543 }
544
545 PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
546 {
547 #if 1
548 r = +0.0;
549 #else
550 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
551 fp_declare_init_shape(sxp, r, extended);
552 #else
553 fp_declare_init_shape(sxp, r, double);
554 #endif
555 sxp->ieee.negative = 0;
556 sxp->ieee.exponent = 0;
557 sxp->ieee.mantissa0 = 0;
558 sxp->ieee.mantissa1 = 0;
559 #ifdef USE_QUAD_DOUBLE
560 sxp->ieee.mantissa2 = 0;
561 sxp->ieee.mantissa3 = 0;
562 #endif
563 #endif
564 }
565
566 PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
567 {
568 #if 1
569 r = -0.0;
570 #else
571 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
572 fp_declare_init_shape(sxp, r, extended);
573 #else
574 fp_declare_init_shape(sxp, r, double);
575 #endif
576 sxp->ieee.negative = 1;
577 sxp->ieee.exponent = 0;
578 sxp->ieee.mantissa0 = 0;
579 sxp->ieee.mantissa1 = 0;
580 #ifdef USE_QUAD_DOUBLE
581 sxp->ieee.mantissa2 = 0;
582 sxp->ieee.mantissa3 = 0;
583 #endif
584 #endif
585 }
586
587 PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
588 {
589 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
590 fp_declare_init_shape(sxp, r, extended);
591 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
592 #else
593 fp_declare_init_shape(sxp, r, double);
594 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
595 #endif
596 sxp->ieee_nan.negative = 0;
597 sxp->ieee_nan.mantissa0 = 0;
598 sxp->ieee_nan.mantissa1 = 0;
599 #ifdef USE_QUAD_DOUBLE
600 sxp->ieee_nan.mantissa2 = 0;
601 sxp->ieee_nan.mantissa3 = 0;
602 #endif
603 }
604
605 PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
606 {
607 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
608 fp_declare_init_shape(sxp, r, extended);
609 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
610 #else
611 fp_declare_init_shape(sxp, r, double);
612 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
613 #endif
614 sxp->ieee_nan.negative = 1;
615 sxp->ieee_nan.mantissa0 = 0;
616 sxp->ieee_nan.mantissa1 = 0;
617 #ifdef USE_QUAD_DOUBLE
618 sxp->ieee_nan.mantissa2 = 0;
619 sxp->ieee_nan.mantissa3 = 0;
620 #endif
621 }
622
623 PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
624 {
625 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
626 fp_declare_init_shape(sxp, r, extended);
627 return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
628 #else
629 fp_declare_init_shape(sxp, r, double);
630 return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS);
631 #endif
632 }
633
634 // Normalize to range 1..2
635 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
636 {
637 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
638 fp_declare_init_shape(sxp, r, extended);
639 sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
640 #else
641 fp_declare_init_shape(sxp, r, double);
642 sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS;
643 #endif
644 }
645
646 // The sign of the quotient is the exclusive-OR of the sign bits
647 // of the source and destination operands.
648 PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
649 {
650 #if USE_LONG_DOUBLE || USE_QUAD_DOUBLE
651 fp_declare_init_shape(sap, ra, extended);
652 fp_declare_init_shape(sbp, rb, extended);
653 #else
654 fp_declare_init_shape(sap, ra, double);
655 fp_declare_init_shape(sbp, rb, double);
656 #endif
657 return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
658 }
659
660 /* -------------------------------------------------------------------------- */
661 /* --- Math functions --- */
662 /* -------------------------------------------------------------------------- */
663
664 #if FPU_USE_ISO_C99 && (USE_LONG_DOUBLE || USE_QUAD_DOUBLE)
665 # ifdef HAVE_LOGL
666 # define fp_log logl
667 # endif
668 # ifdef HAVE_LOG10L
669 # define fp_log10 log10l
670 # endif
671 # ifdef HAVE_EXPL
672 # define fp_exp expl
673 # endif
674 # ifdef HAVE_POWL
675 # define fp_pow powl
676 # endif
677 # ifdef HAVE_FABSL
678 # define fp_fabs fabsl
679 # endif
680 # ifdef HAVE_SQRTL
681 # define fp_sqrt sqrtl
682 # endif
683 # ifdef HAVE_SINL
684 # define fp_sin sinl
685 # endif
686 # ifdef HAVE_COSL
687 # define fp_cos cosl
688 # endif
689 # ifdef HAVE_TANL
690 # define fp_tan tanl
691 # endif
692 # ifdef HAVE_SINHL
693 # define fp_sinh sinhl
694 # endif
695 # ifdef HAVE_COSHL
696 # define fp_cosh coshl
697 # endif
698 # ifdef HAVE_TANHL
699 # define fp_tanh tanhl
700 # endif
701 # ifdef HAVE_ASINL
702 # define fp_asin asinl
703 # endif
704 # ifdef HAVE_ACOSL
705 # define fp_acos acosl
706 # endif
707 # ifdef HAVE_ATANL
708 # define fp_atan atanl
709 # endif
710 # ifdef HAVE_ASINHL
711 # define fp_asinh asinhl
712 # endif
713 # ifdef HAVE_ACOSHL
714 # define fp_acosh acoshl
715 # endif
716 # ifdef HAVE_ATANHL
717 # define fp_atanh atanhl
718 # endif
719 # ifdef HAVE_FLOORL
720 # define fp_floor floorl
721 # endif
722 # ifdef HAVE_CEILL
723 # define fp_ceil ceill
724 # endif
725 #endif
726
727 #ifndef fp_log
728 # define fp_log log
729 #endif
730 #ifndef fp_log10
731 # define fp_log10 log10
732 #endif
733 #ifndef fp_exp
734 # define fp_exp exp
735 #endif
736 #ifndef fp_pow
737 # define fp_pow pow
738 #endif
739 #ifndef fp_fabs
740 # define fp_fabs fabs
741 #endif
742 #ifndef fp_sqrt
743 # define fp_sqrt sqrt
744 #endif
745 #ifndef fp_sin
746 # define fp_sin sin
747 #endif
748 #ifndef fp_cos
749 # define fp_cos cos
750 #endif
751 #ifndef fp_tan
752 # define fp_tan tan
753 #endif
754 #ifndef fp_sinh
755 # define fp_sinh sinh
756 #endif
757 #ifndef fp_cosh
758 # define fp_cosh cosh
759 #endif
760 #ifndef fp_tanh
761 # define fp_tanh tanh
762 #endif
763 #ifndef fp_asin
764 # define fp_asin asin
765 #endif
766 #ifndef fp_acos
767 # define fp_acos acos
768 #endif
769 #ifndef fp_atan
770 # define fp_atan atan
771 #endif
772 #ifndef fp_asinh
773 # define fp_asinh asinh
774 #endif
775 #ifndef fp_acosh
776 # define fp_acosh acosh
777 #endif
778 #ifndef fp_atanh
779 # define fp_atanh atanh
780 #endif
781 #ifndef fp_floor
782 # define fp_floor floor
783 #endif
784 #ifndef fp_ceil
785 # define fp_ceil ceil
786 #endif
787
788 #if defined(FPU_IEEE) && defined(USE_X87_ASSEMBLY)
789 // Assembly optimized support functions. Taken from glibc 2.2.2
790
791 #undef fp_log
792 #define fp_log fp_do_log
793
794 #ifndef FPU_FAST_MATH
795 // FIXME: unimplemented
796 PRIVATE fpu_extended fp_do_log(fpu_extended x);
797 #else
798 PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
799 {
800 fpu_extended value;
801 __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
802 return value;
803 }
804 #endif
805
806 #undef fp_log10
807 #define fp_log10 fp_do_log10
808
809 #ifndef FPU_FAST_MATH
810 // FIXME: unimplemented
811 PRIVATE fpu_extended fp_do_log10(fpu_extended x);
812 #else
813 PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
814 {
815 fpu_extended value;
816 __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
817 return value;
818 }
819 #endif
820
821 #undef fp_exp
822 #define fp_exp fp_do_exp
823
824 #ifndef FPU_FAST_MATH
825 // FIXME: unimplemented
826 PRIVATE fpu_extended fp_do_exp(fpu_extended x);
827 #else
828 PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
829 {
830 fpu_extended value, exponent;
831 __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
832 "fmul %%st(1) # x * log2(e)\n\t"
833 "fst %%st(1)\n\t"
834 "frndint # int(x * log2(e))\n\t"
835 "fxch\n\t"
836 "fsub %%st(1) # fract(x * log2(e))\n\t"
837 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
838 : "=t" (value), "=u" (exponent) : "0" (x));
839 value += 1.0;
840 __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
841 return value;
842 }
843 #endif
844
845 #undef fp_pow
846 #define fp_pow fp_do_pow
847
848 PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
849
850 #undef fp_fabs
851 #define fp_fabs fp_do_fabs
852
853 PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
854 {
855 fpu_extended value;
856 __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
857 return value;
858 }
859
860 #undef fp_sqrt
861 #define fp_sqrt fp_do_sqrt
862
863 PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
864 {
865 fpu_extended value;
866 __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
867 return value;
868 }
869
870 #undef fp_sin
871 #define fp_sin fp_do_sin
872
873 PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
874 {
875 fpu_extended value;
876 __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
877 return value;
878 }
879
880 #undef fp_cos
881 #define fp_cos fp_do_cos
882
883 PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
884 {
885 fpu_extended value;
886 __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
887 return value;
888 }
889
890 #undef fp_tan
891 #define fp_tan fp_do_tan
892
893 PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
894 {
895 fpu_extended value;
896 __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
897 return value;
898 }
899
900 #undef fp_expm1
901 #define fp_expm1 fp_do_expm1
902
903 // Returns: exp(X) - 1.0
904 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
905 {
906 fpu_extended value, exponent, temp;
907 __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
908 "fmul %%st(1) # x * log2(e)\n\t"
909 "fst %%st(1)\n\t"
910 "frndint # int(x * log2(e))\n\t"
911 "fxch\n\t"
912 "fsub %%st(1) # fract(x * log2(e))\n\t"
913 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
914 "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
915 : "=t" (value), "=u" (exponent) : "0" (x));
916 __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
917 temp -= 1.0;
918 return temp + value ? temp + value : x;
919 }
920
921 #undef fp_sgn1
922 #define fp_sgn1 fp_do_sgn1
923
924 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
925 {
926 fp_declare_init_shape(sxp, x, extended);
927 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
928 sxp->ieee_nan.one = 1;
929 sxp->ieee_nan.quiet_nan = 0;
930 sxp->ieee_nan.mantissa0 = 0;
931 sxp->ieee_nan.mantissa1 = 0;
932 return x;
933 }
934
935 #undef fp_sinh
936 #define fp_sinh fp_do_sinh
937
938 #ifndef FPU_FAST_MATH
939 // FIXME: unimplemented
940 PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
941 #else
942 PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
943 {
944 fpu_extended exm1 = fp_expm1(fp_fabs(x));
945 return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
946 }
947 #endif
948
949 #undef fp_cosh
950 #define fp_cosh fp_do_cosh
951
952 #ifndef FPU_FAST_MATH
953 // FIXME: unimplemented
954 PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
955 #else
956 PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
957 {
958 fpu_extended ex = fp_exp(x);
959 return 0.5 * (ex + 1.0 / ex);
960 }
961 #endif
962
963 #undef fp_tanh
964 #define fp_tanh fp_do_tanh
965
966 #ifndef FPU_FAST_MATH
967 // FIXME: unimplemented
968 PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
969 #else
970 PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
971 {
972 fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
973 return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
974 }
975 #endif
976
977 #undef fp_atan2
978 #define fp_atan2 fp_do_atan2
979
980 PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
981 {
982 fpu_extended value;
983 __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
984 return value;
985 }
986
987 #undef fp_asin
988 #define fp_asin fp_do_asin
989
990 PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
991 {
992 return fp_atan2(x, fp_sqrt(1.0 - x * x));
993 }
994
995 #undef fp_acos
996 #define fp_acos fp_do_acos
997
998 PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
999 {
1000 return fp_atan2(fp_sqrt(1.0 - x * x), x);
1001 }
1002
1003 #undef fp_atan
1004 #define fp_atan fp_do_atan
1005
1006 PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
1007 {
1008 fpu_extended value;
1009 __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
1010 return value;
1011 }
1012
1013 #undef fp_log1p
1014 #define fp_log1p fp_do_log1p
1015
1016 // Returns: ln(1.0 + X)
1017 PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
1018
1019 #undef fp_asinh
1020 #define fp_asinh fp_do_asinh
1021
1022 PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
1023 {
1024 fpu_extended y = fp_fabs(x);
1025 return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
1026 }
1027
1028 #undef fp_acosh
1029 #define fp_acosh fp_do_acosh
1030
1031 PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
1032 {
1033 return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
1034 }
1035
1036 #undef fp_atanh
1037 #define fp_atanh fp_do_atanh
1038
1039 PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
1040 {
1041 fpu_extended y = fp_fabs(x);
1042 return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
1043 }
1044
1045 #undef fp_floor
1046 #define fp_floor fp_do_floor
1047
1048 PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
1049 {
1050 volatile unsigned int cw;
1051 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
1052 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
1053 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
1054 fpu_extended value;
1055 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
1056 __asm__ __volatile__("fldcw %0" : : "m" (cw));
1057 return value;
1058 }
1059
1060 #undef fp_ceil
1061 #define fp_ceil fp_do_ceil
1062
1063 PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
1064 {
1065 volatile unsigned int cw;
1066 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
1067 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
1068 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
1069 fpu_extended value;
1070 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
1071 __asm__ __volatile__("fldcw %0" : : "m" (cw));
1072 return value;
1073 }
1074
1075 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
1076 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
1077 { \
1078 volatile unsigned int cw; \
1079 __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
1080 volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
1081 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
1082 fpu_extended value; \
1083 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
1084 __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
1085 return value; \
1086 }
1087
1088 #undef fp_round_to_minus_infinity
1089 #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1090
1091 DEFINE_ROUND_FUNC(minus_infinity, 0x400)
1092
1093 #undef fp_round_to_plus_infinity
1094 #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1095
1096 DEFINE_ROUND_FUNC(plus_infinity, 0x800)
1097
1098 #undef fp_round_to_zero
1099 #define fp_round_to_zero fp_do_round_to_zero
1100
1101 DEFINE_ROUND_FUNC(zero, 0xc00)
1102
1103 #undef fp_round_to_nearest
1104 #define fp_round_to_nearest fp_do_round_to_nearest
1105
1106 DEFINE_ROUND_FUNC(nearest, 0x000)
1107
1108 #endif /* USE_X87_ASSEMBLY */
1109
1110 #ifndef fp_round_to_minus_infinity
1111 #define fp_round_to_minus_infinity(x) fp_floor(x)
1112 #endif
1113
1114 #ifndef fp_round_to_plus_infinity
1115 #define fp_round_to_plus_infinity(x) fp_ceil(x)
1116 #endif
1117
1118 #ifndef fp_round_to_zero
1119 #define fp_round_to_zero(x) ((int)(x))
1120 #endif
1121
1122 #ifndef fp_round_to_nearest
1123 #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1124 #endif
1125
1126 #endif /* FPU_MATHLIB_H */