ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/cebix/BasiliskII/src/uae_cpu/fpu/mathlib.h
Revision: 1.2
Committed: 2002-09-15T18:21:13Z (22 years ago) by gbeauche
Content type: text/plain
Branch: MAIN
Changes since 1.1: +99 -40 lines
Log Message:
Fix "ieee" FPU core on big endian and without long double > double support
- Handle conversions to/from host double for m68k long doubles formats
- Make mathlib aware of sizeof(long double) == sizeof(double) arches
- Attempt to fix FSCALE implementation

File Contents

# Content
1 /*
2 * fpu/mathlib.h - Floating-point math support library
3 *
4 * Basilisk II (C) 1997-2001 Christian Bauer
5 *
6 * MC68881/68040 fpu emulation
7 *
8 * Original UAE FPU, copyright 1996 Herman ten Brugge
9 * Rewrite for x86, copyright 1999-2001 Lauri Pesonen
10 * New framework, copyright 2000-2001 Gwenole Beauchesne
11 * Adapted for JIT compilation (c) Bernd Meyer, 2000-2001
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */
27
28 #ifndef FPU_MATHLIB_H
29 #define FPU_MATHLIB_H
30
31 /* NOTE: this file shall be included only from fpu/fpu_*.cpp */
32 #undef PUBLIC
33 #define PUBLIC extern
34
35 #undef PRIVATE
36 #define PRIVATE static
37
38 #undef FFPU
39 #define FFPU /**/
40
41 #undef FPU
42 #define FPU fpu.
43
44 // Define the following macro if branches are expensive. If so,
45 // integer-based isnan() and isinf() functions are implemented.
46 // TODO: move to Makefile.in
47 #define BRANCHES_ARE_EXPENSIVE 1
48
49 // Use ISO C99 extended-precision math functions (glibc 2.1+)
50 #define FPU_USE_ISO_C99 1
51
52 // NOTE: this is irrelevant on Win32 platforms since the MS libraries
53 // don't support extended-precision floating-point computations
54 #ifdef WIN32
55 #undef FPU_USE_ISO_C99
56 #endif
57
58 // Use faster implementation of math functions, but this could cause
59 // some incorrect results (?)
60 // TODO: actually implement the slower but safer versions
61 #define FPU_FAST_MATH 1
62
63 #if FPU_USE_ISO_C99
64 // NOTE: no prior <math.h> shall be included at this point
65 #define __USE_ISOC99 1 // for glibc 2.2.X and newer
66 #define __USE_ISOC9X 1 // for glibc 2.1.X
67 #include <math.h>
68 #else
69 #include <cmath>
70 using namespace std;
71 #endif
72
73 /* -------------------------------------------------------------------------- */
74 /* --- Floating-point register types --- */
75 /* -------------------------------------------------------------------------- */
76
77 // Single : S 8*E 23*F
78 #define FP_SINGLE_EXP_MAX 0xff
79 #define FP_SINGLE_EXP_BIAS 0x7f
80
81 // Double : S 11*E 52*F
82 #define FP_DOUBLE_EXP_MAX 0x7ff
83 #define FP_DOUBLE_EXP_BIAS 0x3ff
84
85 // Extended : S 15*E 64*F
86 #define FP_EXTENDED_EXP_MAX 0x7fff
87 #define FP_EXTENDED_EXP_BIAS 0x3fff
88
89 // Zeroes : E = 0 & F = 0
90 // Infinities : E = MAX & F = 0
91 // Not-A-Number : E = MAX & F # 0
92
93 /* -------------------------------------------------------------------------- */
94 /* --- Floating-point type shapes (IEEE-compliant) --- */
95 /* -------------------------------------------------------------------------- */
96
97 // Taken from glibc 2.2.x: ieee754.h
98
99 // IEEE-754 float format
100 union fpu_single_shape {
101
102 fpu_single value;
103
104 /* This is the IEEE 754 single-precision format. */
105 struct {
106 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
107 unsigned int negative:1;
108 unsigned int exponent:8;
109 unsigned int mantissa:23;
110 #endif /* Big endian. */
111 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
112 unsigned int mantissa:23;
113 unsigned int exponent:8;
114 unsigned int negative:1;
115 #endif /* Little endian. */
116 } ieee;
117
118 /* This format makes it easier to see if a NaN is a signalling NaN. */
119 struct {
120 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
121 unsigned int negative:1;
122 unsigned int exponent:8;
123 unsigned int quiet_nan:1;
124 unsigned int mantissa:22;
125 #endif /* Big endian. */
126 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
127 unsigned int mantissa:22;
128 unsigned int quiet_nan:1;
129 unsigned int exponent:8;
130 unsigned int negative:1;
131 #endif /* Little endian. */
132 } ieee_nan;
133 };
134
135 // IEEE-754 double format
136 union fpu_double_shape {
137 fpu_double value;
138
139 /* This is the IEEE 754 double-precision format. */
140 struct {
141 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
142 unsigned int negative:1;
143 unsigned int exponent:11;
144 /* Together these comprise the mantissa. */
145 unsigned int mantissa0:20;
146 unsigned int mantissa1:32;
147 #endif /* Big endian. */
148 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
149 # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
150 unsigned int mantissa0:20;
151 unsigned int exponent:11;
152 unsigned int negative:1;
153 unsigned int mantissa1:32;
154 # else
155 /* Together these comprise the mantissa. */
156 unsigned int mantissa1:32;
157 unsigned int mantissa0:20;
158 unsigned int exponent:11;
159 unsigned int negative:1;
160 # endif
161 #endif /* Little endian. */
162 } ieee;
163
164 /* This format makes it easier to see if a NaN is a signalling NaN. */
165 struct {
166 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
167 unsigned int negative:1;
168 unsigned int exponent:11;
169 unsigned int quiet_nan:1;
170 /* Together these comprise the mantissa. */
171 unsigned int mantissa0:19;
172 unsigned int mantissa1:32;
173 #else
174 # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
175 unsigned int mantissa0:19;
176 unsigned int quiet_nan:1;
177 unsigned int exponent:11;
178 unsigned int negative:1;
179 unsigned int mantissa1:32;
180 # else
181 /* Together these comprise the mantissa. */
182 unsigned int mantissa1:32;
183 unsigned int mantissa0:19;
184 unsigned int quiet_nan:1;
185 unsigned int exponent:11;
186 unsigned int negative:1;
187 # endif
188 #endif
189 } ieee_nan;
190
191 /* This format is used to extract the sign_exponent and mantissa parts only */
192 struct {
193 #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
194 unsigned int msw:32;
195 unsigned int lsw:32;
196 #else
197 unsigned int lsw:32;
198 unsigned int msw:32;
199 #endif
200 } parts;
201 };
202
203 #ifdef USE_LONG_DOUBLE
204 // IEEE-854 long double format
205 union fpu_extended_shape {
206 fpu_extended value;
207
208 /* This is the IEEE 854 double-extended-precision format. */
209 struct {
210 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
211 unsigned int negative:1;
212 unsigned int exponent:15;
213 unsigned int empty:16;
214 unsigned int mantissa0:32;
215 unsigned int mantissa1:32;
216 #endif
217 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
218 # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
219 unsigned int exponent:15;
220 unsigned int negative:1;
221 unsigned int empty:16;
222 unsigned int mantissa0:32;
223 unsigned int mantissa1:32;
224 # else
225 unsigned int mantissa1:32;
226 unsigned int mantissa0:32;
227 unsigned int exponent:15;
228 unsigned int negative:1;
229 unsigned int empty:16;
230 # endif
231 #endif
232 } ieee;
233
234 /* This is for NaNs in the IEEE 854 double-extended-precision format. */
235 struct {
236 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
237 unsigned int negative:1;
238 unsigned int exponent:15;
239 unsigned int empty:16;
240 unsigned int one:1;
241 unsigned int quiet_nan:1;
242 unsigned int mantissa0:30;
243 unsigned int mantissa1:32;
244 #endif
245 #if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN
246 # if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
247 unsigned int exponent:15;
248 unsigned int negative:1;
249 unsigned int empty:16;
250 unsigned int mantissa0:30;
251 unsigned int quiet_nan:1;
252 unsigned int one:1;
253 unsigned int mantissa1:32;
254 # else
255 unsigned int mantissa1:32;
256 unsigned int mantissa0:30;
257 unsigned int quiet_nan:1;
258 unsigned int one:1;
259 unsigned int exponent:15;
260 unsigned int negative:1;
261 unsigned int empty:16;
262 # endif
263 #endif
264 } ieee_nan;
265
266 /* This format is used to extract the sign_exponent and mantissa parts only */
267 struct {
268 #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
269 unsigned int sign_exponent:16;
270 unsigned int empty:16;
271 unsigned int msw:32;
272 unsigned int lsw:32;
273 #else
274 unsigned int lsw:32;
275 unsigned int msw:32;
276 unsigned int sign_exponent:16;
277 unsigned int empty:16;
278 #endif
279 } parts;
280 };
281 #endif
282
283 #ifdef USE_QUAD_DOUBLE
284 // IEEE-854 quad double format
285 union fpu_extended_shape {
286 fpu_extended value;
287
288 /* This is the IEEE 854 quad-precision format. */
289 struct {
290 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
291 unsigned int negative:1;
292 unsigned int exponent:15;
293 unsigned int mantissa0:16;
294 unsigned int mantissa1:32;
295 unsigned int mantissa2:32;
296 unsigned int mantissa3:32;
297 #else
298 unsigned int mantissa3:32;
299 unsigned int mantissa2:32;
300 unsigned int mantissa1:32;
301 unsigned int mantissa0:16;
302 unsigned int exponent:15;
303 unsigned int negative:1;
304 #endif
305 } ieee;
306
307 /* This is for NaNs in the IEEE 854 quad-precision format. */
308 struct {
309 #if UAE_BYTE_ORDER == UAE_BIG_ENDIAN
310 unsigned int negative:1;
311 unsigned int exponent:15;
312 unsigned int quiet_nan:1;
313 unsigned int mantissa0:15;
314 unsigned int mantissa1:30;
315 unsigned int mantissa2:32;
316 unsigned int mantissa3:32;
317 #else
318 unsigned int mantissa3:32;
319 unsigned int mantissa2:32;
320 unsigned int mantissa1:32;
321 unsigned int mantissa0:15;
322 unsigned int quiet_nan:1;
323 unsigned int exponent:15;
324 unsigned int negative:1;
325 #endif
326 } ieee_nan;
327
328 /* This format is used to extract the sign_exponent and mantissa parts only */
329 #if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN
330 struct {
331 uae_u64 msw;
332 uae_u64 lsw;
333 } parts64;
334 struct {
335 uae_u32 w0;
336 uae_u32 w1;
337 uae_u32 w2;
338 uae_u32 w3;
339 } parts32;
340 #else
341 struct {
342 uae_u64 lsw;
343 uae_u64 msw;
344 } parts64;
345 struct {
346 uae_u32 w3;
347 uae_u32 w2;
348 uae_u32 w1;
349 uae_u32 w0;
350 } parts32;
351 #endif
352 };
353 #endif
354
355 // Declare and initialize a pointer to a shape of the requested FP type
356 #define fp_declare_init_shape(psvar, rfvar, ftype) \
357 fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar )
358
359 /* -------------------------------------------------------------------------- */
360 /* --- Extra Math Functions --- */
361 /* --- (most of them had to be defined before including <fpu/flags.h>) --- */
362 /* -------------------------------------------------------------------------- */
363
364 #undef isnan
365 #if 0 && defined(HAVE_ISNANL)
366 # define isnan(x) isnanl((x))
367 #else
368 # define isnan(x) fp_do_isnan((x))
369 #endif
370
371 PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r)
372 {
373 #ifdef BRANCHES_ARE_EXPENSIVE
374 #ifndef USE_LONG_DOUBLE
375 fp_declare_init_shape(sxp, r, double);
376 uae_s32 hx = sxp->parts.msw;
377 uae_s32 lx = sxp->parts.lsw;
378 hx &= 0x7fffffff;
379 hx |= (uae_u32)(lx | (-lx)) >> 31;
380 hx = 0x7ff00000 - hx;
381 return (int)(((uae_u32)hx) >> 31);
382 #elif USE_QUAD_DOUBLE
383 fp_declare_init_shape(sxp, r, extended);
384 uae_s64 hx = sxp->parts64.msw;
385 uae_s64 lx = sxp->parts64.lsw;
386 hx &= 0x7fffffffffffffffLL;
387 hx |= (uae_u64)(lx | (-lx)) >> 63;
388 hx = 0x7fff000000000000LL - hx;
389 return (int)((uae_u64)hx >> 63);
390 #else
391 fp_declare_init_shape(sxp, r, extended);
392 uae_s32 se = sxp->parts.sign_exponent;
393 uae_s32 hx = sxp->parts.msw;
394 uae_s32 lx = sxp->parts.lsw;
395 se = (se & 0x7fff) << 1;
396 lx |= hx & 0x7fffffff;
397 se |= (uae_u32)(lx | (-lx)) >> 31;
398 se = 0xfffe - se;
399 // TODO: check whether rshift count is 16 or 31
400 return (int)(((uae_u32)(se)) >> 16);
401 #endif
402 #else
403 #ifndef USE_LONG_DOUBLE
404 fp_declare_init_shape(sxp, r, double);
405 return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
406 #else
407 fp_declare_init_shape(sxp, r, extended);
408 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
409 #endif
410 && (sxp->ieee_nan.mantissa0 != 0)
411 && (sxp->ieee_nan.mantissa1 != 0)
412 #ifdef USE_QUAD_DOUBLE
413 && (sxp->ieee_nan.mantissa2 != 0)
414 && (sxp->ieee_nan.mantissa3 != 0)
415 #endif
416 ;
417 #endif
418 }
419
420 #undef isinf
421 #if 0 && defined(HAVE_ISINFL)
422 # define isinf(x) isinfl((x))
423 #else
424 # define isinf(x) fp_do_isinf((x))
425 #endif
426
427 PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r)
428 {
429 #ifdef BRANCHES_ARE_EXPENSIVE
430 #ifndef USE_LONG_DOUBLE
431 fp_declare_init_shape(sxp, r, double);
432 uae_s32 hx = sxp->parts.msw;
433 uae_s32 lx = sxp->parts.lsw;
434 lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
435 lx |= -lx;
436 return ~(lx >> 31) & (hx >> 30);
437 #elif USE_QUAD_DOUBLE
438 fp_declare_init_shape(sxp, r, extended);
439 uae_s64 hx = sxp->parts64.msw;
440 uae_s64 lx = sxp->parts64.lsw;
441 lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL;
442 lx |= -lx;
443 return ~(lx >> 63) & (hx >> 62);
444 #else
445 fp_declare_init_shape(sxp, r, extended);
446 uae_s32 se = sxp->parts.sign_exponent;
447 uae_s32 hx = sxp->parts.msw;
448 uae_s32 lx = sxp->parts.lsw;
449 /* This additional ^ 0x80000000 is necessary because in Intel's
450 internal representation of the implicit one is explicit.
451 NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */
452 #ifdef __i386__
453 lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff);
454 #else
455 lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff);
456 #endif
457 lx |= -lx;
458 se &= 0x8000;
459 return ~(lx >> 31) & (1 - (se >> 14));
460 #endif
461 #else
462 #ifndef USE_LONG_DOUBLE
463 fp_declare_init_shape(sxp, r, double);
464 return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX)
465 #else
466 fp_declare_init_shape(sxp, r, extended);
467 return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX)
468 #endif
469 && (sxp->ieee_nan.mantissa0 == 0)
470 && (sxp->ieee_nan.mantissa1 == 0)
471 #ifdef USE_QUAD_DOUBLE
472 && (sxp->ieee_nan.mantissa2 == 0)
473 && (sxp->ieee_nan.mantissa3 == 0)
474 #endif
475 ;
476 #endif
477 }
478
479 #undef isneg
480 #define isneg(x) fp_do_isneg((x))
481
482 PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r)
483 {
484 #ifndef USE_LONG_DOUBLE
485 fp_declare_init_shape(sxp, r, double);
486 #else
487 fp_declare_init_shape(sxp, r, extended);
488 #endif
489 return sxp->ieee.negative;
490 }
491
492 #undef iszero
493 #define iszero(x) fp_do_iszero((x))
494
495 PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r)
496 {
497 // TODO: BRANCHES_ARE_EXPENSIVE
498 #ifndef USE_LONG_DOUBLE
499 fp_declare_init_shape(sxp, r, double);
500 #else
501 fp_declare_init_shape(sxp, r, extended);
502 #endif
503 return (sxp->ieee.exponent == 0)
504 && (sxp->ieee.mantissa0 == 0)
505 && (sxp->ieee.mantissa1 == 0)
506 #ifdef USE_QUAD_DOUBLE
507 && (sxp->ieee.mantissa2 == 0)
508 && (sxp->ieee.mantissa3 == 0)
509 #endif
510 ;
511 }
512
513 PRIVATE inline void FFPU get_dest_flags(fpu_register const & r)
514 {
515 fl_dest.negative = isneg(r);
516 fl_dest.zero = iszero(r);
517 fl_dest.infinity = isinf(r);
518 fl_dest.nan = isnan(r);
519 fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan;
520 }
521
522 PRIVATE inline void FFPU get_source_flags(fpu_register const & r)
523 {
524 fl_source.negative = isneg(r);
525 fl_source.zero = iszero(r);
526 fl_source.infinity = isinf(r);
527 fl_source.nan = isnan(r);
528 fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan;
529 }
530
531 PRIVATE inline void FFPU make_nan(fpu_register & r)
532 {
533 // FIXME: is that correct ?
534 #ifndef USE_LONG_DOUBLE
535 fp_declare_init_shape(sxp, r, double);
536 sxp->ieee.exponent = FP_DOUBLE_EXP_MAX;
537 sxp->ieee.mantissa0 = 0xfffff;
538 #else
539 fp_declare_init_shape(sxp, r, extended);
540 sxp->ieee.exponent = FP_EXTENDED_EXP_MAX;
541 sxp->ieee.mantissa0 = 0xffffffff;
542 #endif
543 sxp->ieee.mantissa1 = 0xffffffff;
544 #ifdef USE_QUAD_DOUBLE
545 sxp->ieee.mantissa2 = 0xffffffff;
546 sxp->ieee.mantissa3 = 0xffffffff;
547 #endif
548 }
549
550 PRIVATE inline void FFPU make_zero_positive(fpu_register & r)
551 {
552 #if 1
553 r = +0.0;
554 #else
555 #ifndef USE_LONG_DOUBLE
556 fp_declare_init_shape(sxp, r, double);
557 #else
558 fp_declare_init_shape(sxp, r, extended);
559 #endif
560 sxp->ieee.negative = 0;
561 sxp->ieee.exponent = 0;
562 sxp->ieee.mantissa0 = 0;
563 sxp->ieee.mantissa1 = 0;
564 #ifdef USE_QUAD_DOUBLE
565 sxp->ieee.mantissa2 = 0;
566 sxp->ieee.mantissa3 = 0;
567 #endif
568 #endif
569 }
570
571 PRIVATE inline void FFPU make_zero_negative(fpu_register & r)
572 {
573 #if 1
574 r = -0.0;
575 #else
576 #ifndef USE_LONG_DOUBLE
577 fp_declare_init_shape(sxp, r, double);
578 #else
579 fp_declare_init_shape(sxp, r, extended);
580 #endif
581 sxp->ieee.negative = 1;
582 sxp->ieee.exponent = 0;
583 sxp->ieee.mantissa0 = 0;
584 sxp->ieee.mantissa1 = 0;
585 #ifdef USE_QUAD_DOUBLE
586 sxp->ieee.mantissa2 = 0;
587 sxp->ieee.mantissa3 = 0;
588 #endif
589 #endif
590 }
591
592 PRIVATE inline void FFPU make_inf_positive(fpu_register & r)
593 {
594 #ifndef USE_LONG_DOUBLE
595 fp_declare_init_shape(sxp, r, double);
596 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
597 #else
598 fp_declare_init_shape(sxp, r, extended);
599 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
600 #endif
601 sxp->ieee_nan.negative = 0;
602 sxp->ieee_nan.mantissa0 = 0;
603 sxp->ieee_nan.mantissa1 = 0;
604 #ifdef USE_QUAD_DOUBLE
605 sxp->ieee_nan.mantissa2 = 0;
606 sxp->ieee_nan.mantissa3 = 0;
607 #endif
608 }
609
610 PRIVATE inline void FFPU make_inf_negative(fpu_register & r)
611 {
612 #ifndef USE_LONG_DOUBLE
613 fp_declare_init_shape(sxp, r, double);
614 sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX;
615 #else
616 fp_declare_init_shape(sxp, r, extended);
617 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
618 #endif
619 sxp->ieee_nan.negative = 1;
620 sxp->ieee_nan.mantissa0 = 0;
621 sxp->ieee_nan.mantissa1 = 0;
622 #ifdef USE_QUAD_DOUBLE
623 sxp->ieee_nan.mantissa2 = 0;
624 sxp->ieee_nan.mantissa3 = 0;
625 #endif
626 }
627
628 PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r)
629 {
630 #ifndef USE_LONG_DOUBLE
631 fp_declare_init_shape(sxp, r, double);
632 return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS);
633 #else
634 fp_declare_init_shape(sxp, r, extended);
635 return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS);
636 #endif
637 }
638
639 // Normalize to range 1..2
640 PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r)
641 {
642 #ifndef USE_LONG_DOUBLE
643 fp_declare_init_shape(sxp, r, double);
644 sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS;
645 #else
646 fp_declare_init_shape(sxp, r, extended);
647 sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS;
648 #endif
649 }
650
651 // The sign of the quotient is the exclusive-OR of the sign bits
652 // of the source and destination operands.
653 PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb)
654 {
655 #ifndef USE_LONG_DOUBLE
656 fp_declare_init_shape(sap, ra, double);
657 fp_declare_init_shape(sbp, rb, double);
658 #else
659 fp_declare_init_shape(sap, ra, extended);
660 fp_declare_init_shape(sbp, rb, extended);
661 #endif
662 return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0);
663 }
664
665 /* -------------------------------------------------------------------------- */
666 /* --- Math functions --- */
667 /* -------------------------------------------------------------------------- */
668
669 #if FPU_USE_ISO_C99 && USE_LONG_DOUBLE
670 # define fp_log logl
671 # define fp_log10 log10l
672 # define fp_exp expl
673 # define fp_pow powl
674 # define fp_fabs fabsl
675 # define fp_sqrt sqrtl
676 # define fp_sin sinl
677 # define fp_cos cosl
678 # define fp_tan tanl
679 # define fp_sinh sinhl
680 # define fp_cosh coshl
681 # define fp_tanh tanhl
682 # define fp_asin asinl
683 # define fp_acos acosl
684 # define fp_atan atanl
685 # define fp_asinh asinhl
686 # define fp_acosh acoshl
687 # define fp_atanh atanhl
688 # define fp_floor floorl
689 # define fp_ceil ceill
690 #else
691 # define fp_log log
692 # define fp_log10 log10
693 # define fp_exp exp
694 # define fp_pow pow
695 # define fp_fabs fabs
696 # define fp_sqrt sqrt
697 # define fp_sin sin
698 # define fp_cos cos
699 # define fp_tan tan
700 # define fp_sinh sinh
701 # define fp_cosh cosh
702 # define fp_tanh tanh
703 # define fp_asin asin
704 # define fp_acos acos
705 # define fp_atan atan
706 # define fp_asinh asinh
707 # define fp_acosh acosh
708 # define fp_atanh atanh
709 # define fp_floor floor
710 # define fp_ceil ceil
711 #endif
712
713 #if defined(FPU_IEEE) && defined(X86_ASSEMBLY)
714 // Assembly optimized support functions. Taken from glibc 2.2.2
715
716 #undef fp_log
717 #define fp_log fp_do_log
718
719 #ifndef FPU_FAST_MATH
720 // FIXME: unimplemented
721 PRIVATE fpu_extended fp_do_log(fpu_extended x);
722 #else
723 PRIVATE inline fpu_extended fp_do_log(fpu_extended x)
724 {
725 fpu_extended value;
726 __asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
727 return value;
728 }
729 #endif
730
731 #undef fp_log10
732 #define fp_log10 fp_do_log10
733
734 #ifndef FPU_FAST_MATH
735 // FIXME: unimplemented
736 PRIVATE fpu_extended fp_do_log10(fpu_extended x);
737 #else
738 PRIVATE inline fpu_extended fp_do_log10(fpu_extended x)
739 {
740 fpu_extended value;
741 __asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)");
742 return value;
743 }
744 #endif
745
746 #undef fp_exp
747 #define fp_exp fp_do_exp
748
749 #ifndef FPU_FAST_MATH
750 // FIXME: unimplemented
751 PRIVATE fpu_extended fp_do_exp(fpu_extended x);
752 #else
753 PRIVATE inline fpu_extended fp_do_exp(fpu_extended x)
754 {
755 fpu_extended value, exponent;
756 __asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t"
757 "fmul %%st(1) # x * log2(e)\n\t"
758 "fst %%st(1)\n\t"
759 "frndint # int(x * log2(e))\n\t"
760 "fxch\n\t"
761 "fsub %%st(1) # fract(x * log2(e))\n\t"
762 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
763 : "=t" (value), "=u" (exponent) : "0" (x));
764 value += 1.0;
765 __asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent));
766 return value;
767 }
768 #endif
769
770 #undef fp_pow
771 #define fp_pow fp_do_pow
772
773 PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y);
774
775 #undef fp_fabs
776 #define fp_fabs fp_do_fabs
777
778 PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x)
779 {
780 fpu_extended value;
781 __asm__ __volatile__("fabs" : "=t" (value) : "0" (x));
782 return value;
783 }
784
785 #undef fp_sqrt
786 #define fp_sqrt fp_do_sqrt
787
788 PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x)
789 {
790 fpu_extended value;
791 __asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x));
792 return value;
793 }
794
795 #undef fp_sin
796 #define fp_sin fp_do_sin
797
798 PRIVATE inline fpu_extended fp_do_sin(fpu_extended x)
799 {
800 fpu_extended value;
801 __asm__ __volatile__("fsin" : "=t" (value) : "0" (x));
802 return value;
803 }
804
805 #undef fp_cos
806 #define fp_cos fp_do_cos
807
808 PRIVATE inline fpu_extended fp_do_cos(fpu_extended x)
809 {
810 fpu_extended value;
811 __asm__ __volatile__("fcos" : "=t" (value) : "0" (x));
812 return value;
813 }
814
815 #undef fp_tan
816 #define fp_tan fp_do_tan
817
818 PRIVATE inline fpu_extended fp_do_tan(fpu_extended x)
819 {
820 fpu_extended value;
821 __asm__ __volatile__("fptan" : "=t" (value) : "0" (x));
822 return value;
823 }
824
825 #undef fp_expm1
826 #define fp_expm1 fp_do_expm1
827
828 // Returns: exp(X) - 1.0
829 PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x)
830 {
831 fpu_extended value, exponent, temp;
832 __asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t"
833 "fmul %%st(1) # x * log2(e)\n\t"
834 "fst %%st(1)\n\t"
835 "frndint # int(x * log2(e))\n\t"
836 "fxch\n\t"
837 "fsub %%st(1) # fract(x * log2(e))\n\t"
838 "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t"
839 "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t"
840 : "=t" (value), "=u" (exponent) : "0" (x));
841 __asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent));
842 temp -= 1.0;
843 return temp + value ? temp + value : x;
844 }
845
846 #undef fp_sgn1
847 #define fp_sgn1 fp_do_sgn1
848
849 PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x)
850 {
851 fp_declare_init_shape(sxp, x, extended);
852 sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX;
853 sxp->ieee_nan.one = 1;
854 sxp->ieee_nan.quiet_nan = 0;
855 sxp->ieee_nan.mantissa0 = 0;
856 sxp->ieee_nan.mantissa1 = 0;
857 return x;
858 }
859
860 #undef fp_sinh
861 #define fp_sinh fp_do_sinh
862
863 #ifndef FPU_FAST_MATH
864 // FIXME: unimplemented
865 PRIVATE fpu_extended fp_do_sinh(fpu_extended x);
866 #else
867 PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x)
868 {
869 fpu_extended exm1 = fp_expm1(fp_fabs(x));
870 return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x);
871 }
872 #endif
873
874 #undef fp_cosh
875 #define fp_cosh fp_do_cosh
876
877 #ifndef FPU_FAST_MATH
878 // FIXME: unimplemented
879 PRIVATE fpu_extended fp_do_cosh(fpu_extended x);
880 #else
881 PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x)
882 {
883 fpu_extended ex = fp_exp(x);
884 return 0.5 * (ex + 1.0 / ex);
885 }
886 #endif
887
888 #undef fp_tanh
889 #define fp_tanh fp_do_tanh
890
891 #ifndef FPU_FAST_MATH
892 // FIXME: unimplemented
893 PRIVATE fpu_extended fp_do_tanh(fpu_extended x);
894 #else
895 PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x)
896 {
897 fpu_extended exm1 = fp_expm1(-fp_fabs(x + x));
898 return exm1 / (exm1 + 2.0) * fp_sgn1(-x);
899 }
900 #endif
901
902 #undef fp_atan2
903 #define fp_atan2 fp_do_atan2
904
905 PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x)
906 {
907 fpu_extended value;
908 __asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)");
909 return value;
910 }
911
912 #undef fp_asin
913 #define fp_asin fp_do_asin
914
915 PRIVATE inline fpu_extended fp_do_asin(fpu_extended x)
916 {
917 return fp_atan2(x, fp_sqrt(1.0 - x * x));
918 }
919
920 #undef fp_acos
921 #define fp_acos fp_do_acos
922
923 PRIVATE inline fpu_extended fp_do_acos(fpu_extended x)
924 {
925 return fp_atan2(fp_sqrt(1.0 - x * x), x);
926 }
927
928 #undef fp_atan
929 #define fp_atan fp_do_atan
930
931 PRIVATE inline fpu_extended fp_do_atan(fpu_extended x)
932 {
933 fpu_extended value;
934 __asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)");
935 return value;
936 }
937
938 #undef fp_log1p
939 #define fp_log1p fp_do_log1p
940
941 // Returns: ln(1.0 + X)
942 PRIVATE fpu_extended fp_do_log1p(fpu_extended x);
943
944 #undef fp_asinh
945 #define fp_asinh fp_do_asinh
946
947 PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x)
948 {
949 fpu_extended y = fp_fabs(x);
950 return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x));
951 }
952
953 #undef fp_acosh
954 #define fp_acosh fp_do_acosh
955
956 PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x)
957 {
958 return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0));
959 }
960
961 #undef fp_atanh
962 #define fp_atanh fp_do_atanh
963
964 PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x)
965 {
966 fpu_extended y = fp_fabs(x);
967 return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x);
968 }
969
970 #undef fp_floor
971 #define fp_floor fp_do_floor
972
973 PRIVATE inline fpu_extended fp_do_floor(fpu_extended x)
974 {
975 volatile unsigned int cw;
976 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
977 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down
978 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
979 fpu_extended value;
980 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
981 __asm__ __volatile__("fldcw %0" : : "m" (cw));
982 return value;
983 }
984
985 #undef fp_ceil
986 #define fp_ceil fp_do_ceil
987
988 PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x)
989 {
990 volatile unsigned int cw;
991 __asm__ __volatile__("fnstcw %0" : "=m" (cw));
992 volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up
993 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp));
994 fpu_extended value;
995 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x));
996 __asm__ __volatile__("fldcw %0" : : "m" (cw));
997 return value;
998 }
999
1000 #define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \
1001 PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \
1002 { \
1003 volatile unsigned int cw; \
1004 __asm__ __volatile__("fnstcw %0" : "=m" (cw)); \
1005 volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \
1006 __asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \
1007 fpu_extended value; \
1008 __asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \
1009 __asm__ __volatile__("fldcw %0" : : "m" (cw)); \
1010 return value; \
1011 }
1012
1013 #undef fp_round_to_minus_infinity
1014 #define fp_round_to_minus_infinity fp_do_round_to_minus_infinity
1015
1016 DEFINE_ROUND_FUNC(minus_infinity, 0x400)
1017
1018 #undef fp_round_to_plus_infinity
1019 #define fp_round_to_plus_infinity fp_do_round_to_plus_infinity
1020
1021 DEFINE_ROUND_FUNC(plus_infinity, 0x800)
1022
1023 #undef fp_round_to_zero
1024 #define fp_round_to_zero fp_do_round_to_zero
1025
1026 DEFINE_ROUND_FUNC(zero, 0xc00)
1027
1028 #undef fp_round_to_nearest
1029 #define fp_round_to_nearest fp_do_round_to_nearest
1030
1031 DEFINE_ROUND_FUNC(nearest, 0x000)
1032
1033 #endif /* X86_ASSEMBLY */
1034
1035 #ifndef fp_round_to_minus_infinity
1036 #define fp_round_to_minus_infinity(x) fp_floor(x)
1037 #endif
1038
1039 #ifndef fp_round_to_plus_infinity
1040 #define fp_round_to_plus_infinity(x) fp_ceil(x)
1041 #endif
1042
1043 #ifndef fp_round_to_zero
1044 #define fp_round_to_zero(x) ((int)(x))
1045 #endif
1046
1047 #ifndef fp_round_to_nearest
1048 #define fp_round_to_nearest(x) ((int)((x) + 0.5))
1049 #endif
1050
1051 #endif /* FPU_MATHLIB_H */