1 |
/* |
2 |
* fpu/mathlib.h - Floating-point math support library |
3 |
* |
4 |
* Basilisk II (C) 1997-2001 Christian Bauer |
5 |
* |
6 |
* MC68881/68040 fpu emulation |
7 |
* |
8 |
* Original UAE FPU, copyright 1996 Herman ten Brugge |
9 |
* Rewrite for x86, copyright 1999-2001 Lauri Pesonen |
10 |
* New framework, copyright 2000-2001 Gwenole Beauchesne |
11 |
* Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 |
12 |
* |
13 |
* This program is free software; you can redistribute it and/or modify |
14 |
* it under the terms of the GNU General Public License as published by |
15 |
* the Free Software Foundation; either version 2 of the License, or |
16 |
* (at your option) any later version. |
17 |
* |
18 |
* This program is distributed in the hope that it will be useful, |
19 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 |
* GNU General Public License for more details. |
22 |
* |
23 |
* You should have received a copy of the GNU General Public License |
24 |
* along with this program; if not, write to the Free Software |
25 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
26 |
*/ |
27 |
|
28 |
#ifndef FPU_MATHLIB_H |
29 |
#define FPU_MATHLIB_H |
30 |
|
31 |
/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ |
32 |
#undef PUBLIC |
33 |
#define PUBLIC extern |
34 |
|
35 |
#undef PRIVATE |
36 |
#define PRIVATE static |
37 |
|
38 |
#undef FFPU |
39 |
#define FFPU /**/ |
40 |
|
41 |
#undef FPU |
42 |
#define FPU fpu. |
43 |
|
44 |
// Define the following macro if branches are expensive. If so, |
45 |
// integer-based isnan() and isinf() functions are implemented. |
46 |
// TODO: move to Makefile.in |
47 |
#define BRANCHES_ARE_EXPENSIVE 1 |
48 |
|
49 |
// Use ISO C99 extended-precision math functions (glibc 2.1+) |
50 |
#define FPU_USE_ISO_C99 1 |
51 |
|
52 |
// NOTE: this is irrelevant on Win32 platforms since the MS libraries |
53 |
// don't support extended-precision floating-point computations |
54 |
#ifdef WIN32 |
55 |
#undef FPU_USE_ISO_C99 |
56 |
#endif |
57 |
|
58 |
// Use faster implementation of math functions, but this could cause |
59 |
// some incorrect results (?) |
60 |
// TODO: actually implement the slower but safer versions |
61 |
#define FPU_FAST_MATH 1 |
62 |
|
63 |
#if FPU_USE_ISO_C99 |
64 |
// NOTE: no prior <math.h> shall be included at this point |
65 |
#define __USE_ISOC99 1 // for glibc 2.2.X and newer |
66 |
#define __USE_ISOC9X 1 // for glibc 2.1.X |
67 |
#include <math.h> |
68 |
#else |
69 |
#include <cmath> |
70 |
using namespace std; |
71 |
#endif |
72 |
|
73 |
/* -------------------------------------------------------------------------- */ |
74 |
/* --- Floating-point register types --- */ |
75 |
/* -------------------------------------------------------------------------- */ |
76 |
|
77 |
// Single : S 8*E 23*F |
78 |
#define FP_SINGLE_EXP_MAX 0xff |
79 |
#define FP_SINGLE_EXP_BIAS 0x7f |
80 |
|
81 |
// Double : S 11*E 52*F |
82 |
#define FP_DOUBLE_EXP_MAX 0x7ff |
83 |
#define FP_DOUBLE_EXP_BIAS 0x3ff |
84 |
|
85 |
// Extended : S 15*E 64*F |
86 |
#define FP_EXTENDED_EXP_MAX 0x7fff |
87 |
#define FP_EXTENDED_EXP_BIAS 0x3fff |
88 |
|
89 |
// Zeroes : E = 0 & F = 0 |
90 |
// Infinities : E = MAX & F = 0 |
91 |
// Not-A-Number : E = MAX & F # 0 |
92 |
|
93 |
/* -------------------------------------------------------------------------- */ |
94 |
/* --- Floating-point type shapes (IEEE-compliant) --- */ |
95 |
/* -------------------------------------------------------------------------- */ |
96 |
|
97 |
// Taken from glibc 2.2.x: ieee754.h |
98 |
|
99 |
// IEEE-754 float format |
100 |
union fpu_single_shape { |
101 |
|
102 |
fpu_single value; |
103 |
|
104 |
/* This is the IEEE 754 single-precision format. */ |
105 |
struct { |
106 |
#ifdef WORDS_BIGENDIAN |
107 |
unsigned int negative:1; |
108 |
unsigned int exponent:8; |
109 |
unsigned int mantissa:23; |
110 |
#else |
111 |
unsigned int mantissa:23; |
112 |
unsigned int exponent:8; |
113 |
unsigned int negative:1; |
114 |
#endif |
115 |
} ieee; |
116 |
|
117 |
/* This format makes it easier to see if a NaN is a signalling NaN. */ |
118 |
struct { |
119 |
#ifdef WORDS_BIGENDIAN |
120 |
unsigned int negative:1; |
121 |
unsigned int exponent:8; |
122 |
unsigned int quiet_nan:1; |
123 |
unsigned int mantissa:22; |
124 |
#else |
125 |
unsigned int mantissa:22; |
126 |
unsigned int quiet_nan:1; |
127 |
unsigned int exponent:8; |
128 |
unsigned int negative:1; |
129 |
#endif |
130 |
} ieee_nan; |
131 |
}; |
132 |
|
133 |
// IEEE-754 double format |
134 |
union fpu_double_shape { |
135 |
fpu_double value; |
136 |
|
137 |
/* This is the IEEE 754 double-precision format. */ |
138 |
struct { |
139 |
#ifdef WORDS_BIGENDIAN |
140 |
unsigned int negative:1; |
141 |
unsigned int exponent:11; |
142 |
/* Together these comprise the mantissa. */ |
143 |
unsigned int mantissa0:20; |
144 |
unsigned int mantissa1:32; |
145 |
#else |
146 |
# if HOST_FLOAT_WORDS_BIG_ENDIAN |
147 |
unsigned int mantissa0:20; |
148 |
unsigned int exponent:11; |
149 |
unsigned int negative:1; |
150 |
unsigned int mantissa1:32; |
151 |
# else |
152 |
/* Together these comprise the mantissa. */ |
153 |
unsigned int mantissa1:32; |
154 |
unsigned int mantissa0:20; |
155 |
unsigned int exponent:11; |
156 |
unsigned int negative:1; |
157 |
# endif |
158 |
#endif |
159 |
} ieee; |
160 |
|
161 |
/* This format makes it easier to see if a NaN is a signalling NaN. */ |
162 |
struct { |
163 |
#ifdef WORDS_BIGENDIAN |
164 |
unsigned int negative:1; |
165 |
unsigned int exponent:11; |
166 |
unsigned int quiet_nan:1; |
167 |
/* Together these comprise the mantissa. */ |
168 |
unsigned int mantissa0:19; |
169 |
unsigned int mantissa1:32; |
170 |
#else |
171 |
# if HOST_FLOAT_WORDS_BIG_ENDIAN |
172 |
unsigned int mantissa0:19; |
173 |
unsigned int quiet_nan:1; |
174 |
unsigned int exponent:11; |
175 |
unsigned int negative:1; |
176 |
unsigned int mantissa1:32; |
177 |
# else |
178 |
/* Together these comprise the mantissa. */ |
179 |
unsigned int mantissa1:32; |
180 |
unsigned int mantissa0:19; |
181 |
unsigned int quiet_nan:1; |
182 |
unsigned int exponent:11; |
183 |
unsigned int negative:1; |
184 |
# endif |
185 |
#endif |
186 |
} ieee_nan; |
187 |
|
188 |
/* This format is used to extract the sign_exponent and mantissa parts only */ |
189 |
struct { |
190 |
#if HOST_FLOAT_WORDS_BIG_ENDIAN |
191 |
unsigned int msw:32; |
192 |
unsigned int lsw:32; |
193 |
#else |
194 |
unsigned int lsw:32; |
195 |
unsigned int msw:32; |
196 |
#endif |
197 |
} parts; |
198 |
}; |
199 |
|
200 |
#ifdef USE_LONG_DOUBLE |
201 |
// IEEE-854 long double format |
202 |
union fpu_extended_shape { |
203 |
fpu_extended value; |
204 |
|
205 |
/* This is the IEEE 854 double-extended-precision format. */ |
206 |
struct { |
207 |
#ifdef WORDS_BIGENDIAN |
208 |
unsigned int negative:1; |
209 |
unsigned int exponent:15; |
210 |
unsigned int empty:16; |
211 |
unsigned int mantissa0:32; |
212 |
unsigned int mantissa1:32; |
213 |
#else |
214 |
# if HOST_FLOAT_WORDS_BIG_ENDIAN |
215 |
unsigned int exponent:15; |
216 |
unsigned int negative:1; |
217 |
unsigned int empty:16; |
218 |
unsigned int mantissa0:32; |
219 |
unsigned int mantissa1:32; |
220 |
# else |
221 |
unsigned int mantissa1:32; |
222 |
unsigned int mantissa0:32; |
223 |
unsigned int exponent:15; |
224 |
unsigned int negative:1; |
225 |
unsigned int empty:16; |
226 |
# endif |
227 |
#endif |
228 |
} ieee; |
229 |
|
230 |
/* This is for NaNs in the IEEE 854 double-extended-precision format. */ |
231 |
struct { |
232 |
#ifdef WORDS_BIGENDIAN |
233 |
unsigned int negative:1; |
234 |
unsigned int exponent:15; |
235 |
unsigned int empty:16; |
236 |
unsigned int one:1; |
237 |
unsigned int quiet_nan:1; |
238 |
unsigned int mantissa0:30; |
239 |
unsigned int mantissa1:32; |
240 |
#else |
241 |
# if HOST_FLOAT_WORDS_BIG_ENDIAN |
242 |
unsigned int exponent:15; |
243 |
unsigned int negative:1; |
244 |
unsigned int empty:16; |
245 |
unsigned int mantissa0:30; |
246 |
unsigned int quiet_nan:1; |
247 |
unsigned int one:1; |
248 |
unsigned int mantissa1:32; |
249 |
# else |
250 |
unsigned int mantissa1:32; |
251 |
unsigned int mantissa0:30; |
252 |
unsigned int quiet_nan:1; |
253 |
unsigned int one:1; |
254 |
unsigned int exponent:15; |
255 |
unsigned int negative:1; |
256 |
unsigned int empty:16; |
257 |
# endif |
258 |
#endif |
259 |
} ieee_nan; |
260 |
|
261 |
/* This format is used to extract the sign_exponent and mantissa parts only */ |
262 |
struct { |
263 |
#if HOST_FLOAT_WORDS_BIG_ENDIAN |
264 |
unsigned int sign_exponent:16; |
265 |
unsigned int empty:16; |
266 |
unsigned int msw:32; |
267 |
unsigned int lsw:32; |
268 |
#else |
269 |
unsigned int lsw:32; |
270 |
unsigned int msw:32; |
271 |
unsigned int sign_exponent:16; |
272 |
unsigned int empty:16; |
273 |
#endif |
274 |
} parts; |
275 |
}; |
276 |
#endif |
277 |
|
278 |
#ifdef USE_QUAD_DOUBLE |
279 |
// IEEE-854 quad double format |
280 |
union fpu_extended_shape { |
281 |
fpu_extended value; |
282 |
|
283 |
/* This is the IEEE 854 quad-precision format. */ |
284 |
struct { |
285 |
#ifdef WORDS_BIGENDIAN |
286 |
unsigned int negative:1; |
287 |
unsigned int exponent:15; |
288 |
unsigned int mantissa0:16; |
289 |
unsigned int mantissa1:32; |
290 |
unsigned int mantissa2:32; |
291 |
unsigned int mantissa3:32; |
292 |
#else |
293 |
unsigned int mantissa3:32; |
294 |
unsigned int mantissa2:32; |
295 |
unsigned int mantissa1:32; |
296 |
unsigned int mantissa0:16; |
297 |
unsigned int exponent:15; |
298 |
unsigned int negative:1; |
299 |
#endif |
300 |
} ieee; |
301 |
|
302 |
/* This is for NaNs in the IEEE 854 quad-precision format. */ |
303 |
struct { |
304 |
#ifdef WORDS_BIGENDIAN |
305 |
unsigned int negative:1; |
306 |
unsigned int exponent:15; |
307 |
unsigned int quiet_nan:1; |
308 |
unsigned int mantissa0:15; |
309 |
unsigned int mantissa1:30; |
310 |
unsigned int mantissa2:32; |
311 |
unsigned int mantissa3:32; |
312 |
#else |
313 |
unsigned int mantissa3:32; |
314 |
unsigned int mantissa2:32; |
315 |
unsigned int mantissa1:32; |
316 |
unsigned int mantissa0:15; |
317 |
unsigned int quiet_nan:1; |
318 |
unsigned int exponent:15; |
319 |
unsigned int negative:1; |
320 |
#endif |
321 |
} ieee_nan; |
322 |
|
323 |
/* This format is used to extract the sign_exponent and mantissa parts only */ |
324 |
#if HOST_FLOAT_WORDS_BIG_ENDIAN |
325 |
struct { |
326 |
uae_u64 msw; |
327 |
uae_u64 lsw; |
328 |
} parts64; |
329 |
struct { |
330 |
uae_u32 w0; |
331 |
uae_u32 w1; |
332 |
uae_u32 w2; |
333 |
uae_u32 w3; |
334 |
} parts32; |
335 |
#else |
336 |
struct { |
337 |
uae_u64 lsw; |
338 |
uae_u64 msw; |
339 |
} parts64; |
340 |
struct { |
341 |
uae_u32 w3; |
342 |
uae_u32 w2; |
343 |
uae_u32 w1; |
344 |
uae_u32 w0; |
345 |
} parts32; |
346 |
#endif |
347 |
}; |
348 |
#endif |
349 |
|
350 |
// Declare and initialize a pointer to a shape of the requested FP type |
351 |
#define fp_declare_init_shape(psvar, rfvar, ftype) \ |
352 |
fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar ) |
353 |
|
354 |
/* -------------------------------------------------------------------------- */ |
355 |
/* --- Extra Math Functions --- */ |
356 |
/* --- (most of them had to be defined before including <fpu/flags.h>) --- */ |
357 |
/* -------------------------------------------------------------------------- */ |
358 |
|
359 |
#undef isnan |
360 |
#if 0 && defined(HAVE_ISNANL) |
361 |
# define isnan(x) isnanl((x)) |
362 |
#else |
363 |
# define isnan(x) fp_do_isnan((x)) |
364 |
#endif |
365 |
|
366 |
PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r) |
367 |
{ |
368 |
#ifdef BRANCHES_ARE_EXPENSIVE |
369 |
#ifndef USE_LONG_DOUBLE |
370 |
fp_declare_init_shape(sxp, r, double); |
371 |
uae_s32 hx = sxp->parts.msw; |
372 |
uae_s32 lx = sxp->parts.lsw; |
373 |
hx &= 0x7fffffff; |
374 |
hx |= (uae_u32)(lx | (-lx)) >> 31; |
375 |
hx = 0x7ff00000 - hx; |
376 |
return (int)(((uae_u32)hx) >> 31); |
377 |
#elif USE_QUAD_DOUBLE |
378 |
fp_declare_init_shape(sxp, r, extended); |
379 |
uae_s64 hx = sxp->parts64.msw; |
380 |
uae_s64 lx = sxp->parts64.lsw; |
381 |
hx &= 0x7fffffffffffffffLL; |
382 |
hx |= (uae_u64)(lx | (-lx)) >> 63; |
383 |
hx = 0x7fff000000000000LL - hx; |
384 |
return (int)((uae_u64)hx >> 63); |
385 |
#else |
386 |
fp_declare_init_shape(sxp, r, extended); |
387 |
uae_s32 se = sxp->parts.sign_exponent; |
388 |
uae_s32 hx = sxp->parts.msw; |
389 |
uae_s32 lx = sxp->parts.lsw; |
390 |
se = (se & 0x7fff) << 1; |
391 |
lx |= hx & 0x7fffffff; |
392 |
se |= (uae_u32)(lx | (-lx)) >> 31; |
393 |
se = 0xfffe - se; |
394 |
// TODO: check whether rshift count is 16 or 31 |
395 |
return (int)(((uae_u32)(se)) >> 16); |
396 |
#endif |
397 |
#else |
398 |
#ifndef USE_LONG_DOUBLE |
399 |
fp_declare_init_shape(sxp, r, double); |
400 |
return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) |
401 |
#else |
402 |
fp_declare_init_shape(sxp, r, extended); |
403 |
return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) |
404 |
#endif |
405 |
&& (sxp->ieee_nan.mantissa0 != 0) |
406 |
&& (sxp->ieee_nan.mantissa1 != 0) |
407 |
#ifdef USE_QUAD_DOUBLE |
408 |
&& (sxp->ieee_nan.mantissa2 != 0) |
409 |
&& (sxp->ieee_nan.mantissa3 != 0) |
410 |
#endif |
411 |
; |
412 |
#endif |
413 |
} |
414 |
|
415 |
#undef isinf |
416 |
#if 0 && defined(HAVE_ISINFL) |
417 |
# define isinf(x) isinfl((x)) |
418 |
#else |
419 |
# define isinf(x) fp_do_isinf((x)) |
420 |
#endif |
421 |
|
422 |
PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r) |
423 |
{ |
424 |
#ifdef BRANCHES_ARE_EXPENSIVE |
425 |
#ifndef USE_LONG_DOUBLE |
426 |
fp_declare_init_shape(sxp, r, double); |
427 |
uae_s32 hx = sxp->parts.msw; |
428 |
uae_s32 lx = sxp->parts.lsw; |
429 |
lx |= (hx & 0x7fffffff) ^ 0x7ff00000; |
430 |
lx |= -lx; |
431 |
return ~(lx >> 31) & (hx >> 30); |
432 |
#elif USE_QUAD_DOUBLE |
433 |
fp_declare_init_shape(sxp, r, extended); |
434 |
uae_s64 hx = sxp->parts64.msw; |
435 |
uae_s64 lx = sxp->parts64.lsw; |
436 |
lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL; |
437 |
lx |= -lx; |
438 |
return ~(lx >> 63) & (hx >> 62); |
439 |
#else |
440 |
fp_declare_init_shape(sxp, r, extended); |
441 |
uae_s32 se = sxp->parts.sign_exponent; |
442 |
uae_s32 hx = sxp->parts.msw; |
443 |
uae_s32 lx = sxp->parts.lsw; |
444 |
/* This additional ^ 0x80000000 is necessary because in Intel's |
445 |
internal representation of the implicit one is explicit. |
446 |
NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */ |
447 |
#ifdef __i386__ |
448 |
lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff); |
449 |
#else |
450 |
lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff); |
451 |
#endif |
452 |
lx |= -lx; |
453 |
se &= 0x8000; |
454 |
return ~(lx >> 31) & (1 - (se >> 14)); |
455 |
#endif |
456 |
#else |
457 |
#ifndef USE_LONG_DOUBLE |
458 |
fp_declare_init_shape(sxp, r, double); |
459 |
return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) |
460 |
#else |
461 |
fp_declare_init_shape(sxp, r, extended); |
462 |
return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) |
463 |
#endif |
464 |
&& (sxp->ieee_nan.mantissa0 == 0) |
465 |
&& (sxp->ieee_nan.mantissa1 == 0) |
466 |
#ifdef USE_QUAD_DOUBLE |
467 |
&& (sxp->ieee_nan.mantissa2 == 0) |
468 |
&& (sxp->ieee_nan.mantissa3 == 0) |
469 |
#endif |
470 |
; |
471 |
#endif |
472 |
} |
473 |
|
474 |
#undef isneg |
475 |
#define isneg(x) fp_do_isneg((x)) |
476 |
|
477 |
PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r) |
478 |
{ |
479 |
#ifndef USE_LONG_DOUBLE |
480 |
fp_declare_init_shape(sxp, r, double); |
481 |
#else |
482 |
fp_declare_init_shape(sxp, r, extended); |
483 |
#endif |
484 |
return sxp->ieee.negative; |
485 |
} |
486 |
|
487 |
#undef iszero |
488 |
#define iszero(x) fp_do_iszero((x)) |
489 |
|
490 |
PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r) |
491 |
{ |
492 |
// TODO: BRANCHES_ARE_EXPENSIVE |
493 |
#ifndef USE_LONG_DOUBLE |
494 |
fp_declare_init_shape(sxp, r, double); |
495 |
#else |
496 |
fp_declare_init_shape(sxp, r, extended); |
497 |
#endif |
498 |
return (sxp->ieee.exponent == 0) |
499 |
&& (sxp->ieee.mantissa0 == 0) |
500 |
&& (sxp->ieee.mantissa1 == 0) |
501 |
#ifdef USE_QUAD_DOUBLE |
502 |
&& (sxp->ieee.mantissa2 == 0) |
503 |
&& (sxp->ieee.mantissa3 == 0) |
504 |
#endif |
505 |
; |
506 |
} |
507 |
|
508 |
PRIVATE inline void FFPU get_dest_flags(fpu_register const & r) |
509 |
{ |
510 |
fl_dest.negative = isneg(r); |
511 |
fl_dest.zero = iszero(r); |
512 |
fl_dest.infinity = isinf(r); |
513 |
fl_dest.nan = isnan(r); |
514 |
fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan; |
515 |
} |
516 |
|
517 |
PRIVATE inline void FFPU get_source_flags(fpu_register const & r) |
518 |
{ |
519 |
fl_source.negative = isneg(r); |
520 |
fl_source.zero = iszero(r); |
521 |
fl_source.infinity = isinf(r); |
522 |
fl_source.nan = isnan(r); |
523 |
fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; |
524 |
} |
525 |
|
526 |
PRIVATE inline void FFPU make_nan(fpu_register & r) |
527 |
{ |
528 |
// FIXME: is that correct ? |
529 |
#ifndef USE_LONG_DOUBLE |
530 |
fp_declare_init_shape(sxp, r, double); |
531 |
sxp->ieee.exponent = FP_DOUBLE_EXP_MAX; |
532 |
sxp->ieee.mantissa0 = 0xfffff; |
533 |
#else |
534 |
fp_declare_init_shape(sxp, r, extended); |
535 |
sxp->ieee.exponent = FP_EXTENDED_EXP_MAX; |
536 |
sxp->ieee.mantissa0 = 0xffffffff; |
537 |
#endif |
538 |
sxp->ieee.mantissa1 = 0xffffffff; |
539 |
#ifdef USE_QUAD_DOUBLE |
540 |
sxp->ieee.mantissa2 = 0xffffffff; |
541 |
sxp->ieee.mantissa3 = 0xffffffff; |
542 |
#endif |
543 |
} |
544 |
|
545 |
PRIVATE inline void FFPU make_zero_positive(fpu_register & r) |
546 |
{ |
547 |
#if 1 |
548 |
r = +0.0; |
549 |
#else |
550 |
#ifndef USE_LONG_DOUBLE |
551 |
fp_declare_init_shape(sxp, r, double); |
552 |
#else |
553 |
fp_declare_init_shape(sxp, r, extended); |
554 |
#endif |
555 |
sxp->ieee.negative = 0; |
556 |
sxp->ieee.exponent = 0; |
557 |
sxp->ieee.mantissa0 = 0; |
558 |
sxp->ieee.mantissa1 = 0; |
559 |
#ifdef USE_QUAD_DOUBLE |
560 |
sxp->ieee.mantissa2 = 0; |
561 |
sxp->ieee.mantissa3 = 0; |
562 |
#endif |
563 |
#endif |
564 |
} |
565 |
|
566 |
PRIVATE inline void FFPU make_zero_negative(fpu_register & r) |
567 |
{ |
568 |
#if 1 |
569 |
r = -0.0; |
570 |
#else |
571 |
#ifndef USE_LONG_DOUBLE |
572 |
fp_declare_init_shape(sxp, r, double); |
573 |
#else |
574 |
fp_declare_init_shape(sxp, r, extended); |
575 |
#endif |
576 |
sxp->ieee.negative = 1; |
577 |
sxp->ieee.exponent = 0; |
578 |
sxp->ieee.mantissa0 = 0; |
579 |
sxp->ieee.mantissa1 = 0; |
580 |
#ifdef USE_QUAD_DOUBLE |
581 |
sxp->ieee.mantissa2 = 0; |
582 |
sxp->ieee.mantissa3 = 0; |
583 |
#endif |
584 |
#endif |
585 |
} |
586 |
|
587 |
PRIVATE inline void FFPU make_inf_positive(fpu_register & r) |
588 |
{ |
589 |
#ifndef USE_LONG_DOUBLE |
590 |
fp_declare_init_shape(sxp, r, double); |
591 |
sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; |
592 |
#else |
593 |
fp_declare_init_shape(sxp, r, extended); |
594 |
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
595 |
#endif |
596 |
sxp->ieee_nan.negative = 0; |
597 |
sxp->ieee_nan.mantissa0 = 0; |
598 |
sxp->ieee_nan.mantissa1 = 0; |
599 |
#ifdef USE_QUAD_DOUBLE |
600 |
sxp->ieee_nan.mantissa2 = 0; |
601 |
sxp->ieee_nan.mantissa3 = 0; |
602 |
#endif |
603 |
} |
604 |
|
605 |
PRIVATE inline void FFPU make_inf_negative(fpu_register & r) |
606 |
{ |
607 |
#ifndef USE_LONG_DOUBLE |
608 |
fp_declare_init_shape(sxp, r, double); |
609 |
sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; |
610 |
#else |
611 |
fp_declare_init_shape(sxp, r, extended); |
612 |
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
613 |
#endif |
614 |
sxp->ieee_nan.negative = 1; |
615 |
sxp->ieee_nan.mantissa0 = 0; |
616 |
sxp->ieee_nan.mantissa1 = 0; |
617 |
#ifdef USE_QUAD_DOUBLE |
618 |
sxp->ieee_nan.mantissa2 = 0; |
619 |
sxp->ieee_nan.mantissa3 = 0; |
620 |
#endif |
621 |
} |
622 |
|
623 |
PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) |
624 |
{ |
625 |
#ifndef USE_LONG_DOUBLE |
626 |
fp_declare_init_shape(sxp, r, double); |
627 |
return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS); |
628 |
#else |
629 |
fp_declare_init_shape(sxp, r, extended); |
630 |
return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS); |
631 |
#endif |
632 |
} |
633 |
|
634 |
// Normalize to range 1..2 |
635 |
PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r) |
636 |
{ |
637 |
#ifndef USE_LONG_DOUBLE |
638 |
fp_declare_init_shape(sxp, r, double); |
639 |
sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS; |
640 |
#else |
641 |
fp_declare_init_shape(sxp, r, extended); |
642 |
sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS; |
643 |
#endif |
644 |
} |
645 |
|
646 |
// The sign of the quotient is the exclusive-OR of the sign bits |
647 |
// of the source and destination operands. |
648 |
PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) |
649 |
{ |
650 |
#ifndef USE_LONG_DOUBLE |
651 |
fp_declare_init_shape(sap, ra, double); |
652 |
fp_declare_init_shape(sbp, rb, double); |
653 |
#else |
654 |
fp_declare_init_shape(sap, ra, extended); |
655 |
fp_declare_init_shape(sbp, rb, extended); |
656 |
#endif |
657 |
return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0); |
658 |
} |
659 |
|
660 |
/* -------------------------------------------------------------------------- */ |
661 |
/* --- Math functions --- */ |
662 |
/* -------------------------------------------------------------------------- */ |
663 |
|
664 |
#if FPU_USE_ISO_C99 && USE_LONG_DOUBLE |
665 |
# define fp_log logl |
666 |
# define fp_log10 log10l |
667 |
# define fp_exp expl |
668 |
# define fp_pow powl |
669 |
# define fp_fabs fabsl |
670 |
# define fp_sqrt sqrtl |
671 |
# define fp_sin sinl |
672 |
# define fp_cos cosl |
673 |
# define fp_tan tanl |
674 |
# define fp_sinh sinhl |
675 |
# define fp_cosh coshl |
676 |
# define fp_tanh tanhl |
677 |
# define fp_asin asinl |
678 |
# define fp_acos acosl |
679 |
# define fp_atan atanl |
680 |
# define fp_asinh asinhl |
681 |
# define fp_acosh acoshl |
682 |
# define fp_atanh atanhl |
683 |
# define fp_floor floorl |
684 |
# define fp_ceil ceill |
685 |
#else |
686 |
# define fp_log log |
687 |
# define fp_log10 log10 |
688 |
# define fp_exp exp |
689 |
# define fp_pow pow |
690 |
# define fp_fabs fabs |
691 |
# define fp_sqrt sqrt |
692 |
# define fp_sin sin |
693 |
# define fp_cos cos |
694 |
# define fp_tan tan |
695 |
# define fp_sinh sinh |
696 |
# define fp_cosh cosh |
697 |
# define fp_tanh tanh |
698 |
# define fp_asin asin |
699 |
# define fp_acos acos |
700 |
# define fp_atan atan |
701 |
# define fp_asinh asinh |
702 |
# define fp_acosh acosh |
703 |
# define fp_atanh atanh |
704 |
# define fp_floor floor |
705 |
# define fp_ceil ceil |
706 |
#endif |
707 |
|
708 |
#if defined(FPU_IEEE) && defined(X86_ASSEMBLY) |
709 |
// Assembly optimized support functions. Taken from glibc 2.2.2 |
710 |
|
711 |
#undef fp_log |
712 |
#define fp_log fp_do_log |
713 |
|
714 |
#ifndef FPU_FAST_MATH |
715 |
// FIXME: unimplemented |
716 |
PRIVATE fpu_extended fp_do_log(fpu_extended x); |
717 |
#else |
718 |
PRIVATE inline fpu_extended fp_do_log(fpu_extended x) |
719 |
{ |
720 |
fpu_extended value; |
721 |
__asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); |
722 |
return value; |
723 |
} |
724 |
#endif |
725 |
|
726 |
#undef fp_log10 |
727 |
#define fp_log10 fp_do_log10 |
728 |
|
729 |
#ifndef FPU_FAST_MATH |
730 |
// FIXME: unimplemented |
731 |
PRIVATE fpu_extended fp_do_log10(fpu_extended x); |
732 |
#else |
733 |
PRIVATE inline fpu_extended fp_do_log10(fpu_extended x) |
734 |
{ |
735 |
fpu_extended value; |
736 |
__asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); |
737 |
return value; |
738 |
} |
739 |
#endif |
740 |
|
741 |
#undef fp_exp |
742 |
#define fp_exp fp_do_exp |
743 |
|
744 |
#ifndef FPU_FAST_MATH |
745 |
// FIXME: unimplemented |
746 |
PRIVATE fpu_extended fp_do_exp(fpu_extended x); |
747 |
#else |
748 |
PRIVATE inline fpu_extended fp_do_exp(fpu_extended x) |
749 |
{ |
750 |
fpu_extended value, exponent; |
751 |
__asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t" |
752 |
"fmul %%st(1) # x * log2(e)\n\t" |
753 |
"fst %%st(1)\n\t" |
754 |
"frndint # int(x * log2(e))\n\t" |
755 |
"fxch\n\t" |
756 |
"fsub %%st(1) # fract(x * log2(e))\n\t" |
757 |
"f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" |
758 |
: "=t" (value), "=u" (exponent) : "0" (x)); |
759 |
value += 1.0; |
760 |
__asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent)); |
761 |
return value; |
762 |
} |
763 |
#endif |
764 |
|
765 |
#undef fp_pow |
766 |
#define fp_pow fp_do_pow |
767 |
|
768 |
PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y); |
769 |
|
770 |
#undef fp_fabs |
771 |
#define fp_fabs fp_do_fabs |
772 |
|
773 |
PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x) |
774 |
{ |
775 |
fpu_extended value; |
776 |
__asm__ __volatile__("fabs" : "=t" (value) : "0" (x)); |
777 |
return value; |
778 |
} |
779 |
|
780 |
#undef fp_sqrt |
781 |
#define fp_sqrt fp_do_sqrt |
782 |
|
783 |
PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x) |
784 |
{ |
785 |
fpu_extended value; |
786 |
__asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x)); |
787 |
return value; |
788 |
} |
789 |
|
790 |
#undef fp_sin |
791 |
#define fp_sin fp_do_sin |
792 |
|
793 |
PRIVATE inline fpu_extended fp_do_sin(fpu_extended x) |
794 |
{ |
795 |
fpu_extended value; |
796 |
__asm__ __volatile__("fsin" : "=t" (value) : "0" (x)); |
797 |
return value; |
798 |
} |
799 |
|
800 |
#undef fp_cos |
801 |
#define fp_cos fp_do_cos |
802 |
|
803 |
PRIVATE inline fpu_extended fp_do_cos(fpu_extended x) |
804 |
{ |
805 |
fpu_extended value; |
806 |
__asm__ __volatile__("fcos" : "=t" (value) : "0" (x)); |
807 |
return value; |
808 |
} |
809 |
|
810 |
#undef fp_tan |
811 |
#define fp_tan fp_do_tan |
812 |
|
813 |
PRIVATE inline fpu_extended fp_do_tan(fpu_extended x) |
814 |
{ |
815 |
fpu_extended value; |
816 |
__asm__ __volatile__("fptan" : "=t" (value) : "0" (x)); |
817 |
return value; |
818 |
} |
819 |
|
820 |
#undef fp_expm1 |
821 |
#define fp_expm1 fp_do_expm1 |
822 |
|
823 |
// Returns: exp(X) - 1.0 |
824 |
PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x) |
825 |
{ |
826 |
fpu_extended value, exponent, temp; |
827 |
__asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" |
828 |
"fmul %%st(1) # x * log2(e)\n\t" |
829 |
"fst %%st(1)\n\t" |
830 |
"frndint # int(x * log2(e))\n\t" |
831 |
"fxch\n\t" |
832 |
"fsub %%st(1) # fract(x * log2(e))\n\t" |
833 |
"f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" |
834 |
"fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" |
835 |
: "=t" (value), "=u" (exponent) : "0" (x)); |
836 |
__asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent)); |
837 |
temp -= 1.0; |
838 |
return temp + value ? temp + value : x; |
839 |
} |
840 |
|
841 |
#undef fp_sgn1 |
842 |
#define fp_sgn1 fp_do_sgn1 |
843 |
|
844 |
PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x) |
845 |
{ |
846 |
fp_declare_init_shape(sxp, x, extended); |
847 |
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
848 |
sxp->ieee_nan.one = 1; |
849 |
sxp->ieee_nan.quiet_nan = 0; |
850 |
sxp->ieee_nan.mantissa0 = 0; |
851 |
sxp->ieee_nan.mantissa1 = 0; |
852 |
return x; |
853 |
} |
854 |
|
855 |
#undef fp_sinh |
856 |
#define fp_sinh fp_do_sinh |
857 |
|
858 |
#ifndef FPU_FAST_MATH |
859 |
// FIXME: unimplemented |
860 |
PRIVATE fpu_extended fp_do_sinh(fpu_extended x); |
861 |
#else |
862 |
PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x) |
863 |
{ |
864 |
fpu_extended exm1 = fp_expm1(fp_fabs(x)); |
865 |
return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x); |
866 |
} |
867 |
#endif |
868 |
|
869 |
#undef fp_cosh |
870 |
#define fp_cosh fp_do_cosh |
871 |
|
872 |
#ifndef FPU_FAST_MATH |
873 |
// FIXME: unimplemented |
874 |
PRIVATE fpu_extended fp_do_cosh(fpu_extended x); |
875 |
#else |
876 |
PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x) |
877 |
{ |
878 |
fpu_extended ex = fp_exp(x); |
879 |
return 0.5 * (ex + 1.0 / ex); |
880 |
} |
881 |
#endif |
882 |
|
883 |
#undef fp_tanh |
884 |
#define fp_tanh fp_do_tanh |
885 |
|
886 |
#ifndef FPU_FAST_MATH |
887 |
// FIXME: unimplemented |
888 |
PRIVATE fpu_extended fp_do_tanh(fpu_extended x); |
889 |
#else |
890 |
PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x) |
891 |
{ |
892 |
fpu_extended exm1 = fp_expm1(-fp_fabs(x + x)); |
893 |
return exm1 / (exm1 + 2.0) * fp_sgn1(-x); |
894 |
} |
895 |
#endif |
896 |
|
897 |
#undef fp_atan2 |
898 |
#define fp_atan2 fp_do_atan2 |
899 |
|
900 |
PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x) |
901 |
{ |
902 |
fpu_extended value; |
903 |
__asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)"); |
904 |
return value; |
905 |
} |
906 |
|
907 |
#undef fp_asin |
908 |
#define fp_asin fp_do_asin |
909 |
|
910 |
PRIVATE inline fpu_extended fp_do_asin(fpu_extended x) |
911 |
{ |
912 |
return fp_atan2(x, fp_sqrt(1.0 - x * x)); |
913 |
} |
914 |
|
915 |
#undef fp_acos |
916 |
#define fp_acos fp_do_acos |
917 |
|
918 |
PRIVATE inline fpu_extended fp_do_acos(fpu_extended x) |
919 |
{ |
920 |
return fp_atan2(fp_sqrt(1.0 - x * x), x); |
921 |
} |
922 |
|
923 |
#undef fp_atan |
924 |
#define fp_atan fp_do_atan |
925 |
|
926 |
PRIVATE inline fpu_extended fp_do_atan(fpu_extended x) |
927 |
{ |
928 |
fpu_extended value; |
929 |
__asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)"); |
930 |
return value; |
931 |
} |
932 |
|
933 |
#undef fp_log1p |
934 |
#define fp_log1p fp_do_log1p |
935 |
|
936 |
// Returns: ln(1.0 + X) |
937 |
PRIVATE fpu_extended fp_do_log1p(fpu_extended x); |
938 |
|
939 |
#undef fp_asinh |
940 |
#define fp_asinh fp_do_asinh |
941 |
|
942 |
PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x) |
943 |
{ |
944 |
fpu_extended y = fp_fabs(x); |
945 |
return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x)); |
946 |
} |
947 |
|
948 |
#undef fp_acosh |
949 |
#define fp_acosh fp_do_acosh |
950 |
|
951 |
PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x) |
952 |
{ |
953 |
return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0)); |
954 |
} |
955 |
|
956 |
#undef fp_atanh |
957 |
#define fp_atanh fp_do_atanh |
958 |
|
959 |
PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x) |
960 |
{ |
961 |
fpu_extended y = fp_fabs(x); |
962 |
return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x); |
963 |
} |
964 |
|
965 |
#undef fp_floor |
966 |
#define fp_floor fp_do_floor |
967 |
|
968 |
PRIVATE inline fpu_extended fp_do_floor(fpu_extended x) |
969 |
{ |
970 |
volatile unsigned int cw; |
971 |
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); |
972 |
volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down |
973 |
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); |
974 |
fpu_extended value; |
975 |
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); |
976 |
__asm__ __volatile__("fldcw %0" : : "m" (cw)); |
977 |
return value; |
978 |
} |
979 |
|
980 |
#undef fp_ceil |
981 |
#define fp_ceil fp_do_ceil |
982 |
|
983 |
PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x) |
984 |
{ |
985 |
volatile unsigned int cw; |
986 |
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); |
987 |
volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up |
988 |
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); |
989 |
fpu_extended value; |
990 |
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); |
991 |
__asm__ __volatile__("fldcw %0" : : "m" (cw)); |
992 |
return value; |
993 |
} |
994 |
|
995 |
#define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ |
996 |
PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \ |
997 |
{ \ |
998 |
volatile unsigned int cw; \ |
999 |
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); \ |
1000 |
volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \ |
1001 |
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \ |
1002 |
fpu_extended value; \ |
1003 |
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \ |
1004 |
__asm__ __volatile__("fldcw %0" : : "m" (cw)); \ |
1005 |
return value; \ |
1006 |
} |
1007 |
|
1008 |
#undef fp_round_to_minus_infinity |
1009 |
#define fp_round_to_minus_infinity fp_do_round_to_minus_infinity |
1010 |
|
1011 |
DEFINE_ROUND_FUNC(minus_infinity, 0x400) |
1012 |
|
1013 |
#undef fp_round_to_plus_infinity |
1014 |
#define fp_round_to_plus_infinity fp_do_round_to_plus_infinity |
1015 |
|
1016 |
DEFINE_ROUND_FUNC(plus_infinity, 0x800) |
1017 |
|
1018 |
#undef fp_round_to_zero |
1019 |
#define fp_round_to_zero fp_do_round_to_zero |
1020 |
|
1021 |
DEFINE_ROUND_FUNC(zero, 0xc00) |
1022 |
|
1023 |
#undef fp_round_to_nearest |
1024 |
#define fp_round_to_nearest fp_do_round_to_nearest |
1025 |
|
1026 |
DEFINE_ROUND_FUNC(nearest, 0x000) |
1027 |
|
1028 |
#endif /* X86_ASSEMBLY */ |
1029 |
|
1030 |
#ifndef fp_round_to_minus_infinity |
1031 |
#define fp_round_to_minus_infinity(x) fp_floor(x) |
1032 |
#endif |
1033 |
|
1034 |
#ifndef fp_round_to_plus_infinity |
1035 |
#define fp_round_to_plus_infinity(x) fp_ceil(x) |
1036 |
#endif |
1037 |
|
1038 |
#ifndef fp_round_to_zero |
1039 |
#define fp_round_to_zero(x) ((int)(x)) |
1040 |
#endif |
1041 |
|
1042 |
#ifndef fp_round_to_nearest |
1043 |
#define fp_round_to_nearest(x) ((int)((x) + 0.5)) |
1044 |
#endif |
1045 |
|
1046 |
#endif /* FPU_MATHLIB_H */ |