1 |
/* |
2 |
* fpu/mathlib.h - Floating-point math support library |
3 |
* |
4 |
* Basilisk II (C) 1997-2001 Christian Bauer |
5 |
* |
6 |
* MC68881/68040 fpu emulation |
7 |
* |
8 |
* Original UAE FPU, copyright 1996 Herman ten Brugge |
9 |
* Rewrite for x86, copyright 1999-2001 Lauri Pesonen |
10 |
* New framework, copyright 2000-2001 Gwenole Beauchesne |
11 |
* Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 |
12 |
* |
13 |
* This program is free software; you can redistribute it and/or modify |
14 |
* it under the terms of the GNU General Public License as published by |
15 |
* the Free Software Foundation; either version 2 of the License, or |
16 |
* (at your option) any later version. |
17 |
* |
18 |
* This program is distributed in the hope that it will be useful, |
19 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 |
* GNU General Public License for more details. |
22 |
* |
23 |
* You should have received a copy of the GNU General Public License |
24 |
* along with this program; if not, write to the Free Software |
25 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
26 |
*/ |
27 |
|
28 |
#ifndef FPU_MATHLIB_H |
29 |
#define FPU_MATHLIB_H |
30 |
|
31 |
/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ |
32 |
#undef PUBLIC |
33 |
#define PUBLIC extern |
34 |
|
35 |
#undef PRIVATE |
36 |
#define PRIVATE static |
37 |
|
38 |
#undef FFPU |
39 |
#define FFPU /**/ |
40 |
|
41 |
#undef FPU |
42 |
#define FPU fpu. |
43 |
|
44 |
// Define the following macro if branches are expensive. If so, |
45 |
// integer-based isnan() and isinf() functions are implemented. |
46 |
// TODO: move to Makefile.in |
47 |
#define BRANCHES_ARE_EXPENSIVE 1 |
48 |
|
49 |
// Use ISO C99 extended-precision math functions (glibc 2.1+) |
50 |
#define FPU_USE_ISO_C99 1 |
51 |
|
52 |
// NOTE: this is irrelevant on Win32 platforms since the MS libraries |
53 |
// don't support extended-precision floating-point computations |
54 |
#ifdef WIN32 |
55 |
#undef FPU_USE_ISO_C99 |
56 |
#endif |
57 |
|
58 |
// Use faster implementation of math functions, but this could cause |
59 |
// some incorrect results (?) |
60 |
// TODO: actually implement the slower but safer versions |
61 |
#define FPU_FAST_MATH 1 |
62 |
|
63 |
#if FPU_USE_ISO_C99 |
64 |
// NOTE: no prior <math.h> shall be included at this point |
65 |
#define __USE_ISOC99 1 // for glibc 2.2.X and newer |
66 |
#define __USE_ISOC9X 1 // for glibc 2.1.X |
67 |
#include <math.h> |
68 |
#else |
69 |
#include <cmath> |
70 |
using namespace std; |
71 |
#endif |
72 |
|
73 |
/* -------------------------------------------------------------------------- */ |
74 |
/* --- Floating-point register types --- */ |
75 |
/* -------------------------------------------------------------------------- */ |
76 |
|
77 |
// Single : S 8*E 23*F |
78 |
#define FP_SINGLE_EXP_MAX 0xff |
79 |
#define FP_SINGLE_EXP_BIAS 0x7f |
80 |
|
81 |
// Double : S 11*E 52*F |
82 |
#define FP_DOUBLE_EXP_MAX 0x7ff |
83 |
#define FP_DOUBLE_EXP_BIAS 0x3ff |
84 |
|
85 |
// Extended : S 15*E 64*F |
86 |
#define FP_EXTENDED_EXP_MAX 0x7fff |
87 |
#define FP_EXTENDED_EXP_BIAS 0x3fff |
88 |
|
89 |
// Zeroes : E = 0 & F = 0 |
90 |
// Infinities : E = MAX & F = 0 |
91 |
// Not-A-Number : E = MAX & F # 0 |
92 |
|
93 |
/* -------------------------------------------------------------------------- */ |
94 |
/* --- Floating-point type shapes (IEEE-compliant) --- */ |
95 |
/* -------------------------------------------------------------------------- */ |
96 |
|
97 |
// Taken from glibc 2.2.x: ieee754.h |
98 |
|
99 |
// IEEE-754 float format |
100 |
union fpu_single_shape { |
101 |
|
102 |
fpu_single value; |
103 |
|
104 |
/* This is the IEEE 754 single-precision format. */ |
105 |
struct { |
106 |
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
107 |
unsigned int negative:1; |
108 |
unsigned int exponent:8; |
109 |
unsigned int mantissa:23; |
110 |
#endif /* Big endian. */ |
111 |
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
112 |
unsigned int mantissa:23; |
113 |
unsigned int exponent:8; |
114 |
unsigned int negative:1; |
115 |
#endif /* Little endian. */ |
116 |
} ieee; |
117 |
|
118 |
/* This format makes it easier to see if a NaN is a signalling NaN. */ |
119 |
struct { |
120 |
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
121 |
unsigned int negative:1; |
122 |
unsigned int exponent:8; |
123 |
unsigned int quiet_nan:1; |
124 |
unsigned int mantissa:22; |
125 |
#endif /* Big endian. */ |
126 |
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
127 |
unsigned int mantissa:22; |
128 |
unsigned int quiet_nan:1; |
129 |
unsigned int exponent:8; |
130 |
unsigned int negative:1; |
131 |
#endif /* Little endian. */ |
132 |
} ieee_nan; |
133 |
}; |
134 |
|
135 |
// IEEE-754 double format |
136 |
union fpu_double_shape { |
137 |
fpu_double value; |
138 |
|
139 |
/* This is the IEEE 754 double-precision format. */ |
140 |
struct { |
141 |
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
142 |
unsigned int negative:1; |
143 |
unsigned int exponent:11; |
144 |
/* Together these comprise the mantissa. */ |
145 |
unsigned int mantissa0:20; |
146 |
unsigned int mantissa1:32; |
147 |
#endif /* Big endian. */ |
148 |
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
149 |
# if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
150 |
unsigned int mantissa0:20; |
151 |
unsigned int exponent:11; |
152 |
unsigned int negative:1; |
153 |
unsigned int mantissa1:32; |
154 |
# else |
155 |
/* Together these comprise the mantissa. */ |
156 |
unsigned int mantissa1:32; |
157 |
unsigned int mantissa0:20; |
158 |
unsigned int exponent:11; |
159 |
unsigned int negative:1; |
160 |
# endif |
161 |
#endif /* Little endian. */ |
162 |
} ieee; |
163 |
|
164 |
/* This format makes it easier to see if a NaN is a signalling NaN. */ |
165 |
struct { |
166 |
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
167 |
unsigned int negative:1; |
168 |
unsigned int exponent:11; |
169 |
unsigned int quiet_nan:1; |
170 |
/* Together these comprise the mantissa. */ |
171 |
unsigned int mantissa0:19; |
172 |
unsigned int mantissa1:32; |
173 |
#else |
174 |
# if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
175 |
unsigned int mantissa0:19; |
176 |
unsigned int quiet_nan:1; |
177 |
unsigned int exponent:11; |
178 |
unsigned int negative:1; |
179 |
unsigned int mantissa1:32; |
180 |
# else |
181 |
/* Together these comprise the mantissa. */ |
182 |
unsigned int mantissa1:32; |
183 |
unsigned int mantissa0:19; |
184 |
unsigned int quiet_nan:1; |
185 |
unsigned int exponent:11; |
186 |
unsigned int negative:1; |
187 |
# endif |
188 |
#endif |
189 |
} ieee_nan; |
190 |
|
191 |
/* This format is used to extract the sign_exponent and mantissa parts only */ |
192 |
struct { |
193 |
#if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
194 |
unsigned int msw:32; |
195 |
unsigned int lsw:32; |
196 |
#else |
197 |
unsigned int lsw:32; |
198 |
unsigned int msw:32; |
199 |
#endif |
200 |
} parts; |
201 |
}; |
202 |
|
203 |
#ifdef USE_LONG_DOUBLE |
204 |
// IEEE-854 long double format |
205 |
union fpu_extended_shape { |
206 |
fpu_extended value; |
207 |
|
208 |
/* This is the IEEE 854 double-extended-precision format. */ |
209 |
struct { |
210 |
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
211 |
unsigned int negative:1; |
212 |
unsigned int exponent:15; |
213 |
unsigned int empty:16; |
214 |
unsigned int mantissa0:32; |
215 |
unsigned int mantissa1:32; |
216 |
#endif |
217 |
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
218 |
# if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
219 |
unsigned int exponent:15; |
220 |
unsigned int negative:1; |
221 |
unsigned int empty:16; |
222 |
unsigned int mantissa0:32; |
223 |
unsigned int mantissa1:32; |
224 |
# else |
225 |
unsigned int mantissa1:32; |
226 |
unsigned int mantissa0:32; |
227 |
unsigned int exponent:15; |
228 |
unsigned int negative:1; |
229 |
unsigned int empty:16; |
230 |
# endif |
231 |
#endif |
232 |
} ieee; |
233 |
|
234 |
/* This is for NaNs in the IEEE 854 double-extended-precision format. */ |
235 |
struct { |
236 |
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
237 |
unsigned int negative:1; |
238 |
unsigned int exponent:15; |
239 |
unsigned int empty:16; |
240 |
unsigned int one:1; |
241 |
unsigned int quiet_nan:1; |
242 |
unsigned int mantissa0:30; |
243 |
unsigned int mantissa1:32; |
244 |
#endif |
245 |
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
246 |
# if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
247 |
unsigned int exponent:15; |
248 |
unsigned int negative:1; |
249 |
unsigned int empty:16; |
250 |
unsigned int mantissa0:30; |
251 |
unsigned int quiet_nan:1; |
252 |
unsigned int one:1; |
253 |
unsigned int mantissa1:32; |
254 |
# else |
255 |
unsigned int mantissa1:32; |
256 |
unsigned int mantissa0:30; |
257 |
unsigned int quiet_nan:1; |
258 |
unsigned int one:1; |
259 |
unsigned int exponent:15; |
260 |
unsigned int negative:1; |
261 |
unsigned int empty:16; |
262 |
# endif |
263 |
#endif |
264 |
} ieee_nan; |
265 |
|
266 |
/* This format is used to extract the sign_exponent and mantissa parts only */ |
267 |
struct { |
268 |
#if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
269 |
unsigned int sign_exponent:16; |
270 |
unsigned int empty:16; |
271 |
unsigned int msw:32; |
272 |
unsigned int lsw:32; |
273 |
#else |
274 |
unsigned int lsw:32; |
275 |
unsigned int msw:32; |
276 |
unsigned int sign_exponent:16; |
277 |
unsigned int empty:16; |
278 |
#endif |
279 |
} parts; |
280 |
}; |
281 |
#endif |
282 |
|
283 |
#ifdef USE_QUAD_DOUBLE |
284 |
// IEEE-854 quad double format |
285 |
union fpu_extended_shape { |
286 |
fpu_extended value; |
287 |
|
288 |
/* This is the IEEE 854 quad-precision format. */ |
289 |
struct { |
290 |
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
291 |
unsigned int negative:1; |
292 |
unsigned int exponent:15; |
293 |
unsigned int mantissa0:16; |
294 |
unsigned int mantissa1:32; |
295 |
unsigned int mantissa2:32; |
296 |
unsigned int mantissa3:32; |
297 |
#else |
298 |
unsigned int mantissa3:32; |
299 |
unsigned int mantissa2:32; |
300 |
unsigned int mantissa1:32; |
301 |
unsigned int mantissa0:16; |
302 |
unsigned int exponent:15; |
303 |
unsigned int negative:1; |
304 |
#endif |
305 |
} ieee; |
306 |
|
307 |
/* This is for NaNs in the IEEE 854 quad-precision format. */ |
308 |
struct { |
309 |
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
310 |
unsigned int negative:1; |
311 |
unsigned int exponent:15; |
312 |
unsigned int quiet_nan:1; |
313 |
unsigned int mantissa0:15; |
314 |
unsigned int mantissa1:30; |
315 |
unsigned int mantissa2:32; |
316 |
unsigned int mantissa3:32; |
317 |
#else |
318 |
unsigned int mantissa3:32; |
319 |
unsigned int mantissa2:32; |
320 |
unsigned int mantissa1:32; |
321 |
unsigned int mantissa0:15; |
322 |
unsigned int quiet_nan:1; |
323 |
unsigned int exponent:15; |
324 |
unsigned int negative:1; |
325 |
#endif |
326 |
} ieee_nan; |
327 |
|
328 |
/* This format is used to extract the sign_exponent and mantissa parts only */ |
329 |
#if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
330 |
struct { |
331 |
uae_u64 msw; |
332 |
uae_u64 lsw; |
333 |
} parts64; |
334 |
struct { |
335 |
uae_u32 w0; |
336 |
uae_u32 w1; |
337 |
uae_u32 w2; |
338 |
uae_u32 w3; |
339 |
} parts32; |
340 |
#else |
341 |
struct { |
342 |
uae_u64 lsw; |
343 |
uae_u64 msw; |
344 |
} parts64; |
345 |
struct { |
346 |
uae_u32 w3; |
347 |
uae_u32 w2; |
348 |
uae_u32 w1; |
349 |
uae_u32 w0; |
350 |
} parts32; |
351 |
#endif |
352 |
}; |
353 |
#endif |
354 |
|
355 |
// Declare and initialize a pointer to a shape of the requested FP type |
356 |
#define fp_declare_init_shape(psvar, rfvar, ftype) \ |
357 |
fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar ) |
358 |
|
359 |
/* -------------------------------------------------------------------------- */ |
360 |
/* --- Extra Math Functions --- */ |
361 |
/* --- (most of them had to be defined before including <fpu/flags.h>) --- */ |
362 |
/* -------------------------------------------------------------------------- */ |
363 |
|
364 |
#undef isnan |
365 |
#if 0 && defined(HAVE_ISNANL) |
366 |
# define isnan(x) isnanl((x)) |
367 |
#else |
368 |
# define isnan(x) fp_do_isnan((x)) |
369 |
#endif |
370 |
|
371 |
PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r) |
372 |
{ |
373 |
#ifdef BRANCHES_ARE_EXPENSIVE |
374 |
#ifndef USE_LONG_DOUBLE |
375 |
fp_declare_init_shape(sxp, r, double); |
376 |
uae_s32 hx = sxp->parts.msw; |
377 |
uae_s32 lx = sxp->parts.lsw; |
378 |
hx &= 0x7fffffff; |
379 |
hx |= (uae_u32)(lx | (-lx)) >> 31; |
380 |
hx = 0x7ff00000 - hx; |
381 |
return (int)(((uae_u32)hx) >> 31); |
382 |
#elif USE_QUAD_DOUBLE |
383 |
fp_declare_init_shape(sxp, r, extended); |
384 |
uae_s64 hx = sxp->parts64.msw; |
385 |
uae_s64 lx = sxp->parts64.lsw; |
386 |
hx &= 0x7fffffffffffffffLL; |
387 |
hx |= (uae_u64)(lx | (-lx)) >> 63; |
388 |
hx = 0x7fff000000000000LL - hx; |
389 |
return (int)((uae_u64)hx >> 63); |
390 |
#else |
391 |
fp_declare_init_shape(sxp, r, extended); |
392 |
uae_s32 se = sxp->parts.sign_exponent; |
393 |
uae_s32 hx = sxp->parts.msw; |
394 |
uae_s32 lx = sxp->parts.lsw; |
395 |
se = (se & 0x7fff) << 1; |
396 |
lx |= hx & 0x7fffffff; |
397 |
se |= (uae_u32)(lx | (-lx)) >> 31; |
398 |
se = 0xfffe - se; |
399 |
// TODO: check whether rshift count is 16 or 31 |
400 |
return (int)(((uae_u32)(se)) >> 16); |
401 |
#endif |
402 |
#else |
403 |
#ifndef USE_LONG_DOUBLE |
404 |
fp_declare_init_shape(sxp, r, double); |
405 |
return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) |
406 |
#else |
407 |
fp_declare_init_shape(sxp, r, extended); |
408 |
return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) |
409 |
#endif |
410 |
&& (sxp->ieee_nan.mantissa0 != 0) |
411 |
&& (sxp->ieee_nan.mantissa1 != 0) |
412 |
#ifdef USE_QUAD_DOUBLE |
413 |
&& (sxp->ieee_nan.mantissa2 != 0) |
414 |
&& (sxp->ieee_nan.mantissa3 != 0) |
415 |
#endif |
416 |
; |
417 |
#endif |
418 |
} |
419 |
|
420 |
#undef isinf |
421 |
#if 0 && defined(HAVE_ISINFL) |
422 |
# define isinf(x) isinfl((x)) |
423 |
#else |
424 |
# define isinf(x) fp_do_isinf((x)) |
425 |
#endif |
426 |
|
427 |
PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r) |
428 |
{ |
429 |
#ifdef BRANCHES_ARE_EXPENSIVE |
430 |
#ifndef USE_LONG_DOUBLE |
431 |
fp_declare_init_shape(sxp, r, double); |
432 |
uae_s32 hx = sxp->parts.msw; |
433 |
uae_s32 lx = sxp->parts.lsw; |
434 |
lx |= (hx & 0x7fffffff) ^ 0x7ff00000; |
435 |
lx |= -lx; |
436 |
return ~(lx >> 31) & (hx >> 30); |
437 |
#elif USE_QUAD_DOUBLE |
438 |
fp_declare_init_shape(sxp, r, extended); |
439 |
uae_s64 hx = sxp->parts64.msw; |
440 |
uae_s64 lx = sxp->parts64.lsw; |
441 |
lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL; |
442 |
lx |= -lx; |
443 |
return ~(lx >> 63) & (hx >> 62); |
444 |
#else |
445 |
fp_declare_init_shape(sxp, r, extended); |
446 |
uae_s32 se = sxp->parts.sign_exponent; |
447 |
uae_s32 hx = sxp->parts.msw; |
448 |
uae_s32 lx = sxp->parts.lsw; |
449 |
/* This additional ^ 0x80000000 is necessary because in Intel's |
450 |
internal representation of the implicit one is explicit. |
451 |
NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */ |
452 |
#ifdef __i386__ |
453 |
lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff); |
454 |
#else |
455 |
lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff); |
456 |
#endif |
457 |
lx |= -lx; |
458 |
se &= 0x8000; |
459 |
return ~(lx >> 31) & (1 - (se >> 14)); |
460 |
#endif |
461 |
#else |
462 |
#ifndef USE_LONG_DOUBLE |
463 |
fp_declare_init_shape(sxp, r, double); |
464 |
return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) |
465 |
#else |
466 |
fp_declare_init_shape(sxp, r, extended); |
467 |
return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) |
468 |
#endif |
469 |
&& (sxp->ieee_nan.mantissa0 == 0) |
470 |
&& (sxp->ieee_nan.mantissa1 == 0) |
471 |
#ifdef USE_QUAD_DOUBLE |
472 |
&& (sxp->ieee_nan.mantissa2 == 0) |
473 |
&& (sxp->ieee_nan.mantissa3 == 0) |
474 |
#endif |
475 |
; |
476 |
#endif |
477 |
} |
478 |
|
479 |
#undef isneg |
480 |
#define isneg(x) fp_do_isneg((x)) |
481 |
|
482 |
PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r) |
483 |
{ |
484 |
#ifndef USE_LONG_DOUBLE |
485 |
fp_declare_init_shape(sxp, r, double); |
486 |
#else |
487 |
fp_declare_init_shape(sxp, r, extended); |
488 |
#endif |
489 |
return sxp->ieee.negative; |
490 |
} |
491 |
|
492 |
#undef iszero |
493 |
#define iszero(x) fp_do_iszero((x)) |
494 |
|
495 |
PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r) |
496 |
{ |
497 |
// TODO: BRANCHES_ARE_EXPENSIVE |
498 |
#ifndef USE_LONG_DOUBLE |
499 |
fp_declare_init_shape(sxp, r, double); |
500 |
#else |
501 |
fp_declare_init_shape(sxp, r, extended); |
502 |
#endif |
503 |
return (sxp->ieee.exponent == 0) |
504 |
&& (sxp->ieee.mantissa0 == 0) |
505 |
&& (sxp->ieee.mantissa1 == 0) |
506 |
#ifdef USE_QUAD_DOUBLE |
507 |
&& (sxp->ieee.mantissa2 == 0) |
508 |
&& (sxp->ieee.mantissa3 == 0) |
509 |
#endif |
510 |
; |
511 |
} |
512 |
|
513 |
PRIVATE inline void FFPU get_dest_flags(fpu_register const & r) |
514 |
{ |
515 |
fl_dest.negative = isneg(r); |
516 |
fl_dest.zero = iszero(r); |
517 |
fl_dest.infinity = isinf(r); |
518 |
fl_dest.nan = isnan(r); |
519 |
fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan; |
520 |
} |
521 |
|
522 |
PRIVATE inline void FFPU get_source_flags(fpu_register const & r) |
523 |
{ |
524 |
fl_source.negative = isneg(r); |
525 |
fl_source.zero = iszero(r); |
526 |
fl_source.infinity = isinf(r); |
527 |
fl_source.nan = isnan(r); |
528 |
fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; |
529 |
} |
530 |
|
531 |
PRIVATE inline void FFPU make_nan(fpu_register & r) |
532 |
{ |
533 |
// FIXME: is that correct ? |
534 |
#ifndef USE_LONG_DOUBLE |
535 |
fp_declare_init_shape(sxp, r, double); |
536 |
sxp->ieee.exponent = FP_DOUBLE_EXP_MAX; |
537 |
sxp->ieee.mantissa0 = 0xfffff; |
538 |
#else |
539 |
fp_declare_init_shape(sxp, r, extended); |
540 |
sxp->ieee.exponent = FP_EXTENDED_EXP_MAX; |
541 |
sxp->ieee.mantissa0 = 0xffffffff; |
542 |
#endif |
543 |
sxp->ieee.mantissa1 = 0xffffffff; |
544 |
#ifdef USE_QUAD_DOUBLE |
545 |
sxp->ieee.mantissa2 = 0xffffffff; |
546 |
sxp->ieee.mantissa3 = 0xffffffff; |
547 |
#endif |
548 |
} |
549 |
|
550 |
PRIVATE inline void FFPU make_zero_positive(fpu_register & r) |
551 |
{ |
552 |
#if 1 |
553 |
r = +0.0; |
554 |
#else |
555 |
#ifndef USE_LONG_DOUBLE |
556 |
fp_declare_init_shape(sxp, r, double); |
557 |
#else |
558 |
fp_declare_init_shape(sxp, r, extended); |
559 |
#endif |
560 |
sxp->ieee.negative = 0; |
561 |
sxp->ieee.exponent = 0; |
562 |
sxp->ieee.mantissa0 = 0; |
563 |
sxp->ieee.mantissa1 = 0; |
564 |
#ifdef USE_QUAD_DOUBLE |
565 |
sxp->ieee.mantissa2 = 0; |
566 |
sxp->ieee.mantissa3 = 0; |
567 |
#endif |
568 |
#endif |
569 |
} |
570 |
|
571 |
PRIVATE inline void FFPU make_zero_negative(fpu_register & r) |
572 |
{ |
573 |
#if 1 |
574 |
r = -0.0; |
575 |
#else |
576 |
#ifndef USE_LONG_DOUBLE |
577 |
fp_declare_init_shape(sxp, r, double); |
578 |
#else |
579 |
fp_declare_init_shape(sxp, r, extended); |
580 |
#endif |
581 |
sxp->ieee.negative = 1; |
582 |
sxp->ieee.exponent = 0; |
583 |
sxp->ieee.mantissa0 = 0; |
584 |
sxp->ieee.mantissa1 = 0; |
585 |
#ifdef USE_QUAD_DOUBLE |
586 |
sxp->ieee.mantissa2 = 0; |
587 |
sxp->ieee.mantissa3 = 0; |
588 |
#endif |
589 |
#endif |
590 |
} |
591 |
|
592 |
PRIVATE inline void FFPU make_inf_positive(fpu_register & r) |
593 |
{ |
594 |
#ifndef USE_LONG_DOUBLE |
595 |
fp_declare_init_shape(sxp, r, double); |
596 |
sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; |
597 |
#else |
598 |
fp_declare_init_shape(sxp, r, extended); |
599 |
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
600 |
#endif |
601 |
sxp->ieee_nan.negative = 0; |
602 |
sxp->ieee_nan.mantissa0 = 0; |
603 |
sxp->ieee_nan.mantissa1 = 0; |
604 |
#ifdef USE_QUAD_DOUBLE |
605 |
sxp->ieee_nan.mantissa2 = 0; |
606 |
sxp->ieee_nan.mantissa3 = 0; |
607 |
#endif |
608 |
} |
609 |
|
610 |
PRIVATE inline void FFPU make_inf_negative(fpu_register & r) |
611 |
{ |
612 |
#ifndef USE_LONG_DOUBLE |
613 |
fp_declare_init_shape(sxp, r, double); |
614 |
sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; |
615 |
#else |
616 |
fp_declare_init_shape(sxp, r, extended); |
617 |
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
618 |
#endif |
619 |
sxp->ieee_nan.negative = 1; |
620 |
sxp->ieee_nan.mantissa0 = 0; |
621 |
sxp->ieee_nan.mantissa1 = 0; |
622 |
#ifdef USE_QUAD_DOUBLE |
623 |
sxp->ieee_nan.mantissa2 = 0; |
624 |
sxp->ieee_nan.mantissa3 = 0; |
625 |
#endif |
626 |
} |
627 |
|
628 |
PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) |
629 |
{ |
630 |
#ifndef USE_LONG_DOUBLE |
631 |
fp_declare_init_shape(sxp, r, double); |
632 |
return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS); |
633 |
#else |
634 |
fp_declare_init_shape(sxp, r, extended); |
635 |
return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS); |
636 |
#endif |
637 |
} |
638 |
|
639 |
// Normalize to range 1..2 |
640 |
PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r) |
641 |
{ |
642 |
#ifndef USE_LONG_DOUBLE |
643 |
fp_declare_init_shape(sxp, r, double); |
644 |
sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS; |
645 |
#else |
646 |
fp_declare_init_shape(sxp, r, extended); |
647 |
sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS; |
648 |
#endif |
649 |
} |
650 |
|
651 |
// The sign of the quotient is the exclusive-OR of the sign bits |
652 |
// of the source and destination operands. |
653 |
PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) |
654 |
{ |
655 |
#ifndef USE_LONG_DOUBLE |
656 |
fp_declare_init_shape(sap, ra, double); |
657 |
fp_declare_init_shape(sbp, rb, double); |
658 |
#else |
659 |
fp_declare_init_shape(sap, ra, extended); |
660 |
fp_declare_init_shape(sbp, rb, extended); |
661 |
#endif |
662 |
return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0); |
663 |
} |
664 |
|
665 |
/* -------------------------------------------------------------------------- */ |
666 |
/* --- Math functions --- */ |
667 |
/* -------------------------------------------------------------------------- */ |
668 |
|
669 |
#if FPU_USE_ISO_C99 && USE_LONG_DOUBLE |
670 |
# define fp_log logl |
671 |
# define fp_log10 log10l |
672 |
# define fp_exp expl |
673 |
# define fp_pow powl |
674 |
# define fp_fabs fabsl |
675 |
# define fp_sqrt sqrtl |
676 |
# define fp_sin sinl |
677 |
# define fp_cos cosl |
678 |
# define fp_tan tanl |
679 |
# define fp_sinh sinhl |
680 |
# define fp_cosh coshl |
681 |
# define fp_tanh tanhl |
682 |
# define fp_asin asinl |
683 |
# define fp_acos acosl |
684 |
# define fp_atan atanl |
685 |
# define fp_asinh asinhl |
686 |
# define fp_acosh acoshl |
687 |
# define fp_atanh atanhl |
688 |
# define fp_floor floorl |
689 |
# define fp_ceil ceill |
690 |
#else |
691 |
# define fp_log log |
692 |
# define fp_log10 log10 |
693 |
# define fp_exp exp |
694 |
# define fp_pow pow |
695 |
# define fp_fabs fabs |
696 |
# define fp_sqrt sqrt |
697 |
# define fp_sin sin |
698 |
# define fp_cos cos |
699 |
# define fp_tan tan |
700 |
# define fp_sinh sinh |
701 |
# define fp_cosh cosh |
702 |
# define fp_tanh tanh |
703 |
# define fp_asin asin |
704 |
# define fp_acos acos |
705 |
# define fp_atan atan |
706 |
# define fp_asinh asinh |
707 |
# define fp_acosh acosh |
708 |
# define fp_atanh atanh |
709 |
# define fp_floor floor |
710 |
# define fp_ceil ceil |
711 |
#endif |
712 |
|
713 |
#if defined(FPU_IEEE) && defined(X86_ASSEMBLY) |
714 |
// Assembly optimized support functions. Taken from glibc 2.2.2 |
715 |
|
716 |
#undef fp_log |
717 |
#define fp_log fp_do_log |
718 |
|
719 |
#ifndef FPU_FAST_MATH |
720 |
// FIXME: unimplemented |
721 |
PRIVATE fpu_extended fp_do_log(fpu_extended x); |
722 |
#else |
723 |
PRIVATE inline fpu_extended fp_do_log(fpu_extended x) |
724 |
{ |
725 |
fpu_extended value; |
726 |
__asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); |
727 |
return value; |
728 |
} |
729 |
#endif |
730 |
|
731 |
#undef fp_log10 |
732 |
#define fp_log10 fp_do_log10 |
733 |
|
734 |
#ifndef FPU_FAST_MATH |
735 |
// FIXME: unimplemented |
736 |
PRIVATE fpu_extended fp_do_log10(fpu_extended x); |
737 |
#else |
738 |
PRIVATE inline fpu_extended fp_do_log10(fpu_extended x) |
739 |
{ |
740 |
fpu_extended value; |
741 |
__asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); |
742 |
return value; |
743 |
} |
744 |
#endif |
745 |
|
746 |
#undef fp_exp |
747 |
#define fp_exp fp_do_exp |
748 |
|
749 |
#ifndef FPU_FAST_MATH |
750 |
// FIXME: unimplemented |
751 |
PRIVATE fpu_extended fp_do_exp(fpu_extended x); |
752 |
#else |
753 |
PRIVATE inline fpu_extended fp_do_exp(fpu_extended x) |
754 |
{ |
755 |
fpu_extended value, exponent; |
756 |
__asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t" |
757 |
"fmul %%st(1) # x * log2(e)\n\t" |
758 |
"fst %%st(1)\n\t" |
759 |
"frndint # int(x * log2(e))\n\t" |
760 |
"fxch\n\t" |
761 |
"fsub %%st(1) # fract(x * log2(e))\n\t" |
762 |
"f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" |
763 |
: "=t" (value), "=u" (exponent) : "0" (x)); |
764 |
value += 1.0; |
765 |
__asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent)); |
766 |
return value; |
767 |
} |
768 |
#endif |
769 |
|
770 |
#undef fp_pow |
771 |
#define fp_pow fp_do_pow |
772 |
|
773 |
PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y); |
774 |
|
775 |
#undef fp_fabs |
776 |
#define fp_fabs fp_do_fabs |
777 |
|
778 |
PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x) |
779 |
{ |
780 |
fpu_extended value; |
781 |
__asm__ __volatile__("fabs" : "=t" (value) : "0" (x)); |
782 |
return value; |
783 |
} |
784 |
|
785 |
#undef fp_sqrt |
786 |
#define fp_sqrt fp_do_sqrt |
787 |
|
788 |
PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x) |
789 |
{ |
790 |
fpu_extended value; |
791 |
__asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x)); |
792 |
return value; |
793 |
} |
794 |
|
795 |
#undef fp_sin |
796 |
#define fp_sin fp_do_sin |
797 |
|
798 |
PRIVATE inline fpu_extended fp_do_sin(fpu_extended x) |
799 |
{ |
800 |
fpu_extended value; |
801 |
__asm__ __volatile__("fsin" : "=t" (value) : "0" (x)); |
802 |
return value; |
803 |
} |
804 |
|
805 |
#undef fp_cos |
806 |
#define fp_cos fp_do_cos |
807 |
|
808 |
PRIVATE inline fpu_extended fp_do_cos(fpu_extended x) |
809 |
{ |
810 |
fpu_extended value; |
811 |
__asm__ __volatile__("fcos" : "=t" (value) : "0" (x)); |
812 |
return value; |
813 |
} |
814 |
|
815 |
#undef fp_tan |
816 |
#define fp_tan fp_do_tan |
817 |
|
818 |
PRIVATE inline fpu_extended fp_do_tan(fpu_extended x) |
819 |
{ |
820 |
fpu_extended value; |
821 |
__asm__ __volatile__("fptan" : "=t" (value) : "0" (x)); |
822 |
return value; |
823 |
} |
824 |
|
825 |
#undef fp_expm1 |
826 |
#define fp_expm1 fp_do_expm1 |
827 |
|
828 |
// Returns: exp(X) - 1.0 |
829 |
PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x) |
830 |
{ |
831 |
fpu_extended value, exponent, temp; |
832 |
__asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" |
833 |
"fmul %%st(1) # x * log2(e)\n\t" |
834 |
"fst %%st(1)\n\t" |
835 |
"frndint # int(x * log2(e))\n\t" |
836 |
"fxch\n\t" |
837 |
"fsub %%st(1) # fract(x * log2(e))\n\t" |
838 |
"f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" |
839 |
"fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" |
840 |
: "=t" (value), "=u" (exponent) : "0" (x)); |
841 |
__asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent)); |
842 |
temp -= 1.0; |
843 |
return temp + value ? temp + value : x; |
844 |
} |
845 |
|
846 |
#undef fp_sgn1 |
847 |
#define fp_sgn1 fp_do_sgn1 |
848 |
|
849 |
PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x) |
850 |
{ |
851 |
fp_declare_init_shape(sxp, x, extended); |
852 |
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
853 |
sxp->ieee_nan.one = 1; |
854 |
sxp->ieee_nan.quiet_nan = 0; |
855 |
sxp->ieee_nan.mantissa0 = 0; |
856 |
sxp->ieee_nan.mantissa1 = 0; |
857 |
return x; |
858 |
} |
859 |
|
860 |
#undef fp_sinh |
861 |
#define fp_sinh fp_do_sinh |
862 |
|
863 |
#ifndef FPU_FAST_MATH |
864 |
// FIXME: unimplemented |
865 |
PRIVATE fpu_extended fp_do_sinh(fpu_extended x); |
866 |
#else |
867 |
PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x) |
868 |
{ |
869 |
fpu_extended exm1 = fp_expm1(fp_fabs(x)); |
870 |
return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x); |
871 |
} |
872 |
#endif |
873 |
|
874 |
#undef fp_cosh |
875 |
#define fp_cosh fp_do_cosh |
876 |
|
877 |
#ifndef FPU_FAST_MATH |
878 |
// FIXME: unimplemented |
879 |
PRIVATE fpu_extended fp_do_cosh(fpu_extended x); |
880 |
#else |
881 |
PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x) |
882 |
{ |
883 |
fpu_extended ex = fp_exp(x); |
884 |
return 0.5 * (ex + 1.0 / ex); |
885 |
} |
886 |
#endif |
887 |
|
888 |
#undef fp_tanh |
889 |
#define fp_tanh fp_do_tanh |
890 |
|
891 |
#ifndef FPU_FAST_MATH |
892 |
// FIXME: unimplemented |
893 |
PRIVATE fpu_extended fp_do_tanh(fpu_extended x); |
894 |
#else |
895 |
PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x) |
896 |
{ |
897 |
fpu_extended exm1 = fp_expm1(-fp_fabs(x + x)); |
898 |
return exm1 / (exm1 + 2.0) * fp_sgn1(-x); |
899 |
} |
900 |
#endif |
901 |
|
902 |
#undef fp_atan2 |
903 |
#define fp_atan2 fp_do_atan2 |
904 |
|
905 |
PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x) |
906 |
{ |
907 |
fpu_extended value; |
908 |
__asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)"); |
909 |
return value; |
910 |
} |
911 |
|
912 |
#undef fp_asin |
913 |
#define fp_asin fp_do_asin |
914 |
|
915 |
PRIVATE inline fpu_extended fp_do_asin(fpu_extended x) |
916 |
{ |
917 |
return fp_atan2(x, fp_sqrt(1.0 - x * x)); |
918 |
} |
919 |
|
920 |
#undef fp_acos |
921 |
#define fp_acos fp_do_acos |
922 |
|
923 |
PRIVATE inline fpu_extended fp_do_acos(fpu_extended x) |
924 |
{ |
925 |
return fp_atan2(fp_sqrt(1.0 - x * x), x); |
926 |
} |
927 |
|
928 |
#undef fp_atan |
929 |
#define fp_atan fp_do_atan |
930 |
|
931 |
PRIVATE inline fpu_extended fp_do_atan(fpu_extended x) |
932 |
{ |
933 |
fpu_extended value; |
934 |
__asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)"); |
935 |
return value; |
936 |
} |
937 |
|
938 |
#undef fp_log1p |
939 |
#define fp_log1p fp_do_log1p |
940 |
|
941 |
// Returns: ln(1.0 + X) |
942 |
PRIVATE fpu_extended fp_do_log1p(fpu_extended x); |
943 |
|
944 |
#undef fp_asinh |
945 |
#define fp_asinh fp_do_asinh |
946 |
|
947 |
PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x) |
948 |
{ |
949 |
fpu_extended y = fp_fabs(x); |
950 |
return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x)); |
951 |
} |
952 |
|
953 |
#undef fp_acosh |
954 |
#define fp_acosh fp_do_acosh |
955 |
|
956 |
PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x) |
957 |
{ |
958 |
return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0)); |
959 |
} |
960 |
|
961 |
#undef fp_atanh |
962 |
#define fp_atanh fp_do_atanh |
963 |
|
964 |
PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x) |
965 |
{ |
966 |
fpu_extended y = fp_fabs(x); |
967 |
return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x); |
968 |
} |
969 |
|
970 |
#undef fp_floor |
971 |
#define fp_floor fp_do_floor |
972 |
|
973 |
PRIVATE inline fpu_extended fp_do_floor(fpu_extended x) |
974 |
{ |
975 |
volatile unsigned int cw; |
976 |
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); |
977 |
volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down |
978 |
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); |
979 |
fpu_extended value; |
980 |
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); |
981 |
__asm__ __volatile__("fldcw %0" : : "m" (cw)); |
982 |
return value; |
983 |
} |
984 |
|
985 |
#undef fp_ceil |
986 |
#define fp_ceil fp_do_ceil |
987 |
|
988 |
PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x) |
989 |
{ |
990 |
volatile unsigned int cw; |
991 |
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); |
992 |
volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up |
993 |
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); |
994 |
fpu_extended value; |
995 |
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); |
996 |
__asm__ __volatile__("fldcw %0" : : "m" (cw)); |
997 |
return value; |
998 |
} |
999 |
|
1000 |
#define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ |
1001 |
PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \ |
1002 |
{ \ |
1003 |
volatile unsigned int cw; \ |
1004 |
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); \ |
1005 |
volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \ |
1006 |
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \ |
1007 |
fpu_extended value; \ |
1008 |
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \ |
1009 |
__asm__ __volatile__("fldcw %0" : : "m" (cw)); \ |
1010 |
return value; \ |
1011 |
} |
1012 |
|
1013 |
#undef fp_round_to_minus_infinity |
1014 |
#define fp_round_to_minus_infinity fp_do_round_to_minus_infinity |
1015 |
|
1016 |
DEFINE_ROUND_FUNC(minus_infinity, 0x400) |
1017 |
|
1018 |
#undef fp_round_to_plus_infinity |
1019 |
#define fp_round_to_plus_infinity fp_do_round_to_plus_infinity |
1020 |
|
1021 |
DEFINE_ROUND_FUNC(plus_infinity, 0x800) |
1022 |
|
1023 |
#undef fp_round_to_zero |
1024 |
#define fp_round_to_zero fp_do_round_to_zero |
1025 |
|
1026 |
DEFINE_ROUND_FUNC(zero, 0xc00) |
1027 |
|
1028 |
#undef fp_round_to_nearest |
1029 |
#define fp_round_to_nearest fp_do_round_to_nearest |
1030 |
|
1031 |
DEFINE_ROUND_FUNC(nearest, 0x000) |
1032 |
|
1033 |
#endif /* X86_ASSEMBLY */ |
1034 |
|
1035 |
#ifndef fp_round_to_minus_infinity |
1036 |
#define fp_round_to_minus_infinity(x) fp_floor(x) |
1037 |
#endif |
1038 |
|
1039 |
#ifndef fp_round_to_plus_infinity |
1040 |
#define fp_round_to_plus_infinity(x) fp_ceil(x) |
1041 |
#endif |
1042 |
|
1043 |
#ifndef fp_round_to_zero |
1044 |
#define fp_round_to_zero(x) ((int)(x)) |
1045 |
#endif |
1046 |
|
1047 |
#ifndef fp_round_to_nearest |
1048 |
#define fp_round_to_nearest(x) ((int)((x) + 0.5)) |
1049 |
#endif |
1050 |
|
1051 |
#endif /* FPU_MATHLIB_H */ |