1 |
gbeauche |
1.1 |
/* |
2 |
|
|
* fpu/mathlib.h - Floating-point math support library |
3 |
|
|
* |
4 |
|
|
* Basilisk II (C) 1997-2001 Christian Bauer |
5 |
|
|
* |
6 |
|
|
* MC68881/68040 fpu emulation |
7 |
|
|
* |
8 |
|
|
* Original UAE FPU, copyright 1996 Herman ten Brugge |
9 |
|
|
* Rewrite for x86, copyright 1999-2001 Lauri Pesonen |
10 |
|
|
* New framework, copyright 2000-2001 Gwenole Beauchesne |
11 |
|
|
* Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 |
12 |
|
|
* |
13 |
|
|
* This program is free software; you can redistribute it and/or modify |
14 |
|
|
* it under the terms of the GNU General Public License as published by |
15 |
|
|
* the Free Software Foundation; either version 2 of the License, or |
16 |
|
|
* (at your option) any later version. |
17 |
|
|
* |
18 |
|
|
* This program is distributed in the hope that it will be useful, |
19 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 |
|
|
* GNU General Public License for more details. |
22 |
|
|
* |
23 |
|
|
* You should have received a copy of the GNU General Public License |
24 |
|
|
* along with this program; if not, write to the Free Software |
25 |
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
26 |
|
|
*/ |
27 |
|
|
|
28 |
|
|
#ifndef FPU_MATHLIB_H |
29 |
|
|
#define FPU_MATHLIB_H |
30 |
|
|
|
31 |
|
|
/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ |
32 |
|
|
#undef PUBLIC |
33 |
|
|
#define PUBLIC extern |
34 |
|
|
|
35 |
|
|
#undef PRIVATE |
36 |
|
|
#define PRIVATE static |
37 |
|
|
|
38 |
|
|
#undef FFPU |
39 |
|
|
#define FFPU /**/ |
40 |
|
|
|
41 |
|
|
#undef FPU |
42 |
|
|
#define FPU fpu. |
43 |
|
|
|
44 |
|
|
// Define the following macro if branches are expensive. If so, |
45 |
|
|
// integer-based isnan() and isinf() functions are implemented. |
46 |
|
|
// TODO: move to Makefile.in |
47 |
|
|
#define BRANCHES_ARE_EXPENSIVE 1 |
48 |
|
|
|
49 |
|
|
// Use ISO C99 extended-precision math functions (glibc 2.1+) |
50 |
|
|
#define FPU_USE_ISO_C99 1 |
51 |
|
|
|
52 |
|
|
// NOTE: this is irrelevant on Win32 platforms since the MS libraries |
53 |
|
|
// don't support extended-precision floating-point computations |
54 |
|
|
#ifdef WIN32 |
55 |
|
|
#undef FPU_USE_ISO_C99 |
56 |
|
|
#endif |
57 |
|
|
|
58 |
|
|
// Use faster implementation of math functions, but this could cause |
59 |
|
|
// some incorrect results (?) |
60 |
|
|
// TODO: actually implement the slower but safer versions |
61 |
|
|
#define FPU_FAST_MATH 1 |
62 |
|
|
|
63 |
|
|
#if FPU_USE_ISO_C99 |
64 |
|
|
// NOTE: no prior <math.h> shall be included at this point |
65 |
|
|
#define __USE_ISOC99 1 // for glibc 2.2.X and newer |
66 |
|
|
#define __USE_ISOC9X 1 // for glibc 2.1.X |
67 |
|
|
#include <math.h> |
68 |
|
|
#else |
69 |
|
|
#include <cmath> |
70 |
|
|
using namespace std; |
71 |
|
|
#endif |
72 |
|
|
|
73 |
|
|
/* -------------------------------------------------------------------------- */ |
74 |
|
|
/* --- Floating-point register types --- */ |
75 |
|
|
/* -------------------------------------------------------------------------- */ |
76 |
|
|
|
77 |
|
|
// Single : S 8*E 23*F |
78 |
|
|
#define FP_SINGLE_EXP_MAX 0xff |
79 |
|
|
#define FP_SINGLE_EXP_BIAS 0x7f |
80 |
|
|
|
81 |
|
|
// Double : S 11*E 52*F |
82 |
|
|
#define FP_DOUBLE_EXP_MAX 0x7ff |
83 |
|
|
#define FP_DOUBLE_EXP_BIAS 0x3ff |
84 |
|
|
|
85 |
|
|
// Extended : S 15*E 64*F |
86 |
|
|
#define FP_EXTENDED_EXP_MAX 0x7fff |
87 |
|
|
#define FP_EXTENDED_EXP_BIAS 0x3fff |
88 |
|
|
|
89 |
|
|
// Zeroes : E = 0 & F = 0 |
90 |
|
|
// Infinities : E = MAX & F = 0 |
91 |
|
|
// Not-A-Number : E = MAX & F # 0 |
92 |
|
|
|
93 |
|
|
/* -------------------------------------------------------------------------- */ |
94 |
|
|
/* --- Floating-point type shapes (IEEE-compliant) --- */ |
95 |
|
|
/* -------------------------------------------------------------------------- */ |
96 |
|
|
|
97 |
|
|
// Taken from glibc 2.2.x: ieee754.h |
98 |
|
|
|
99 |
|
|
// IEEE-754 float format |
100 |
|
|
union fpu_single_shape { |
101 |
|
|
|
102 |
|
|
fpu_single value; |
103 |
|
|
|
104 |
|
|
/* This is the IEEE 754 single-precision format. */ |
105 |
|
|
struct { |
106 |
|
|
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
107 |
|
|
unsigned int negative:1; |
108 |
|
|
unsigned int exponent:8; |
109 |
|
|
unsigned int mantissa:23; |
110 |
|
|
#endif /* Big endian. */ |
111 |
|
|
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
112 |
|
|
unsigned int mantissa:23; |
113 |
|
|
unsigned int exponent:8; |
114 |
|
|
unsigned int negative:1; |
115 |
|
|
#endif /* Little endian. */ |
116 |
|
|
} ieee; |
117 |
|
|
|
118 |
|
|
/* This format makes it easier to see if a NaN is a signalling NaN. */ |
119 |
|
|
struct { |
120 |
|
|
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
121 |
|
|
unsigned int negative:1; |
122 |
|
|
unsigned int exponent:8; |
123 |
|
|
unsigned int quiet_nan:1; |
124 |
|
|
unsigned int mantissa:22; |
125 |
|
|
#endif /* Big endian. */ |
126 |
|
|
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
127 |
|
|
unsigned int mantissa:22; |
128 |
|
|
unsigned int quiet_nan:1; |
129 |
|
|
unsigned int exponent:8; |
130 |
|
|
unsigned int negative:1; |
131 |
|
|
#endif /* Little endian. */ |
132 |
|
|
} ieee_nan; |
133 |
|
|
}; |
134 |
|
|
|
135 |
|
|
// IEEE-754 double format |
136 |
|
|
union fpu_double_shape { |
137 |
|
|
fpu_double value; |
138 |
|
|
|
139 |
|
|
/* This is the IEEE 754 double-precision format. */ |
140 |
|
|
struct { |
141 |
|
|
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
142 |
|
|
unsigned int negative:1; |
143 |
|
|
unsigned int exponent:11; |
144 |
|
|
/* Together these comprise the mantissa. */ |
145 |
|
|
unsigned int mantissa0:20; |
146 |
|
|
unsigned int mantissa1:32; |
147 |
|
|
#endif /* Big endian. */ |
148 |
|
|
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
149 |
|
|
# if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
150 |
|
|
unsigned int mantissa0:20; |
151 |
|
|
unsigned int exponent:11; |
152 |
|
|
unsigned int negative:1; |
153 |
|
|
unsigned int mantissa1:32; |
154 |
|
|
# else |
155 |
|
|
/* Together these comprise the mantissa. */ |
156 |
|
|
unsigned int mantissa1:32; |
157 |
|
|
unsigned int mantissa0:20; |
158 |
|
|
unsigned int exponent:11; |
159 |
|
|
unsigned int negative:1; |
160 |
|
|
# endif |
161 |
|
|
#endif /* Little endian. */ |
162 |
|
|
} ieee; |
163 |
|
|
|
164 |
|
|
/* This format makes it easier to see if a NaN is a signalling NaN. */ |
165 |
|
|
struct { |
166 |
|
|
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
167 |
|
|
unsigned int negative:1; |
168 |
|
|
unsigned int exponent:11; |
169 |
|
|
unsigned int quiet_nan:1; |
170 |
|
|
/* Together these comprise the mantissa. */ |
171 |
|
|
unsigned int mantissa0:19; |
172 |
|
|
unsigned int mantissa1:32; |
173 |
|
|
#else |
174 |
|
|
# if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
175 |
|
|
unsigned int mantissa0:19; |
176 |
|
|
unsigned int quiet_nan:1; |
177 |
|
|
unsigned int exponent:11; |
178 |
|
|
unsigned int negative:1; |
179 |
|
|
unsigned int mantissa1:32; |
180 |
|
|
# else |
181 |
|
|
/* Together these comprise the mantissa. */ |
182 |
|
|
unsigned int mantissa1:32; |
183 |
|
|
unsigned int mantissa0:19; |
184 |
|
|
unsigned int quiet_nan:1; |
185 |
|
|
unsigned int exponent:11; |
186 |
|
|
unsigned int negative:1; |
187 |
|
|
# endif |
188 |
|
|
#endif |
189 |
|
|
} ieee_nan; |
190 |
|
|
}; |
191 |
|
|
|
192 |
|
|
#if SIZEOF_LONG_DOUBLE == 12 |
193 |
|
|
# undef USE_QUAD_DOUBLE |
194 |
|
|
#elif SIZEOF_LONG_DOUBLE == 16 |
195 |
|
|
# define USE_QUAD_DOUBLE |
196 |
|
|
#else |
197 |
|
|
# error "unsupported long double format" |
198 |
|
|
#endif |
199 |
|
|
|
200 |
|
|
#ifndef USE_QUAD_DOUBLE |
201 |
|
|
// IEEE-854 long double format |
202 |
|
|
union fpu_extended_shape { |
203 |
|
|
fpu_extended value; |
204 |
|
|
|
205 |
|
|
/* This is the IEEE 854 double-extended-precision format. */ |
206 |
|
|
struct { |
207 |
|
|
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
208 |
|
|
unsigned int negative:1; |
209 |
|
|
unsigned int exponent:15; |
210 |
|
|
unsigned int empty:16; |
211 |
|
|
unsigned int mantissa0:32; |
212 |
|
|
unsigned int mantissa1:32; |
213 |
|
|
#endif |
214 |
|
|
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
215 |
|
|
# if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
216 |
|
|
unsigned int exponent:15; |
217 |
|
|
unsigned int negative:1; |
218 |
|
|
unsigned int empty:16; |
219 |
|
|
unsigned int mantissa0:32; |
220 |
|
|
unsigned int mantissa1:32; |
221 |
|
|
# else |
222 |
|
|
unsigned int mantissa1:32; |
223 |
|
|
unsigned int mantissa0:32; |
224 |
|
|
unsigned int exponent:15; |
225 |
|
|
unsigned int negative:1; |
226 |
|
|
unsigned int empty:16; |
227 |
|
|
# endif |
228 |
|
|
#endif |
229 |
|
|
} ieee; |
230 |
|
|
|
231 |
|
|
/* This is for NaNs in the IEEE 854 double-extended-precision format. */ |
232 |
|
|
struct { |
233 |
|
|
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
234 |
|
|
unsigned int negative:1; |
235 |
|
|
unsigned int exponent:15; |
236 |
|
|
unsigned int empty:16; |
237 |
|
|
unsigned int one:1; |
238 |
|
|
unsigned int quiet_nan:1; |
239 |
|
|
unsigned int mantissa0:30; |
240 |
|
|
unsigned int mantissa1:32; |
241 |
|
|
#endif |
242 |
|
|
#if UAE_BYTE_ORDER == UAE_LITTLE_ENDIAN |
243 |
|
|
# if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
244 |
|
|
unsigned int exponent:15; |
245 |
|
|
unsigned int negative:1; |
246 |
|
|
unsigned int empty:16; |
247 |
|
|
unsigned int mantissa0:30; |
248 |
|
|
unsigned int quiet_nan:1; |
249 |
|
|
unsigned int one:1; |
250 |
|
|
unsigned int mantissa1:32; |
251 |
|
|
# else |
252 |
|
|
unsigned int mantissa1:32; |
253 |
|
|
unsigned int mantissa0:30; |
254 |
|
|
unsigned int quiet_nan:1; |
255 |
|
|
unsigned int one:1; |
256 |
|
|
unsigned int exponent:15; |
257 |
|
|
unsigned int negative:1; |
258 |
|
|
unsigned int empty:16; |
259 |
|
|
# endif |
260 |
|
|
#endif |
261 |
|
|
} ieee_nan; |
262 |
|
|
|
263 |
|
|
/* This format is used to extract the sign_exponent and mantissa parts only */ |
264 |
|
|
struct { |
265 |
|
|
#if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
266 |
|
|
unsigned int sign_exponent:16; |
267 |
|
|
unsigned int empty:16; |
268 |
|
|
unsigned int msw:32; |
269 |
|
|
unsigned int lsw:32; |
270 |
|
|
#else |
271 |
|
|
unsigned int lsw:32; |
272 |
|
|
unsigned int msw:32; |
273 |
|
|
unsigned int sign_exponent:16; |
274 |
|
|
unsigned int empty:16; |
275 |
|
|
#endif |
276 |
|
|
} parts; |
277 |
|
|
}; |
278 |
|
|
#else |
279 |
|
|
// IEEE-854 quad double format |
280 |
|
|
union fpu_extended_shape { |
281 |
|
|
fpu_extended value; |
282 |
|
|
|
283 |
|
|
/* This is the IEEE 854 quad-precision format. */ |
284 |
|
|
struct { |
285 |
|
|
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
286 |
|
|
unsigned int negative:1; |
287 |
|
|
unsigned int exponent:15; |
288 |
|
|
unsigned int mantissa0:16; |
289 |
|
|
unsigned int mantissa1:32; |
290 |
|
|
unsigned int mantissa2:32; |
291 |
|
|
unsigned int mantissa3:32; |
292 |
|
|
#else |
293 |
|
|
unsigned int mantissa3:32; |
294 |
|
|
unsigned int mantissa2:32; |
295 |
|
|
unsigned int mantissa1:32; |
296 |
|
|
unsigned int mantissa0:16; |
297 |
|
|
unsigned int exponent:15; |
298 |
|
|
unsigned int negative:1; |
299 |
|
|
#endif |
300 |
|
|
} ieee; |
301 |
|
|
|
302 |
|
|
/* This is for NaNs in the IEEE 854 quad-precision format. */ |
303 |
|
|
struct { |
304 |
|
|
#if UAE_BYTE_ORDER == UAE_BIG_ENDIAN |
305 |
|
|
unsigned int negative:1; |
306 |
|
|
unsigned int exponent:15; |
307 |
|
|
unsigned int quiet_nan:1; |
308 |
|
|
unsigned int mantissa0:15; |
309 |
|
|
unsigned int mantissa1:30; |
310 |
|
|
unsigned int mantissa2:32; |
311 |
|
|
unsigned int mantissa3:32; |
312 |
|
|
#else |
313 |
|
|
unsigned int mantissa3:32; |
314 |
|
|
unsigned int mantissa2:32; |
315 |
|
|
unsigned int mantissa1:32; |
316 |
|
|
unsigned int mantissa0:15; |
317 |
|
|
unsigned int quiet_nan:1; |
318 |
|
|
unsigned int exponent:15; |
319 |
|
|
unsigned int negative:1; |
320 |
|
|
#endif |
321 |
|
|
} ieee_nan; |
322 |
|
|
|
323 |
|
|
/* This format is used to extract the sign_exponent and mantissa parts only */ |
324 |
|
|
#if UAE_FLOAT_WORD_ORDER == UAE_BIG_ENDIAN |
325 |
|
|
struct { |
326 |
|
|
uae_u64 msw; |
327 |
|
|
uae_u64 lsw; |
328 |
|
|
} parts64; |
329 |
|
|
struct { |
330 |
|
|
uae_u32 w0; |
331 |
|
|
uae_u32 w1; |
332 |
|
|
uae_u32 w2; |
333 |
|
|
uae_u32 w3; |
334 |
|
|
} parts32; |
335 |
|
|
#else |
336 |
|
|
struct { |
337 |
|
|
uae_u64 lsw; |
338 |
|
|
uae_u64 msw; |
339 |
|
|
} parts64; |
340 |
|
|
struct { |
341 |
|
|
uae_u32 w3; |
342 |
|
|
uae_u32 w2; |
343 |
|
|
uae_u32 w1; |
344 |
|
|
uae_u32 w0; |
345 |
|
|
} parts32; |
346 |
|
|
#endif |
347 |
|
|
}; |
348 |
|
|
#endif // !USE_QUAD_DOUBLE |
349 |
|
|
|
350 |
|
|
// Declare and initialize a pointer to a shape of the requested FP type |
351 |
|
|
#define fp_declare_init_shape(psvar, rfvar, ftype) \ |
352 |
|
|
fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar ) |
353 |
|
|
|
354 |
|
|
/* -------------------------------------------------------------------------- */ |
355 |
|
|
/* --- Extra Math Functions --- */ |
356 |
|
|
/* --- (most of them had to be defined before including <fpu/flags.h>) --- */ |
357 |
|
|
/* -------------------------------------------------------------------------- */ |
358 |
|
|
|
359 |
|
|
#undef isnan |
360 |
|
|
#if 0 && defined(HAVE_ISNANL) |
361 |
|
|
# define isnan(x) isnanl((x)) |
362 |
|
|
#else |
363 |
|
|
# define isnan(x) fp_do_isnan((x)) |
364 |
|
|
#endif |
365 |
|
|
|
366 |
|
|
PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r) |
367 |
|
|
{ |
368 |
|
|
fp_declare_init_shape(sxp, r, extended); |
369 |
|
|
#ifdef BRANCHES_ARE_EXPENSIVE |
370 |
|
|
#ifdef USE_QUAD_DOUBLE |
371 |
|
|
uae_s64 hx = sxp->parts64.msw; |
372 |
|
|
uae_s64 lx = sxp->parts64.lsw; |
373 |
|
|
hx &= 0x7fffffffffffffffLL; |
374 |
|
|
hx |= (uae_u64)(lx | (-lx)) >> 63; |
375 |
|
|
hx = 0x7fff000000000000LL - hx; |
376 |
|
|
return (int)((uae_u64)hx >> 63); |
377 |
|
|
#else |
378 |
|
|
uae_s32 se = sxp->parts.sign_exponent; |
379 |
|
|
uae_s32 hx = sxp->parts.msw; |
380 |
|
|
uae_s32 lx = sxp->parts.lsw; |
381 |
|
|
se = (se & 0x7fff) << 1; |
382 |
|
|
lx |= hx & 0x7fffffff; |
383 |
|
|
se |= (uae_u32)(lx | (-lx)) >> 31; |
384 |
|
|
se = 0xfffe - se; |
385 |
|
|
// TODO: check whether rshift count is 16 or 31 |
386 |
|
|
return (int)(((uae_u32)(se)) >> 16); |
387 |
|
|
#endif |
388 |
|
|
#else |
389 |
|
|
return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) |
390 |
|
|
&& (sxp->ieee_nan.mantissa0 != 0) |
391 |
|
|
&& (sxp->ieee_nan.mantissa1 != 0) |
392 |
|
|
#ifdef USE_QUAD_DOUBLE |
393 |
|
|
&& (sxp->ieee_nan.mantissa2 != 0) |
394 |
|
|
&& (sxp->ieee_nan.mantissa3 != 0) |
395 |
|
|
#endif |
396 |
|
|
; |
397 |
|
|
#endif |
398 |
|
|
} |
399 |
|
|
|
400 |
|
|
#undef isinf |
401 |
|
|
#if 0 && defined(HAVE_ISINFL) |
402 |
|
|
# define isinf(x) isinfl((x)) |
403 |
|
|
#else |
404 |
|
|
# define isinf(x) fp_do_isinf((x)) |
405 |
|
|
#endif |
406 |
|
|
|
407 |
|
|
PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r) |
408 |
|
|
{ |
409 |
|
|
fp_declare_init_shape(sxp, r, extended); |
410 |
|
|
#ifdef BRANCHES_ARE_EXPENSIVE |
411 |
|
|
#ifdef USE_QUAD_DOUBLE |
412 |
|
|
uae_s64 hx = sxp->parts64.msw; |
413 |
|
|
uae_s64 lx = sxp->parts64.lsw; |
414 |
|
|
lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL; |
415 |
|
|
lx |= -lx; |
416 |
|
|
return ~(lx >> 63) & (hx >> 62); |
417 |
|
|
#else |
418 |
|
|
uae_s32 se = sxp->parts.sign_exponent; |
419 |
|
|
uae_s32 hx = sxp->parts.msw; |
420 |
|
|
uae_s32 lx = sxp->parts.lsw; |
421 |
|
|
/* This additional ^ 0x80000000 is necessary because in Intel's |
422 |
|
|
internal representation of the implicit one is explicit. |
423 |
|
|
NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */ |
424 |
|
|
#ifdef __i386__ |
425 |
|
|
lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff); |
426 |
|
|
#else |
427 |
|
|
lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff); |
428 |
|
|
#endif |
429 |
|
|
lx |= -lx; |
430 |
|
|
se &= 0x8000; |
431 |
|
|
return ~(lx >> 31) & (1 - (se >> 14)); |
432 |
|
|
#endif |
433 |
|
|
#else |
434 |
|
|
return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) |
435 |
|
|
&& (sxp->ieee_nan.mantissa0 == 0) |
436 |
|
|
&& (sxp->ieee_nan.mantissa1 == 0) |
437 |
|
|
#ifdef USE_QUAD_DOUBLE |
438 |
|
|
&& (sxp->ieee_nan.mantissa2 == 0) |
439 |
|
|
&& (sxp->ieee_nan.mantissa3 == 0) |
440 |
|
|
#endif |
441 |
|
|
; |
442 |
|
|
#endif |
443 |
|
|
} |
444 |
|
|
|
445 |
|
|
#undef isneg |
446 |
|
|
#define isneg(x) fp_do_isneg((x)) |
447 |
|
|
|
448 |
|
|
PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r) |
449 |
|
|
{ |
450 |
|
|
fp_declare_init_shape(sxp, r, extended); |
451 |
|
|
return (sxp->ieee.negative) |
452 |
|
|
; |
453 |
|
|
} |
454 |
|
|
|
455 |
|
|
#undef iszero |
456 |
|
|
#define iszero(x) fp_do_iszero((x)) |
457 |
|
|
|
458 |
|
|
PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r) |
459 |
|
|
{ |
460 |
|
|
// TODO: BRANCHES_ARE_EXPENSIVE |
461 |
|
|
fp_declare_init_shape(sxp, r, extended); |
462 |
|
|
return (sxp->ieee.exponent == 0) |
463 |
|
|
&& (sxp->ieee.mantissa0 == 0) |
464 |
|
|
&& (sxp->ieee.mantissa1 == 0) |
465 |
|
|
#ifdef USE_QUAD_DOUBLE |
466 |
|
|
&& (sxp->ieee.mantissa2 == 0) |
467 |
|
|
&& (sxp->ieee.mantissa3 == 0) |
468 |
|
|
#endif |
469 |
|
|
; |
470 |
|
|
} |
471 |
|
|
|
472 |
|
|
PRIVATE inline void FFPU get_dest_flags(fpu_register const & r) |
473 |
|
|
{ |
474 |
|
|
fl_dest.negative = isneg(r); |
475 |
|
|
fl_dest.zero = iszero(r); |
476 |
|
|
fl_dest.infinity = isinf(r); |
477 |
|
|
fl_dest.nan = isnan(r); |
478 |
|
|
fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan; |
479 |
|
|
} |
480 |
|
|
|
481 |
|
|
PRIVATE inline void FFPU get_source_flags(fpu_register const & r) |
482 |
|
|
{ |
483 |
|
|
fl_source.negative = isneg(r); |
484 |
|
|
fl_source.zero = iszero(r); |
485 |
|
|
fl_source.infinity = isinf(r); |
486 |
|
|
fl_source.nan = isnan(r); |
487 |
|
|
fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; |
488 |
|
|
} |
489 |
|
|
|
490 |
|
|
PRIVATE inline void FFPU make_nan(fpu_register & r) |
491 |
|
|
{ |
492 |
|
|
// FIXME: is that correct ? |
493 |
|
|
fp_declare_init_shape(sxp, r, extended); |
494 |
|
|
sxp->ieee.exponent = FP_EXTENDED_EXP_MAX; |
495 |
|
|
sxp->ieee.mantissa0 = 0xffffffff; |
496 |
|
|
sxp->ieee.mantissa1 = 0xffffffff; |
497 |
|
|
#ifdef USE_QUAD_DOUBLE |
498 |
|
|
sxp->ieee.mantissa2 = 0xffffffff; |
499 |
|
|
sxp->ieee.mantissa3 = 0xffffffff; |
500 |
|
|
#endif |
501 |
|
|
} |
502 |
|
|
|
503 |
|
|
PRIVATE inline void FFPU make_zero_positive(fpu_register & r) |
504 |
|
|
{ |
505 |
|
|
#if 1 |
506 |
|
|
r = +0.0; |
507 |
|
|
#else |
508 |
|
|
fp_declare_init_shape(sxp, r, extended); |
509 |
|
|
sxp->ieee.negative = 0; |
510 |
|
|
sxp->ieee.exponent = 0; |
511 |
|
|
sxp->ieee.mantissa0 = 0; |
512 |
|
|
sxp->ieee.mantissa1 = 0; |
513 |
|
|
#ifdef USE_QUAD_DOUBLE |
514 |
|
|
sxp->ieee.mantissa2 = 0; |
515 |
|
|
sxp->ieee.mantissa3 = 0; |
516 |
|
|
#endif |
517 |
|
|
#endif |
518 |
|
|
} |
519 |
|
|
|
520 |
|
|
PRIVATE inline void FFPU make_zero_negative(fpu_register & r) |
521 |
|
|
{ |
522 |
|
|
#if 1 |
523 |
|
|
r = -0.0; |
524 |
|
|
#else |
525 |
|
|
fp_declare_init_shape(sxp, r, extended); |
526 |
|
|
sxp->ieee.negative = 1; |
527 |
|
|
sxp->ieee.exponent = 0; |
528 |
|
|
sxp->ieee.mantissa0 = 0; |
529 |
|
|
sxp->ieee.mantissa1 = 0; |
530 |
|
|
#ifdef USE_QUAD_DOUBLE |
531 |
|
|
sxp->ieee.mantissa2 = 0; |
532 |
|
|
sxp->ieee.mantissa3 = 0; |
533 |
|
|
#endif |
534 |
|
|
#endif |
535 |
|
|
} |
536 |
|
|
|
537 |
|
|
PRIVATE inline void FFPU make_inf_positive(fpu_register & r) |
538 |
|
|
{ |
539 |
|
|
fp_declare_init_shape(sxp, r, extended); |
540 |
|
|
sxp->ieee_nan.negative = 0; |
541 |
|
|
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
542 |
|
|
sxp->ieee_nan.mantissa0 = 0; |
543 |
|
|
sxp->ieee_nan.mantissa1 = 0; |
544 |
|
|
#ifdef USE_QUAD_DOUBLE |
545 |
|
|
sxp->ieee_nan.mantissa2 = 0; |
546 |
|
|
sxp->ieee_nan.mantissa3 = 0; |
547 |
|
|
#endif |
548 |
|
|
} |
549 |
|
|
|
550 |
|
|
PRIVATE inline void FFPU make_inf_negative(fpu_register & r) |
551 |
|
|
{ |
552 |
|
|
fp_declare_init_shape(sxp, r, extended); |
553 |
|
|
sxp->ieee_nan.negative = 1; |
554 |
|
|
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
555 |
|
|
sxp->ieee_nan.mantissa0 = 0; |
556 |
|
|
sxp->ieee_nan.mantissa1 = 0; |
557 |
|
|
#ifdef USE_QUAD_DOUBLE |
558 |
|
|
sxp->ieee_nan.mantissa2 = 0; |
559 |
|
|
sxp->ieee_nan.mantissa3 = 0; |
560 |
|
|
#endif |
561 |
|
|
} |
562 |
|
|
|
563 |
|
|
PRIVATE inline void FFPU fast_scale(fpu_register & r, int add) |
564 |
|
|
{ |
565 |
|
|
fp_declare_init_shape(sxp, r, extended); |
566 |
|
|
// TODO: overflow flags |
567 |
|
|
int exp = sxp->ieee.exponent + add; |
568 |
|
|
// FIXME: this test is not correct: see fpuop_fscale() |
569 |
|
|
if (exp > FP_EXTENDED_EXP_BIAS) { |
570 |
|
|
make_inf_positive(r); |
571 |
|
|
} else if (exp < 0) { |
572 |
|
|
// keep sign (+/- 0) |
573 |
|
|
if (isneg(r)) |
574 |
|
|
make_inf_negative(r); |
575 |
|
|
else |
576 |
|
|
make_inf_positive(r); |
577 |
|
|
// p[FHI] &= 0x80000000; |
578 |
|
|
} else { |
579 |
|
|
sxp->ieee.exponent = exp; |
580 |
|
|
// p[FHI] = (p[FHI] & 0x800FFFFF) | ((uae_u32)exp << 20); |
581 |
|
|
} |
582 |
|
|
} |
583 |
|
|
|
584 |
|
|
PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) |
585 |
|
|
{ |
586 |
|
|
fp_declare_init_shape(sxp, r, extended); |
587 |
|
|
return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS); |
588 |
|
|
} |
589 |
|
|
|
590 |
|
|
// Normalize to range 1..2 |
591 |
|
|
PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r) |
592 |
|
|
{ |
593 |
|
|
fp_declare_init_shape(sxp, r, extended); |
594 |
|
|
sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS; |
595 |
|
|
} |
596 |
|
|
|
597 |
|
|
// The sign of the quotient is the exclusive-OR of the sign bits |
598 |
|
|
// of the source and destination operands. |
599 |
|
|
PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) |
600 |
|
|
{ |
601 |
|
|
fp_declare_init_shape(sap, ra, extended); |
602 |
|
|
fp_declare_init_shape(sbp, rb, extended); |
603 |
|
|
return (((sap->ieee.mantissa0 ^ sbp->ieee.mantissa0) & 0x80000000) ? 0x800000 : 0); |
604 |
|
|
} |
605 |
|
|
|
606 |
|
|
/* -------------------------------------------------------------------------- */ |
607 |
|
|
/* --- Math functions --- */ |
608 |
|
|
/* -------------------------------------------------------------------------- */ |
609 |
|
|
|
610 |
|
|
#if FPU_USE_ISO_C99 |
611 |
|
|
# define fp_log logl |
612 |
|
|
# define fp_log10 log10l |
613 |
|
|
# define fp_exp expl |
614 |
|
|
# define fp_pow powl |
615 |
|
|
# define fp_fabs fabsl |
616 |
|
|
# define fp_sqrt sqrtl |
617 |
|
|
# define fp_sin sinl |
618 |
|
|
# define fp_cos cosl |
619 |
|
|
# define fp_tan tanl |
620 |
|
|
# define fp_sinh sinhl |
621 |
|
|
# define fp_cosh coshl |
622 |
|
|
# define fp_tanh tanhl |
623 |
|
|
# define fp_asin asinl |
624 |
|
|
# define fp_acos acosl |
625 |
|
|
# define fp_atan atanl |
626 |
|
|
# define fp_asinh asinhl |
627 |
|
|
# define fp_acosh acoshl |
628 |
|
|
# define fp_atanh atanhl |
629 |
|
|
# define fp_floor floorl |
630 |
|
|
# define fp_ceil ceill |
631 |
|
|
#else |
632 |
|
|
# define fp_log log |
633 |
|
|
# define fp_log10 log10 |
634 |
|
|
# define fp_exp exp |
635 |
|
|
# define fp_pow pow |
636 |
|
|
# define fp_fabs fabs |
637 |
|
|
# define fp_sqrt sqrt |
638 |
|
|
# define fp_sin sin |
639 |
|
|
# define fp_cos cos |
640 |
|
|
# define fp_tan tan |
641 |
|
|
# define fp_sinh sinh |
642 |
|
|
# define fp_cosh cosh |
643 |
|
|
# define fp_tanh tanh |
644 |
|
|
# define fp_asin asin |
645 |
|
|
# define fp_acos acos |
646 |
|
|
# define fp_atan atan |
647 |
|
|
# define fp_asinh asinh |
648 |
|
|
# define fp_acosh acosh |
649 |
|
|
# define fp_atanh atanh |
650 |
|
|
# define fp_floor floor |
651 |
|
|
# define fp_ceil ceil |
652 |
|
|
#endif |
653 |
|
|
|
654 |
|
|
#if defined(FPU_IEEE) && defined(X86_ASSEMBLY) |
655 |
|
|
// Assembly optimized support functions. Taken from glibc 2.2.2 |
656 |
|
|
|
657 |
|
|
#undef fp_log |
658 |
|
|
#define fp_log fp_do_log |
659 |
|
|
|
660 |
|
|
#ifndef FPU_FAST_MATH |
661 |
|
|
// FIXME: unimplemented |
662 |
|
|
PRIVATE fpu_extended fp_do_log(fpu_extended x); |
663 |
|
|
#else |
664 |
|
|
PRIVATE inline fpu_extended fp_do_log(fpu_extended x) |
665 |
|
|
{ |
666 |
|
|
fpu_extended value; |
667 |
|
|
__asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); |
668 |
|
|
return value; |
669 |
|
|
} |
670 |
|
|
#endif |
671 |
|
|
|
672 |
|
|
#undef fp_log10 |
673 |
|
|
#define fp_log10 fp_do_log10 |
674 |
|
|
|
675 |
|
|
#ifndef FPU_FAST_MATH |
676 |
|
|
// FIXME: unimplemented |
677 |
|
|
PRIVATE fpu_extended fp_do_log10(fpu_extended x); |
678 |
|
|
#else |
679 |
|
|
PRIVATE inline fpu_extended fp_do_log10(fpu_extended x) |
680 |
|
|
{ |
681 |
|
|
fpu_extended value; |
682 |
|
|
__asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); |
683 |
|
|
return value; |
684 |
|
|
} |
685 |
|
|
#endif |
686 |
|
|
|
687 |
|
|
#undef fp_exp |
688 |
|
|
#define fp_exp fp_do_exp |
689 |
|
|
|
690 |
|
|
#ifndef FPU_FAST_MATH |
691 |
|
|
// FIXME: unimplemented |
692 |
|
|
PRIVATE fpu_extended fp_do_exp(fpu_extended x); |
693 |
|
|
#else |
694 |
|
|
PRIVATE inline fpu_extended fp_do_exp(fpu_extended x) |
695 |
|
|
{ |
696 |
|
|
fpu_extended value, exponent; |
697 |
|
|
__asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t" |
698 |
|
|
"fmul %%st(1) # x * log2(e)\n\t" |
699 |
|
|
"fst %%st(1)\n\t" |
700 |
|
|
"frndint # int(x * log2(e))\n\t" |
701 |
|
|
"fxch\n\t" |
702 |
|
|
"fsub %%st(1) # fract(x * log2(e))\n\t" |
703 |
|
|
"f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" |
704 |
|
|
: "=t" (value), "=u" (exponent) : "0" (x)); |
705 |
|
|
value += 1.0; |
706 |
|
|
__asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent)); |
707 |
|
|
return value; |
708 |
|
|
} |
709 |
|
|
#endif |
710 |
|
|
|
711 |
|
|
#undef fp_pow |
712 |
|
|
#define fp_pow fp_do_pow |
713 |
|
|
|
714 |
|
|
PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y); |
715 |
|
|
|
716 |
|
|
#undef fp_fabs |
717 |
|
|
#define fp_fabs fp_do_fabs |
718 |
|
|
|
719 |
|
|
PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x) |
720 |
|
|
{ |
721 |
|
|
fpu_extended value; |
722 |
|
|
__asm__ __volatile__("fabs" : "=t" (value) : "0" (x)); |
723 |
|
|
return value; |
724 |
|
|
} |
725 |
|
|
|
726 |
|
|
#undef fp_sqrt |
727 |
|
|
#define fp_sqrt fp_do_sqrt |
728 |
|
|
|
729 |
|
|
PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x) |
730 |
|
|
{ |
731 |
|
|
fpu_extended value; |
732 |
|
|
__asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x)); |
733 |
|
|
return value; |
734 |
|
|
} |
735 |
|
|
|
736 |
|
|
#undef fp_sin |
737 |
|
|
#define fp_sin fp_do_sin |
738 |
|
|
|
739 |
|
|
PRIVATE inline fpu_extended fp_do_sin(fpu_extended x) |
740 |
|
|
{ |
741 |
|
|
fpu_extended value; |
742 |
|
|
__asm__ __volatile__("fsin" : "=t" (value) : "0" (x)); |
743 |
|
|
return value; |
744 |
|
|
} |
745 |
|
|
|
746 |
|
|
#undef fp_cos |
747 |
|
|
#define fp_cos fp_do_cos |
748 |
|
|
|
749 |
|
|
PRIVATE inline fpu_extended fp_do_cos(fpu_extended x) |
750 |
|
|
{ |
751 |
|
|
fpu_extended value; |
752 |
|
|
__asm__ __volatile__("fcos" : "=t" (value) : "0" (x)); |
753 |
|
|
return value; |
754 |
|
|
} |
755 |
|
|
|
756 |
|
|
#undef fp_tan |
757 |
|
|
#define fp_tan fp_do_tan |
758 |
|
|
|
759 |
|
|
PRIVATE inline fpu_extended fp_do_tan(fpu_extended x) |
760 |
|
|
{ |
761 |
|
|
fpu_extended value; |
762 |
|
|
__asm__ __volatile__("fptan" : "=t" (value) : "0" (x)); |
763 |
|
|
return value; |
764 |
|
|
} |
765 |
|
|
|
766 |
|
|
#undef fp_expm1 |
767 |
|
|
#define fp_expm1 fp_do_expm1 |
768 |
|
|
|
769 |
|
|
// Returns: exp(X) - 1.0 |
770 |
|
|
PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x) |
771 |
|
|
{ |
772 |
|
|
fpu_extended value, exponent, temp; |
773 |
|
|
__asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" |
774 |
|
|
"fmul %%st(1) # x * log2(e)\n\t" |
775 |
|
|
"fst %%st(1)\n\t" |
776 |
|
|
"frndint # int(x * log2(e))\n\t" |
777 |
|
|
"fxch\n\t" |
778 |
|
|
"fsub %%st(1) # fract(x * log2(e))\n\t" |
779 |
|
|
"f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" |
780 |
|
|
"fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" |
781 |
|
|
: "=t" (value), "=u" (exponent) : "0" (x)); |
782 |
|
|
__asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent)); |
783 |
|
|
temp -= 1.0; |
784 |
|
|
return temp + value ? temp + value : x; |
785 |
|
|
} |
786 |
|
|
|
787 |
|
|
#undef fp_sgn1 |
788 |
|
|
#define fp_sgn1 fp_do_sgn1 |
789 |
|
|
|
790 |
|
|
PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x) |
791 |
|
|
{ |
792 |
|
|
fp_declare_init_shape(sxp, x, extended); |
793 |
|
|
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
794 |
|
|
sxp->ieee_nan.one = 1; |
795 |
|
|
sxp->ieee_nan.quiet_nan = 0; |
796 |
|
|
sxp->ieee_nan.mantissa0 = 0; |
797 |
|
|
sxp->ieee_nan.mantissa1 = 0; |
798 |
|
|
return x; |
799 |
|
|
} |
800 |
|
|
|
801 |
|
|
#undef fp_sinh |
802 |
|
|
#define fp_sinh fp_do_sinh |
803 |
|
|
|
804 |
|
|
#ifndef FPU_FAST_MATH |
805 |
|
|
// FIXME: unimplemented |
806 |
|
|
PRIVATE fpu_extended fp_do_sinh(fpu_extended x); |
807 |
|
|
#else |
808 |
|
|
PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x) |
809 |
|
|
{ |
810 |
|
|
fpu_extended exm1 = fp_expm1(fp_fabs(x)); |
811 |
|
|
return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x); |
812 |
|
|
} |
813 |
|
|
#endif |
814 |
|
|
|
815 |
|
|
#undef fp_cosh |
816 |
|
|
#define fp_cosh fp_do_cosh |
817 |
|
|
|
818 |
|
|
#ifndef FPU_FAST_MATH |
819 |
|
|
// FIXME: unimplemented |
820 |
|
|
PRIVATE fpu_extended fp_do_cosh(fpu_extended x); |
821 |
|
|
#else |
822 |
|
|
PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x) |
823 |
|
|
{ |
824 |
|
|
fpu_extended ex = fp_exp(x); |
825 |
|
|
return 0.5 * (ex + 1.0 / ex); |
826 |
|
|
} |
827 |
|
|
#endif |
828 |
|
|
|
829 |
|
|
#undef fp_tanh |
830 |
|
|
#define fp_tanh fp_do_tanh |
831 |
|
|
|
832 |
|
|
#ifndef FPU_FAST_MATH |
833 |
|
|
// FIXME: unimplemented |
834 |
|
|
PRIVATE fpu_extended fp_do_tanh(fpu_extended x); |
835 |
|
|
#else |
836 |
|
|
PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x) |
837 |
|
|
{ |
838 |
|
|
fpu_extended exm1 = fp_expm1(-fp_fabs(x + x)); |
839 |
|
|
return exm1 / (exm1 + 2.0) * fp_sgn1(-x); |
840 |
|
|
} |
841 |
|
|
#endif |
842 |
|
|
|
843 |
|
|
#undef fp_atan2 |
844 |
|
|
#define fp_atan2 fp_do_atan2 |
845 |
|
|
|
846 |
|
|
PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x) |
847 |
|
|
{ |
848 |
|
|
fpu_extended value; |
849 |
|
|
__asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)"); |
850 |
|
|
return value; |
851 |
|
|
} |
852 |
|
|
|
853 |
|
|
#undef fp_asin |
854 |
|
|
#define fp_asin fp_do_asin |
855 |
|
|
|
856 |
|
|
PRIVATE inline fpu_extended fp_do_asin(fpu_extended x) |
857 |
|
|
{ |
858 |
|
|
return fp_atan2(x, fp_sqrt(1.0 - x * x)); |
859 |
|
|
} |
860 |
|
|
|
861 |
|
|
#undef fp_acos |
862 |
|
|
#define fp_acos fp_do_acos |
863 |
|
|
|
864 |
|
|
PRIVATE inline fpu_extended fp_do_acos(fpu_extended x) |
865 |
|
|
{ |
866 |
|
|
return fp_atan2(fp_sqrt(1.0 - x * x), x); |
867 |
|
|
} |
868 |
|
|
|
869 |
|
|
#undef fp_atan |
870 |
|
|
#define fp_atan fp_do_atan |
871 |
|
|
|
872 |
|
|
PRIVATE inline fpu_extended fp_do_atan(fpu_extended x) |
873 |
|
|
{ |
874 |
|
|
fpu_extended value; |
875 |
|
|
__asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)"); |
876 |
|
|
return value; |
877 |
|
|
} |
878 |
|
|
|
879 |
|
|
#undef fp_log1p |
880 |
|
|
#define fp_log1p fp_do_log1p |
881 |
|
|
|
882 |
|
|
// Returns: ln(1.0 + X) |
883 |
|
|
PRIVATE fpu_extended fp_do_log1p(fpu_extended x); |
884 |
|
|
|
885 |
|
|
#undef fp_asinh |
886 |
|
|
#define fp_asinh fp_do_asinh |
887 |
|
|
|
888 |
|
|
PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x) |
889 |
|
|
{ |
890 |
|
|
fpu_extended y = fp_fabs(x); |
891 |
|
|
return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x)); |
892 |
|
|
} |
893 |
|
|
|
894 |
|
|
#undef fp_acosh |
895 |
|
|
#define fp_acosh fp_do_acosh |
896 |
|
|
|
897 |
|
|
PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x) |
898 |
|
|
{ |
899 |
|
|
return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0)); |
900 |
|
|
} |
901 |
|
|
|
902 |
|
|
#undef fp_atanh |
903 |
|
|
#define fp_atanh fp_do_atanh |
904 |
|
|
|
905 |
|
|
PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x) |
906 |
|
|
{ |
907 |
|
|
fpu_extended y = fp_fabs(x); |
908 |
|
|
return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x); |
909 |
|
|
} |
910 |
|
|
|
911 |
|
|
#undef fp_floor |
912 |
|
|
#define fp_floor fp_do_floor |
913 |
|
|
|
914 |
|
|
PRIVATE inline fpu_extended fp_do_floor(fpu_extended x) |
915 |
|
|
{ |
916 |
|
|
volatile unsigned int cw; |
917 |
|
|
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); |
918 |
|
|
volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down |
919 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); |
920 |
|
|
fpu_extended value; |
921 |
|
|
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); |
922 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw)); |
923 |
|
|
return value; |
924 |
|
|
} |
925 |
|
|
|
926 |
|
|
#undef fp_ceil |
927 |
|
|
#define fp_ceil fp_do_ceil |
928 |
|
|
|
929 |
|
|
PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x) |
930 |
|
|
{ |
931 |
|
|
volatile unsigned int cw; |
932 |
|
|
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); |
933 |
|
|
volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up |
934 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); |
935 |
|
|
fpu_extended value; |
936 |
|
|
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); |
937 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw)); |
938 |
|
|
return value; |
939 |
|
|
} |
940 |
|
|
|
941 |
|
|
#define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ |
942 |
|
|
PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \ |
943 |
|
|
{ \ |
944 |
|
|
volatile unsigned int cw; \ |
945 |
|
|
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); \ |
946 |
|
|
volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \ |
947 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \ |
948 |
|
|
fpu_extended value; \ |
949 |
|
|
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \ |
950 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw)); \ |
951 |
|
|
return value; \ |
952 |
|
|
} |
953 |
|
|
|
954 |
|
|
#undef fp_round_to_minus_infinity |
955 |
|
|
#define fp_round_to_minus_infinity fp_do_round_to_minus_infinity |
956 |
|
|
|
957 |
|
|
DEFINE_ROUND_FUNC(minus_infinity, 0x400) |
958 |
|
|
|
959 |
|
|
#undef fp_round_to_plus_infinity |
960 |
|
|
#define fp_round_to_plus_infinity fp_do_round_to_plus_infinity |
961 |
|
|
|
962 |
|
|
DEFINE_ROUND_FUNC(plus_infinity, 0x800) |
963 |
|
|
|
964 |
|
|
#undef fp_round_to_zero |
965 |
|
|
#define fp_round_to_zero fp_do_round_to_zero |
966 |
|
|
|
967 |
|
|
DEFINE_ROUND_FUNC(zero, 0xc00) |
968 |
|
|
|
969 |
|
|
#undef fp_round_to_nearest |
970 |
|
|
#define fp_round_to_nearest fp_do_round_to_nearest |
971 |
|
|
|
972 |
|
|
DEFINE_ROUND_FUNC(nearest, 0x000) |
973 |
|
|
|
974 |
|
|
#endif /* X86_ASSEMBLY */ |
975 |
|
|
|
976 |
|
|
#ifndef fp_round_to_minus_infinity |
977 |
|
|
#define fp_round_to_minus_infinity(x) fp_floor(x) |
978 |
|
|
#endif |
979 |
|
|
|
980 |
|
|
#ifndef fp_round_to_plus_infinity |
981 |
|
|
#define fp_round_to_plus_infinity(x) fp_ceil(x) |
982 |
|
|
#endif |
983 |
|
|
|
984 |
|
|
#ifndef fp_round_to_zero |
985 |
|
|
#define fp_round_to_zero(x) ((int)(x)) |
986 |
|
|
#endif |
987 |
|
|
|
988 |
|
|
#ifndef fp_round_to_nearest |
989 |
|
|
#define fp_round_to_nearest(x) ((int)((x) + 0.5)) |
990 |
|
|
#endif |
991 |
|
|
|
992 |
|
|
#endif /* FPU_MATHLIB_H */ |