1 |
gbeauche |
1.1 |
/* |
2 |
|
|
* fpu/mathlib.h - Floating-point math support library |
3 |
|
|
* |
4 |
|
|
* Basilisk II (C) 1997-2001 Christian Bauer |
5 |
|
|
* |
6 |
|
|
* MC68881/68040 fpu emulation |
7 |
|
|
* |
8 |
|
|
* Original UAE FPU, copyright 1996 Herman ten Brugge |
9 |
|
|
* Rewrite for x86, copyright 1999-2001 Lauri Pesonen |
10 |
|
|
* New framework, copyright 2000-2001 Gwenole Beauchesne |
11 |
|
|
* Adapted for JIT compilation (c) Bernd Meyer, 2000-2001 |
12 |
|
|
* |
13 |
|
|
* This program is free software; you can redistribute it and/or modify |
14 |
|
|
* it under the terms of the GNU General Public License as published by |
15 |
|
|
* the Free Software Foundation; either version 2 of the License, or |
16 |
|
|
* (at your option) any later version. |
17 |
|
|
* |
18 |
|
|
* This program is distributed in the hope that it will be useful, |
19 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21 |
|
|
* GNU General Public License for more details. |
22 |
|
|
* |
23 |
|
|
* You should have received a copy of the GNU General Public License |
24 |
|
|
* along with this program; if not, write to the Free Software |
25 |
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
26 |
|
|
*/ |
27 |
|
|
|
28 |
|
|
#ifndef FPU_MATHLIB_H |
29 |
|
|
#define FPU_MATHLIB_H |
30 |
|
|
|
31 |
|
|
/* NOTE: this file shall be included only from fpu/fpu_*.cpp */ |
32 |
|
|
#undef PUBLIC |
33 |
|
|
#define PUBLIC extern |
34 |
|
|
|
35 |
|
|
#undef PRIVATE |
36 |
|
|
#define PRIVATE static |
37 |
|
|
|
38 |
|
|
#undef FFPU |
39 |
|
|
#define FFPU /**/ |
40 |
|
|
|
41 |
|
|
#undef FPU |
42 |
|
|
#define FPU fpu. |
43 |
|
|
|
44 |
|
|
// Define the following macro if branches are expensive. If so, |
45 |
|
|
// integer-based isnan() and isinf() functions are implemented. |
46 |
|
|
// TODO: move to Makefile.in |
47 |
|
|
#define BRANCHES_ARE_EXPENSIVE 1 |
48 |
|
|
|
49 |
|
|
// Use ISO C99 extended-precision math functions (glibc 2.1+) |
50 |
|
|
#define FPU_USE_ISO_C99 1 |
51 |
|
|
|
52 |
|
|
// NOTE: this is irrelevant on Win32 platforms since the MS libraries |
53 |
|
|
// don't support extended-precision floating-point computations |
54 |
|
|
#ifdef WIN32 |
55 |
|
|
#undef FPU_USE_ISO_C99 |
56 |
|
|
#endif |
57 |
|
|
|
58 |
|
|
// Use faster implementation of math functions, but this could cause |
59 |
|
|
// some incorrect results (?) |
60 |
|
|
// TODO: actually implement the slower but safer versions |
61 |
|
|
#define FPU_FAST_MATH 1 |
62 |
|
|
|
63 |
|
|
#if FPU_USE_ISO_C99 |
64 |
|
|
// NOTE: no prior <math.h> shall be included at this point |
65 |
|
|
#define __USE_ISOC99 1 // for glibc 2.2.X and newer |
66 |
|
|
#define __USE_ISOC9X 1 // for glibc 2.1.X |
67 |
|
|
#include <math.h> |
68 |
|
|
#else |
69 |
|
|
#include <cmath> |
70 |
|
|
using namespace std; |
71 |
|
|
#endif |
72 |
|
|
|
73 |
|
|
/* -------------------------------------------------------------------------- */ |
74 |
|
|
/* --- Floating-point register types --- */ |
75 |
|
|
/* -------------------------------------------------------------------------- */ |
76 |
|
|
|
77 |
|
|
// Single : S 8*E 23*F |
78 |
|
|
#define FP_SINGLE_EXP_MAX 0xff |
79 |
|
|
#define FP_SINGLE_EXP_BIAS 0x7f |
80 |
|
|
|
81 |
|
|
// Double : S 11*E 52*F |
82 |
|
|
#define FP_DOUBLE_EXP_MAX 0x7ff |
83 |
|
|
#define FP_DOUBLE_EXP_BIAS 0x3ff |
84 |
|
|
|
85 |
|
|
// Extended : S 15*E 64*F |
86 |
|
|
#define FP_EXTENDED_EXP_MAX 0x7fff |
87 |
|
|
#define FP_EXTENDED_EXP_BIAS 0x3fff |
88 |
|
|
|
89 |
|
|
// Zeroes : E = 0 & F = 0 |
90 |
|
|
// Infinities : E = MAX & F = 0 |
91 |
|
|
// Not-A-Number : E = MAX & F # 0 |
92 |
|
|
|
93 |
|
|
/* -------------------------------------------------------------------------- */ |
94 |
|
|
/* --- Floating-point type shapes (IEEE-compliant) --- */ |
95 |
|
|
/* -------------------------------------------------------------------------- */ |
96 |
|
|
|
97 |
|
|
// Taken from glibc 2.2.x: ieee754.h |
98 |
|
|
|
99 |
|
|
// IEEE-754 float format |
100 |
|
|
union fpu_single_shape { |
101 |
|
|
|
102 |
|
|
fpu_single value; |
103 |
|
|
|
104 |
|
|
/* This is the IEEE 754 single-precision format. */ |
105 |
|
|
struct { |
106 |
gbeauche |
1.3 |
#ifdef WORDS_BIGENDIAN |
107 |
gbeauche |
1.1 |
unsigned int negative:1; |
108 |
|
|
unsigned int exponent:8; |
109 |
|
|
unsigned int mantissa:23; |
110 |
gbeauche |
1.3 |
#else |
111 |
gbeauche |
1.1 |
unsigned int mantissa:23; |
112 |
|
|
unsigned int exponent:8; |
113 |
|
|
unsigned int negative:1; |
114 |
gbeauche |
1.3 |
#endif |
115 |
gbeauche |
1.1 |
} ieee; |
116 |
|
|
|
117 |
|
|
/* This format makes it easier to see if a NaN is a signalling NaN. */ |
118 |
|
|
struct { |
119 |
gbeauche |
1.3 |
#ifdef WORDS_BIGENDIAN |
120 |
gbeauche |
1.1 |
unsigned int negative:1; |
121 |
|
|
unsigned int exponent:8; |
122 |
|
|
unsigned int quiet_nan:1; |
123 |
|
|
unsigned int mantissa:22; |
124 |
gbeauche |
1.3 |
#else |
125 |
gbeauche |
1.1 |
unsigned int mantissa:22; |
126 |
|
|
unsigned int quiet_nan:1; |
127 |
|
|
unsigned int exponent:8; |
128 |
|
|
unsigned int negative:1; |
129 |
gbeauche |
1.3 |
#endif |
130 |
gbeauche |
1.1 |
} ieee_nan; |
131 |
|
|
}; |
132 |
|
|
|
133 |
|
|
// IEEE-754 double format |
134 |
|
|
union fpu_double_shape { |
135 |
|
|
fpu_double value; |
136 |
|
|
|
137 |
|
|
/* This is the IEEE 754 double-precision format. */ |
138 |
|
|
struct { |
139 |
gbeauche |
1.3 |
#ifdef WORDS_BIGENDIAN |
140 |
gbeauche |
1.1 |
unsigned int negative:1; |
141 |
|
|
unsigned int exponent:11; |
142 |
|
|
/* Together these comprise the mantissa. */ |
143 |
|
|
unsigned int mantissa0:20; |
144 |
|
|
unsigned int mantissa1:32; |
145 |
gbeauche |
1.3 |
#else |
146 |
|
|
# if HOST_FLOAT_WORDS_BIG_ENDIAN |
147 |
gbeauche |
1.1 |
unsigned int mantissa0:20; |
148 |
|
|
unsigned int exponent:11; |
149 |
|
|
unsigned int negative:1; |
150 |
|
|
unsigned int mantissa1:32; |
151 |
|
|
# else |
152 |
|
|
/* Together these comprise the mantissa. */ |
153 |
|
|
unsigned int mantissa1:32; |
154 |
|
|
unsigned int mantissa0:20; |
155 |
|
|
unsigned int exponent:11; |
156 |
|
|
unsigned int negative:1; |
157 |
|
|
# endif |
158 |
gbeauche |
1.3 |
#endif |
159 |
gbeauche |
1.1 |
} ieee; |
160 |
|
|
|
161 |
|
|
/* This format makes it easier to see if a NaN is a signalling NaN. */ |
162 |
|
|
struct { |
163 |
gbeauche |
1.3 |
#ifdef WORDS_BIGENDIAN |
164 |
gbeauche |
1.1 |
unsigned int negative:1; |
165 |
|
|
unsigned int exponent:11; |
166 |
|
|
unsigned int quiet_nan:1; |
167 |
|
|
/* Together these comprise the mantissa. */ |
168 |
|
|
unsigned int mantissa0:19; |
169 |
|
|
unsigned int mantissa1:32; |
170 |
|
|
#else |
171 |
gbeauche |
1.3 |
# if HOST_FLOAT_WORDS_BIG_ENDIAN |
172 |
gbeauche |
1.1 |
unsigned int mantissa0:19; |
173 |
|
|
unsigned int quiet_nan:1; |
174 |
|
|
unsigned int exponent:11; |
175 |
|
|
unsigned int negative:1; |
176 |
|
|
unsigned int mantissa1:32; |
177 |
|
|
# else |
178 |
|
|
/* Together these comprise the mantissa. */ |
179 |
|
|
unsigned int mantissa1:32; |
180 |
|
|
unsigned int mantissa0:19; |
181 |
|
|
unsigned int quiet_nan:1; |
182 |
|
|
unsigned int exponent:11; |
183 |
|
|
unsigned int negative:1; |
184 |
|
|
# endif |
185 |
|
|
#endif |
186 |
|
|
} ieee_nan; |
187 |
|
|
|
188 |
gbeauche |
1.2 |
/* This format is used to extract the sign_exponent and mantissa parts only */ |
189 |
|
|
struct { |
190 |
gbeauche |
1.3 |
#if HOST_FLOAT_WORDS_BIG_ENDIAN |
191 |
gbeauche |
1.2 |
unsigned int msw:32; |
192 |
|
|
unsigned int lsw:32; |
193 |
gbeauche |
1.1 |
#else |
194 |
gbeauche |
1.2 |
unsigned int lsw:32; |
195 |
|
|
unsigned int msw:32; |
196 |
gbeauche |
1.1 |
#endif |
197 |
gbeauche |
1.2 |
} parts; |
198 |
|
|
}; |
199 |
gbeauche |
1.1 |
|
200 |
gbeauche |
1.2 |
#ifdef USE_LONG_DOUBLE |
201 |
gbeauche |
1.1 |
// IEEE-854 long double format |
202 |
|
|
union fpu_extended_shape { |
203 |
|
|
fpu_extended value; |
204 |
|
|
|
205 |
|
|
/* This is the IEEE 854 double-extended-precision format. */ |
206 |
|
|
struct { |
207 |
gbeauche |
1.3 |
#ifdef WORDS_BIGENDIAN |
208 |
gbeauche |
1.1 |
unsigned int negative:1; |
209 |
|
|
unsigned int exponent:15; |
210 |
|
|
unsigned int empty:16; |
211 |
|
|
unsigned int mantissa0:32; |
212 |
|
|
unsigned int mantissa1:32; |
213 |
gbeauche |
1.3 |
#else |
214 |
|
|
# if HOST_FLOAT_WORDS_BIG_ENDIAN |
215 |
gbeauche |
1.1 |
unsigned int exponent:15; |
216 |
|
|
unsigned int negative:1; |
217 |
|
|
unsigned int empty:16; |
218 |
|
|
unsigned int mantissa0:32; |
219 |
|
|
unsigned int mantissa1:32; |
220 |
|
|
# else |
221 |
|
|
unsigned int mantissa1:32; |
222 |
|
|
unsigned int mantissa0:32; |
223 |
|
|
unsigned int exponent:15; |
224 |
|
|
unsigned int negative:1; |
225 |
|
|
unsigned int empty:16; |
226 |
|
|
# endif |
227 |
|
|
#endif |
228 |
|
|
} ieee; |
229 |
|
|
|
230 |
|
|
/* This is for NaNs in the IEEE 854 double-extended-precision format. */ |
231 |
|
|
struct { |
232 |
gbeauche |
1.3 |
#ifdef WORDS_BIGENDIAN |
233 |
gbeauche |
1.1 |
unsigned int negative:1; |
234 |
|
|
unsigned int exponent:15; |
235 |
|
|
unsigned int empty:16; |
236 |
|
|
unsigned int one:1; |
237 |
|
|
unsigned int quiet_nan:1; |
238 |
|
|
unsigned int mantissa0:30; |
239 |
|
|
unsigned int mantissa1:32; |
240 |
gbeauche |
1.3 |
#else |
241 |
|
|
# if HOST_FLOAT_WORDS_BIG_ENDIAN |
242 |
gbeauche |
1.1 |
unsigned int exponent:15; |
243 |
|
|
unsigned int negative:1; |
244 |
|
|
unsigned int empty:16; |
245 |
|
|
unsigned int mantissa0:30; |
246 |
|
|
unsigned int quiet_nan:1; |
247 |
|
|
unsigned int one:1; |
248 |
|
|
unsigned int mantissa1:32; |
249 |
|
|
# else |
250 |
|
|
unsigned int mantissa1:32; |
251 |
|
|
unsigned int mantissa0:30; |
252 |
|
|
unsigned int quiet_nan:1; |
253 |
|
|
unsigned int one:1; |
254 |
|
|
unsigned int exponent:15; |
255 |
|
|
unsigned int negative:1; |
256 |
|
|
unsigned int empty:16; |
257 |
|
|
# endif |
258 |
|
|
#endif |
259 |
|
|
} ieee_nan; |
260 |
|
|
|
261 |
|
|
/* This format is used to extract the sign_exponent and mantissa parts only */ |
262 |
|
|
struct { |
263 |
gbeauche |
1.3 |
#if HOST_FLOAT_WORDS_BIG_ENDIAN |
264 |
gbeauche |
1.1 |
unsigned int sign_exponent:16; |
265 |
|
|
unsigned int empty:16; |
266 |
|
|
unsigned int msw:32; |
267 |
|
|
unsigned int lsw:32; |
268 |
|
|
#else |
269 |
|
|
unsigned int lsw:32; |
270 |
|
|
unsigned int msw:32; |
271 |
|
|
unsigned int sign_exponent:16; |
272 |
|
|
unsigned int empty:16; |
273 |
|
|
#endif |
274 |
|
|
} parts; |
275 |
|
|
}; |
276 |
gbeauche |
1.2 |
#endif |
277 |
|
|
|
278 |
|
|
#ifdef USE_QUAD_DOUBLE |
279 |
gbeauche |
1.1 |
// IEEE-854 quad double format |
280 |
|
|
union fpu_extended_shape { |
281 |
|
|
fpu_extended value; |
282 |
|
|
|
283 |
|
|
/* This is the IEEE 854 quad-precision format. */ |
284 |
|
|
struct { |
285 |
gbeauche |
1.3 |
#ifdef WORDS_BIGENDIAN |
286 |
gbeauche |
1.1 |
unsigned int negative:1; |
287 |
|
|
unsigned int exponent:15; |
288 |
|
|
unsigned int mantissa0:16; |
289 |
|
|
unsigned int mantissa1:32; |
290 |
|
|
unsigned int mantissa2:32; |
291 |
|
|
unsigned int mantissa3:32; |
292 |
|
|
#else |
293 |
|
|
unsigned int mantissa3:32; |
294 |
|
|
unsigned int mantissa2:32; |
295 |
|
|
unsigned int mantissa1:32; |
296 |
|
|
unsigned int mantissa0:16; |
297 |
|
|
unsigned int exponent:15; |
298 |
|
|
unsigned int negative:1; |
299 |
|
|
#endif |
300 |
|
|
} ieee; |
301 |
|
|
|
302 |
|
|
/* This is for NaNs in the IEEE 854 quad-precision format. */ |
303 |
|
|
struct { |
304 |
gbeauche |
1.3 |
#ifdef WORDS_BIGENDIAN |
305 |
gbeauche |
1.1 |
unsigned int negative:1; |
306 |
|
|
unsigned int exponent:15; |
307 |
|
|
unsigned int quiet_nan:1; |
308 |
|
|
unsigned int mantissa0:15; |
309 |
|
|
unsigned int mantissa1:30; |
310 |
|
|
unsigned int mantissa2:32; |
311 |
|
|
unsigned int mantissa3:32; |
312 |
|
|
#else |
313 |
|
|
unsigned int mantissa3:32; |
314 |
|
|
unsigned int mantissa2:32; |
315 |
|
|
unsigned int mantissa1:32; |
316 |
|
|
unsigned int mantissa0:15; |
317 |
|
|
unsigned int quiet_nan:1; |
318 |
|
|
unsigned int exponent:15; |
319 |
|
|
unsigned int negative:1; |
320 |
|
|
#endif |
321 |
|
|
} ieee_nan; |
322 |
|
|
|
323 |
|
|
/* This format is used to extract the sign_exponent and mantissa parts only */ |
324 |
gbeauche |
1.3 |
#if HOST_FLOAT_WORDS_BIG_ENDIAN |
325 |
gbeauche |
1.1 |
struct { |
326 |
|
|
uae_u64 msw; |
327 |
|
|
uae_u64 lsw; |
328 |
|
|
} parts64; |
329 |
|
|
struct { |
330 |
|
|
uae_u32 w0; |
331 |
|
|
uae_u32 w1; |
332 |
|
|
uae_u32 w2; |
333 |
|
|
uae_u32 w3; |
334 |
|
|
} parts32; |
335 |
|
|
#else |
336 |
|
|
struct { |
337 |
|
|
uae_u64 lsw; |
338 |
|
|
uae_u64 msw; |
339 |
|
|
} parts64; |
340 |
|
|
struct { |
341 |
|
|
uae_u32 w3; |
342 |
|
|
uae_u32 w2; |
343 |
|
|
uae_u32 w1; |
344 |
|
|
uae_u32 w0; |
345 |
|
|
} parts32; |
346 |
|
|
#endif |
347 |
|
|
}; |
348 |
gbeauche |
1.2 |
#endif |
349 |
gbeauche |
1.1 |
|
350 |
|
|
// Declare and initialize a pointer to a shape of the requested FP type |
351 |
|
|
#define fp_declare_init_shape(psvar, rfvar, ftype) \ |
352 |
|
|
fpu_ ## ftype ## _shape * psvar = (fpu_ ## ftype ## _shape *)( &rfvar ) |
353 |
|
|
|
354 |
|
|
/* -------------------------------------------------------------------------- */ |
355 |
|
|
/* --- Extra Math Functions --- */ |
356 |
|
|
/* --- (most of them had to be defined before including <fpu/flags.h>) --- */ |
357 |
|
|
/* -------------------------------------------------------------------------- */ |
358 |
|
|
|
359 |
|
|
#undef isnan |
360 |
|
|
#if 0 && defined(HAVE_ISNANL) |
361 |
|
|
# define isnan(x) isnanl((x)) |
362 |
|
|
#else |
363 |
|
|
# define isnan(x) fp_do_isnan((x)) |
364 |
|
|
#endif |
365 |
|
|
|
366 |
|
|
PRIVATE inline bool FFPU fp_do_isnan(fpu_register const & r) |
367 |
|
|
{ |
368 |
gbeauche |
1.2 |
#ifdef BRANCHES_ARE_EXPENSIVE |
369 |
|
|
#ifndef USE_LONG_DOUBLE |
370 |
|
|
fp_declare_init_shape(sxp, r, double); |
371 |
|
|
uae_s32 hx = sxp->parts.msw; |
372 |
|
|
uae_s32 lx = sxp->parts.lsw; |
373 |
|
|
hx &= 0x7fffffff; |
374 |
|
|
hx |= (uae_u32)(lx | (-lx)) >> 31; |
375 |
|
|
hx = 0x7ff00000 - hx; |
376 |
|
|
return (int)(((uae_u32)hx) >> 31); |
377 |
|
|
#elif USE_QUAD_DOUBLE |
378 |
gbeauche |
1.1 |
fp_declare_init_shape(sxp, r, extended); |
379 |
|
|
uae_s64 hx = sxp->parts64.msw; |
380 |
|
|
uae_s64 lx = sxp->parts64.lsw; |
381 |
|
|
hx &= 0x7fffffffffffffffLL; |
382 |
|
|
hx |= (uae_u64)(lx | (-lx)) >> 63; |
383 |
|
|
hx = 0x7fff000000000000LL - hx; |
384 |
|
|
return (int)((uae_u64)hx >> 63); |
385 |
|
|
#else |
386 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, extended); |
387 |
gbeauche |
1.1 |
uae_s32 se = sxp->parts.sign_exponent; |
388 |
|
|
uae_s32 hx = sxp->parts.msw; |
389 |
|
|
uae_s32 lx = sxp->parts.lsw; |
390 |
|
|
se = (se & 0x7fff) << 1; |
391 |
|
|
lx |= hx & 0x7fffffff; |
392 |
|
|
se |= (uae_u32)(lx | (-lx)) >> 31; |
393 |
|
|
se = 0xfffe - se; |
394 |
|
|
// TODO: check whether rshift count is 16 or 31 |
395 |
|
|
return (int)(((uae_u32)(se)) >> 16); |
396 |
|
|
#endif |
397 |
|
|
#else |
398 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
399 |
|
|
fp_declare_init_shape(sxp, r, extended); |
400 |
|
|
return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) |
401 |
|
|
#else |
402 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
403 |
|
|
return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) |
404 |
|
|
#endif |
405 |
gbeauche |
1.1 |
&& (sxp->ieee_nan.mantissa0 != 0) |
406 |
|
|
&& (sxp->ieee_nan.mantissa1 != 0) |
407 |
|
|
#ifdef USE_QUAD_DOUBLE |
408 |
|
|
&& (sxp->ieee_nan.mantissa2 != 0) |
409 |
|
|
&& (sxp->ieee_nan.mantissa3 != 0) |
410 |
|
|
#endif |
411 |
|
|
; |
412 |
|
|
#endif |
413 |
|
|
} |
414 |
|
|
|
415 |
|
|
#undef isinf |
416 |
|
|
#if 0 && defined(HAVE_ISINFL) |
417 |
|
|
# define isinf(x) isinfl((x)) |
418 |
|
|
#else |
419 |
|
|
# define isinf(x) fp_do_isinf((x)) |
420 |
|
|
#endif |
421 |
|
|
|
422 |
|
|
PRIVATE inline bool FFPU fp_do_isinf(fpu_register const & r) |
423 |
|
|
{ |
424 |
gbeauche |
1.2 |
#ifdef BRANCHES_ARE_EXPENSIVE |
425 |
|
|
#ifndef USE_LONG_DOUBLE |
426 |
|
|
fp_declare_init_shape(sxp, r, double); |
427 |
|
|
uae_s32 hx = sxp->parts.msw; |
428 |
|
|
uae_s32 lx = sxp->parts.lsw; |
429 |
|
|
lx |= (hx & 0x7fffffff) ^ 0x7ff00000; |
430 |
|
|
lx |= -lx; |
431 |
|
|
return ~(lx >> 31) & (hx >> 30); |
432 |
|
|
#elif USE_QUAD_DOUBLE |
433 |
gbeauche |
1.1 |
fp_declare_init_shape(sxp, r, extended); |
434 |
|
|
uae_s64 hx = sxp->parts64.msw; |
435 |
|
|
uae_s64 lx = sxp->parts64.lsw; |
436 |
|
|
lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7fff000000000000LL; |
437 |
|
|
lx |= -lx; |
438 |
|
|
return ~(lx >> 63) & (hx >> 62); |
439 |
|
|
#else |
440 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, extended); |
441 |
gbeauche |
1.1 |
uae_s32 se = sxp->parts.sign_exponent; |
442 |
|
|
uae_s32 hx = sxp->parts.msw; |
443 |
|
|
uae_s32 lx = sxp->parts.lsw; |
444 |
|
|
/* This additional ^ 0x80000000 is necessary because in Intel's |
445 |
|
|
internal representation of the implicit one is explicit. |
446 |
|
|
NOTE: anyway, this is equivalent to & 0x7fffffff in that case. */ |
447 |
|
|
#ifdef __i386__ |
448 |
|
|
lx |= (hx ^ 0x80000000) | ((se & 0x7fff) ^ 0x7fff); |
449 |
|
|
#else |
450 |
|
|
lx |= (hx & 0x7fffffff) | ((se & 0x7fff) ^ 0x7fff); |
451 |
|
|
#endif |
452 |
|
|
lx |= -lx; |
453 |
|
|
se &= 0x8000; |
454 |
|
|
return ~(lx >> 31) & (1 - (se >> 14)); |
455 |
|
|
#endif |
456 |
|
|
#else |
457 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
458 |
|
|
fp_declare_init_shape(sxp, r, extended); |
459 |
|
|
return (sxp->ieee_nan.exponent == FP_EXTENDED_EXP_MAX) |
460 |
|
|
#else |
461 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
462 |
|
|
return (sxp->ieee_nan.exponent == FP_DOUBLE_EXP_MAX) |
463 |
|
|
#endif |
464 |
gbeauche |
1.1 |
&& (sxp->ieee_nan.mantissa0 == 0) |
465 |
|
|
&& (sxp->ieee_nan.mantissa1 == 0) |
466 |
|
|
#ifdef USE_QUAD_DOUBLE |
467 |
|
|
&& (sxp->ieee_nan.mantissa2 == 0) |
468 |
|
|
&& (sxp->ieee_nan.mantissa3 == 0) |
469 |
|
|
#endif |
470 |
|
|
; |
471 |
|
|
#endif |
472 |
|
|
} |
473 |
|
|
|
474 |
|
|
#undef isneg |
475 |
|
|
#define isneg(x) fp_do_isneg((x)) |
476 |
|
|
|
477 |
|
|
PRIVATE inline bool FFPU fp_do_isneg(fpu_register const & r) |
478 |
|
|
{ |
479 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
480 |
|
|
fp_declare_init_shape(sxp, r, extended); |
481 |
|
|
#else |
482 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
483 |
|
|
#endif |
484 |
|
|
return sxp->ieee.negative; |
485 |
gbeauche |
1.1 |
} |
486 |
|
|
|
487 |
|
|
#undef iszero |
488 |
|
|
#define iszero(x) fp_do_iszero((x)) |
489 |
|
|
|
490 |
|
|
PRIVATE inline bool FFPU fp_do_iszero(fpu_register const & r) |
491 |
|
|
{ |
492 |
|
|
// TODO: BRANCHES_ARE_EXPENSIVE |
493 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
494 |
|
|
fp_declare_init_shape(sxp, r, extended); |
495 |
|
|
#else |
496 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
497 |
|
|
#endif |
498 |
gbeauche |
1.1 |
return (sxp->ieee.exponent == 0) |
499 |
|
|
&& (sxp->ieee.mantissa0 == 0) |
500 |
|
|
&& (sxp->ieee.mantissa1 == 0) |
501 |
|
|
#ifdef USE_QUAD_DOUBLE |
502 |
|
|
&& (sxp->ieee.mantissa2 == 0) |
503 |
|
|
&& (sxp->ieee.mantissa3 == 0) |
504 |
|
|
#endif |
505 |
|
|
; |
506 |
|
|
} |
507 |
|
|
|
508 |
|
|
PRIVATE inline void FFPU get_dest_flags(fpu_register const & r) |
509 |
|
|
{ |
510 |
|
|
fl_dest.negative = isneg(r); |
511 |
|
|
fl_dest.zero = iszero(r); |
512 |
|
|
fl_dest.infinity = isinf(r); |
513 |
|
|
fl_dest.nan = isnan(r); |
514 |
|
|
fl_dest.in_range = !fl_dest.zero && !fl_dest.infinity && !fl_dest.nan; |
515 |
|
|
} |
516 |
|
|
|
517 |
|
|
PRIVATE inline void FFPU get_source_flags(fpu_register const & r) |
518 |
|
|
{ |
519 |
|
|
fl_source.negative = isneg(r); |
520 |
|
|
fl_source.zero = iszero(r); |
521 |
|
|
fl_source.infinity = isinf(r); |
522 |
|
|
fl_source.nan = isnan(r); |
523 |
|
|
fl_source.in_range = !fl_source.zero && !fl_source.infinity && !fl_source.nan; |
524 |
|
|
} |
525 |
|
|
|
526 |
|
|
PRIVATE inline void FFPU make_nan(fpu_register & r) |
527 |
|
|
{ |
528 |
|
|
// FIXME: is that correct ? |
529 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
530 |
|
|
fp_declare_init_shape(sxp, r, extended); |
531 |
|
|
sxp->ieee.exponent = FP_EXTENDED_EXP_MAX; |
532 |
|
|
sxp->ieee.mantissa0 = 0xffffffff; |
533 |
|
|
#else |
534 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
535 |
|
|
sxp->ieee.exponent = FP_DOUBLE_EXP_MAX; |
536 |
|
|
sxp->ieee.mantissa0 = 0xfffff; |
537 |
|
|
#endif |
538 |
gbeauche |
1.1 |
sxp->ieee.mantissa1 = 0xffffffff; |
539 |
|
|
#ifdef USE_QUAD_DOUBLE |
540 |
|
|
sxp->ieee.mantissa2 = 0xffffffff; |
541 |
|
|
sxp->ieee.mantissa3 = 0xffffffff; |
542 |
|
|
#endif |
543 |
|
|
} |
544 |
|
|
|
545 |
|
|
PRIVATE inline void FFPU make_zero_positive(fpu_register & r) |
546 |
|
|
{ |
547 |
|
|
#if 1 |
548 |
|
|
r = +0.0; |
549 |
|
|
#else |
550 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
551 |
|
|
fp_declare_init_shape(sxp, r, extended); |
552 |
|
|
#else |
553 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
554 |
|
|
#endif |
555 |
gbeauche |
1.1 |
sxp->ieee.negative = 0; |
556 |
|
|
sxp->ieee.exponent = 0; |
557 |
|
|
sxp->ieee.mantissa0 = 0; |
558 |
|
|
sxp->ieee.mantissa1 = 0; |
559 |
|
|
#ifdef USE_QUAD_DOUBLE |
560 |
|
|
sxp->ieee.mantissa2 = 0; |
561 |
|
|
sxp->ieee.mantissa3 = 0; |
562 |
|
|
#endif |
563 |
|
|
#endif |
564 |
|
|
} |
565 |
|
|
|
566 |
|
|
PRIVATE inline void FFPU make_zero_negative(fpu_register & r) |
567 |
|
|
{ |
568 |
|
|
#if 1 |
569 |
|
|
r = -0.0; |
570 |
|
|
#else |
571 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
572 |
|
|
fp_declare_init_shape(sxp, r, extended); |
573 |
|
|
#else |
574 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
575 |
|
|
#endif |
576 |
gbeauche |
1.1 |
sxp->ieee.negative = 1; |
577 |
|
|
sxp->ieee.exponent = 0; |
578 |
|
|
sxp->ieee.mantissa0 = 0; |
579 |
|
|
sxp->ieee.mantissa1 = 0; |
580 |
|
|
#ifdef USE_QUAD_DOUBLE |
581 |
|
|
sxp->ieee.mantissa2 = 0; |
582 |
|
|
sxp->ieee.mantissa3 = 0; |
583 |
|
|
#endif |
584 |
|
|
#endif |
585 |
|
|
} |
586 |
|
|
|
587 |
|
|
PRIVATE inline void FFPU make_inf_positive(fpu_register & r) |
588 |
|
|
{ |
589 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
590 |
|
|
fp_declare_init_shape(sxp, r, extended); |
591 |
|
|
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
592 |
|
|
#else |
593 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
594 |
|
|
sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; |
595 |
|
|
#endif |
596 |
gbeauche |
1.1 |
sxp->ieee_nan.negative = 0; |
597 |
|
|
sxp->ieee_nan.mantissa0 = 0; |
598 |
|
|
sxp->ieee_nan.mantissa1 = 0; |
599 |
|
|
#ifdef USE_QUAD_DOUBLE |
600 |
|
|
sxp->ieee_nan.mantissa2 = 0; |
601 |
|
|
sxp->ieee_nan.mantissa3 = 0; |
602 |
|
|
#endif |
603 |
|
|
} |
604 |
|
|
|
605 |
|
|
PRIVATE inline void FFPU make_inf_negative(fpu_register & r) |
606 |
|
|
{ |
607 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
608 |
|
|
fp_declare_init_shape(sxp, r, extended); |
609 |
|
|
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
610 |
|
|
#else |
611 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
612 |
|
|
sxp->ieee_nan.exponent = FP_DOUBLE_EXP_MAX; |
613 |
|
|
#endif |
614 |
gbeauche |
1.1 |
sxp->ieee_nan.negative = 1; |
615 |
|
|
sxp->ieee_nan.mantissa0 = 0; |
616 |
|
|
sxp->ieee_nan.mantissa1 = 0; |
617 |
|
|
#ifdef USE_QUAD_DOUBLE |
618 |
|
|
sxp->ieee_nan.mantissa2 = 0; |
619 |
|
|
sxp->ieee_nan.mantissa3 = 0; |
620 |
|
|
#endif |
621 |
|
|
} |
622 |
|
|
|
623 |
|
|
PRIVATE inline fpu_register FFPU fast_fgetexp(fpu_register const & r) |
624 |
|
|
{ |
625 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
626 |
|
|
fp_declare_init_shape(sxp, r, extended); |
627 |
|
|
return (sxp->ieee.exponent - FP_EXTENDED_EXP_BIAS); |
628 |
|
|
#else |
629 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
630 |
|
|
return (sxp->ieee.exponent - FP_DOUBLE_EXP_BIAS); |
631 |
|
|
#endif |
632 |
gbeauche |
1.1 |
} |
633 |
|
|
|
634 |
|
|
// Normalize to range 1..2 |
635 |
|
|
PRIVATE inline void FFPU fast_remove_exponent(fpu_register & r) |
636 |
|
|
{ |
637 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
638 |
|
|
fp_declare_init_shape(sxp, r, extended); |
639 |
|
|
sxp->ieee.exponent = FP_EXTENDED_EXP_BIAS; |
640 |
|
|
#else |
641 |
gbeauche |
1.2 |
fp_declare_init_shape(sxp, r, double); |
642 |
|
|
sxp->ieee.exponent = FP_DOUBLE_EXP_BIAS; |
643 |
|
|
#endif |
644 |
gbeauche |
1.1 |
} |
645 |
|
|
|
646 |
|
|
// The sign of the quotient is the exclusive-OR of the sign bits |
647 |
|
|
// of the source and destination operands. |
648 |
|
|
PRIVATE inline uae_u32 FFPU get_quotient_sign(fpu_register const & ra, fpu_register const & rb) |
649 |
|
|
{ |
650 |
gbeauche |
1.5 |
#if USE_LONG_DOUBLE || USE_QUAD_DOUBLE |
651 |
|
|
fp_declare_init_shape(sap, ra, extended); |
652 |
|
|
fp_declare_init_shape(sbp, rb, extended); |
653 |
|
|
#else |
654 |
gbeauche |
1.2 |
fp_declare_init_shape(sap, ra, double); |
655 |
|
|
fp_declare_init_shape(sbp, rb, double); |
656 |
|
|
#endif |
657 |
|
|
return ((sap->ieee.negative ^ sbp->ieee.negative) ? FPSR_QUOTIENT_SIGN : 0); |
658 |
gbeauche |
1.1 |
} |
659 |
|
|
|
660 |
|
|
/* -------------------------------------------------------------------------- */ |
661 |
|
|
/* --- Math functions --- */ |
662 |
|
|
/* -------------------------------------------------------------------------- */ |
663 |
|
|
|
664 |
gbeauche |
1.5 |
#if FPU_USE_ISO_C99 && (USE_LONG_DOUBLE || USE_QUAD_DOUBLE) |
665 |
gbeauche |
1.4 |
# ifdef HAVE_LOGL |
666 |
|
|
# define fp_log logl |
667 |
|
|
# endif |
668 |
|
|
# ifdef HAVE_LOG10L |
669 |
|
|
# define fp_log10 log10l |
670 |
|
|
# endif |
671 |
|
|
# ifdef HAVE_EXPL |
672 |
|
|
# define fp_exp expl |
673 |
|
|
# endif |
674 |
|
|
# ifdef HAVE_POWL |
675 |
|
|
# define fp_pow powl |
676 |
|
|
# endif |
677 |
|
|
# ifdef HAVE_FABSL |
678 |
|
|
# define fp_fabs fabsl |
679 |
|
|
# endif |
680 |
|
|
# ifdef HAVE_SQRTL |
681 |
|
|
# define fp_sqrt sqrtl |
682 |
|
|
# endif |
683 |
|
|
# ifdef HAVE_SINL |
684 |
|
|
# define fp_sin sinl |
685 |
|
|
# endif |
686 |
|
|
# ifdef HAVE_COSL |
687 |
|
|
# define fp_cos cosl |
688 |
|
|
# endif |
689 |
|
|
# ifdef HAVE_TANL |
690 |
|
|
# define fp_tan tanl |
691 |
|
|
# endif |
692 |
|
|
# ifdef HAVE_SINHL |
693 |
|
|
# define fp_sinh sinhl |
694 |
|
|
# endif |
695 |
|
|
# ifdef HAVE_COSHL |
696 |
|
|
# define fp_cosh coshl |
697 |
|
|
# endif |
698 |
|
|
# ifdef HAVE_TANHL |
699 |
|
|
# define fp_tanh tanhl |
700 |
|
|
# endif |
701 |
|
|
# ifdef HAVE_ASINL |
702 |
|
|
# define fp_asin asinl |
703 |
|
|
# endif |
704 |
|
|
# ifdef HAVE_ACOSL |
705 |
|
|
# define fp_acos acosl |
706 |
|
|
# endif |
707 |
|
|
# ifdef HAVE_ATANL |
708 |
|
|
# define fp_atan atanl |
709 |
|
|
# endif |
710 |
|
|
# ifdef HAVE_ASINHL |
711 |
|
|
# define fp_asinh asinhl |
712 |
|
|
# endif |
713 |
|
|
# ifdef HAVE_ACOSHL |
714 |
|
|
# define fp_acosh acoshl |
715 |
|
|
# endif |
716 |
|
|
# ifdef HAVE_ATANHL |
717 |
|
|
# define fp_atanh atanhl |
718 |
|
|
# endif |
719 |
|
|
# ifdef HAVE_FLOORL |
720 |
|
|
# define fp_floor floorl |
721 |
|
|
# endif |
722 |
|
|
# ifdef HAVE_CEILL |
723 |
|
|
# define fp_ceil ceill |
724 |
|
|
# endif |
725 |
|
|
#endif |
726 |
|
|
|
727 |
|
|
#ifndef fp_log |
728 |
gbeauche |
1.1 |
# define fp_log log |
729 |
gbeauche |
1.4 |
#endif |
730 |
|
|
#ifndef fp_log10 |
731 |
gbeauche |
1.1 |
# define fp_log10 log10 |
732 |
gbeauche |
1.4 |
#endif |
733 |
|
|
#ifndef fp_exp |
734 |
gbeauche |
1.1 |
# define fp_exp exp |
735 |
gbeauche |
1.4 |
#endif |
736 |
|
|
#ifndef fp_pow |
737 |
gbeauche |
1.1 |
# define fp_pow pow |
738 |
gbeauche |
1.4 |
#endif |
739 |
|
|
#ifndef fp_fabs |
740 |
gbeauche |
1.1 |
# define fp_fabs fabs |
741 |
gbeauche |
1.4 |
#endif |
742 |
|
|
#ifndef fp_sqrt |
743 |
gbeauche |
1.1 |
# define fp_sqrt sqrt |
744 |
gbeauche |
1.4 |
#endif |
745 |
|
|
#ifndef fp_sin |
746 |
gbeauche |
1.1 |
# define fp_sin sin |
747 |
gbeauche |
1.4 |
#endif |
748 |
|
|
#ifndef fp_cos |
749 |
gbeauche |
1.1 |
# define fp_cos cos |
750 |
gbeauche |
1.4 |
#endif |
751 |
|
|
#ifndef fp_tan |
752 |
gbeauche |
1.1 |
# define fp_tan tan |
753 |
gbeauche |
1.4 |
#endif |
754 |
|
|
#ifndef fp_sinh |
755 |
gbeauche |
1.1 |
# define fp_sinh sinh |
756 |
gbeauche |
1.4 |
#endif |
757 |
|
|
#ifndef fp_cosh |
758 |
gbeauche |
1.1 |
# define fp_cosh cosh |
759 |
gbeauche |
1.4 |
#endif |
760 |
|
|
#ifndef fp_tanh |
761 |
gbeauche |
1.1 |
# define fp_tanh tanh |
762 |
gbeauche |
1.4 |
#endif |
763 |
|
|
#ifndef fp_asin |
764 |
gbeauche |
1.1 |
# define fp_asin asin |
765 |
gbeauche |
1.4 |
#endif |
766 |
|
|
#ifndef fp_acos |
767 |
gbeauche |
1.1 |
# define fp_acos acos |
768 |
gbeauche |
1.4 |
#endif |
769 |
|
|
#ifndef fp_atan |
770 |
gbeauche |
1.1 |
# define fp_atan atan |
771 |
gbeauche |
1.4 |
#endif |
772 |
|
|
#ifndef fp_asinh |
773 |
gbeauche |
1.1 |
# define fp_asinh asinh |
774 |
gbeauche |
1.4 |
#endif |
775 |
|
|
#ifndef fp_acosh |
776 |
gbeauche |
1.1 |
# define fp_acosh acosh |
777 |
gbeauche |
1.4 |
#endif |
778 |
|
|
#ifndef fp_atanh |
779 |
gbeauche |
1.1 |
# define fp_atanh atanh |
780 |
gbeauche |
1.4 |
#endif |
781 |
|
|
#ifndef fp_floor |
782 |
gbeauche |
1.1 |
# define fp_floor floor |
783 |
gbeauche |
1.4 |
#endif |
784 |
|
|
#ifndef fp_ceil |
785 |
gbeauche |
1.1 |
# define fp_ceil ceil |
786 |
|
|
#endif |
787 |
|
|
|
788 |
|
|
#if defined(FPU_IEEE) && defined(X86_ASSEMBLY) |
789 |
|
|
// Assembly optimized support functions. Taken from glibc 2.2.2 |
790 |
|
|
|
791 |
|
|
#undef fp_log |
792 |
|
|
#define fp_log fp_do_log |
793 |
|
|
|
794 |
|
|
#ifndef FPU_FAST_MATH |
795 |
|
|
// FIXME: unimplemented |
796 |
|
|
PRIVATE fpu_extended fp_do_log(fpu_extended x); |
797 |
|
|
#else |
798 |
|
|
PRIVATE inline fpu_extended fp_do_log(fpu_extended x) |
799 |
|
|
{ |
800 |
|
|
fpu_extended value; |
801 |
|
|
__asm__ __volatile__("fldln2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); |
802 |
|
|
return value; |
803 |
|
|
} |
804 |
|
|
#endif |
805 |
|
|
|
806 |
|
|
#undef fp_log10 |
807 |
|
|
#define fp_log10 fp_do_log10 |
808 |
|
|
|
809 |
|
|
#ifndef FPU_FAST_MATH |
810 |
|
|
// FIXME: unimplemented |
811 |
|
|
PRIVATE fpu_extended fp_do_log10(fpu_extended x); |
812 |
|
|
#else |
813 |
|
|
PRIVATE inline fpu_extended fp_do_log10(fpu_extended x) |
814 |
|
|
{ |
815 |
|
|
fpu_extended value; |
816 |
|
|
__asm__ __volatile__("fldlg2; fxch; fyl2x" : "=t" (value) : "0" (x) : "st(1)"); |
817 |
|
|
return value; |
818 |
|
|
} |
819 |
|
|
#endif |
820 |
|
|
|
821 |
|
|
#undef fp_exp |
822 |
|
|
#define fp_exp fp_do_exp |
823 |
|
|
|
824 |
|
|
#ifndef FPU_FAST_MATH |
825 |
|
|
// FIXME: unimplemented |
826 |
|
|
PRIVATE fpu_extended fp_do_exp(fpu_extended x); |
827 |
|
|
#else |
828 |
|
|
PRIVATE inline fpu_extended fp_do_exp(fpu_extended x) |
829 |
|
|
{ |
830 |
|
|
fpu_extended value, exponent; |
831 |
|
|
__asm__ __volatile__("fldl2e # e^x = 2^(x * log2(e))\n\t" |
832 |
|
|
"fmul %%st(1) # x * log2(e)\n\t" |
833 |
|
|
"fst %%st(1)\n\t" |
834 |
|
|
"frndint # int(x * log2(e))\n\t" |
835 |
|
|
"fxch\n\t" |
836 |
|
|
"fsub %%st(1) # fract(x * log2(e))\n\t" |
837 |
|
|
"f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" |
838 |
|
|
: "=t" (value), "=u" (exponent) : "0" (x)); |
839 |
|
|
value += 1.0; |
840 |
|
|
__asm__ __volatile__("fscale" : "=t" (value) : "0" (value), "u" (exponent)); |
841 |
|
|
return value; |
842 |
|
|
} |
843 |
|
|
#endif |
844 |
|
|
|
845 |
|
|
#undef fp_pow |
846 |
|
|
#define fp_pow fp_do_pow |
847 |
|
|
|
848 |
|
|
PRIVATE fpu_extended fp_do_pow(fpu_extended x, fpu_extended y); |
849 |
|
|
|
850 |
|
|
#undef fp_fabs |
851 |
|
|
#define fp_fabs fp_do_fabs |
852 |
|
|
|
853 |
|
|
PRIVATE inline fpu_extended fp_do_fabs(fpu_extended x) |
854 |
|
|
{ |
855 |
|
|
fpu_extended value; |
856 |
|
|
__asm__ __volatile__("fabs" : "=t" (value) : "0" (x)); |
857 |
|
|
return value; |
858 |
|
|
} |
859 |
|
|
|
860 |
|
|
#undef fp_sqrt |
861 |
|
|
#define fp_sqrt fp_do_sqrt |
862 |
|
|
|
863 |
|
|
PRIVATE inline fpu_extended fp_do_sqrt(fpu_extended x) |
864 |
|
|
{ |
865 |
|
|
fpu_extended value; |
866 |
|
|
__asm__ __volatile__("fsqrt" : "=t" (value) : "0" (x)); |
867 |
|
|
return value; |
868 |
|
|
} |
869 |
|
|
|
870 |
|
|
#undef fp_sin |
871 |
|
|
#define fp_sin fp_do_sin |
872 |
|
|
|
873 |
|
|
PRIVATE inline fpu_extended fp_do_sin(fpu_extended x) |
874 |
|
|
{ |
875 |
|
|
fpu_extended value; |
876 |
|
|
__asm__ __volatile__("fsin" : "=t" (value) : "0" (x)); |
877 |
|
|
return value; |
878 |
|
|
} |
879 |
|
|
|
880 |
|
|
#undef fp_cos |
881 |
|
|
#define fp_cos fp_do_cos |
882 |
|
|
|
883 |
|
|
PRIVATE inline fpu_extended fp_do_cos(fpu_extended x) |
884 |
|
|
{ |
885 |
|
|
fpu_extended value; |
886 |
|
|
__asm__ __volatile__("fcos" : "=t" (value) : "0" (x)); |
887 |
|
|
return value; |
888 |
|
|
} |
889 |
|
|
|
890 |
|
|
#undef fp_tan |
891 |
|
|
#define fp_tan fp_do_tan |
892 |
|
|
|
893 |
|
|
PRIVATE inline fpu_extended fp_do_tan(fpu_extended x) |
894 |
|
|
{ |
895 |
|
|
fpu_extended value; |
896 |
|
|
__asm__ __volatile__("fptan" : "=t" (value) : "0" (x)); |
897 |
|
|
return value; |
898 |
|
|
} |
899 |
|
|
|
900 |
|
|
#undef fp_expm1 |
901 |
|
|
#define fp_expm1 fp_do_expm1 |
902 |
|
|
|
903 |
|
|
// Returns: exp(X) - 1.0 |
904 |
|
|
PRIVATE inline fpu_extended fp_do_expm1(fpu_extended x) |
905 |
|
|
{ |
906 |
|
|
fpu_extended value, exponent, temp; |
907 |
|
|
__asm__ __volatile__("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" |
908 |
|
|
"fmul %%st(1) # x * log2(e)\n\t" |
909 |
|
|
"fst %%st(1)\n\t" |
910 |
|
|
"frndint # int(x * log2(e))\n\t" |
911 |
|
|
"fxch\n\t" |
912 |
|
|
"fsub %%st(1) # fract(x * log2(e))\n\t" |
913 |
|
|
"f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" |
914 |
|
|
"fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" |
915 |
|
|
: "=t" (value), "=u" (exponent) : "0" (x)); |
916 |
|
|
__asm__ __volatile__("fscale" : "=t" (temp) : "0" (1.0), "u" (exponent)); |
917 |
|
|
temp -= 1.0; |
918 |
|
|
return temp + value ? temp + value : x; |
919 |
|
|
} |
920 |
|
|
|
921 |
|
|
#undef fp_sgn1 |
922 |
|
|
#define fp_sgn1 fp_do_sgn1 |
923 |
|
|
|
924 |
|
|
PRIVATE inline fpu_extended fp_do_sgn1(fpu_extended x) |
925 |
|
|
{ |
926 |
|
|
fp_declare_init_shape(sxp, x, extended); |
927 |
|
|
sxp->ieee_nan.exponent = FP_EXTENDED_EXP_MAX; |
928 |
|
|
sxp->ieee_nan.one = 1; |
929 |
|
|
sxp->ieee_nan.quiet_nan = 0; |
930 |
|
|
sxp->ieee_nan.mantissa0 = 0; |
931 |
|
|
sxp->ieee_nan.mantissa1 = 0; |
932 |
|
|
return x; |
933 |
|
|
} |
934 |
|
|
|
935 |
|
|
#undef fp_sinh |
936 |
|
|
#define fp_sinh fp_do_sinh |
937 |
|
|
|
938 |
|
|
#ifndef FPU_FAST_MATH |
939 |
|
|
// FIXME: unimplemented |
940 |
|
|
PRIVATE fpu_extended fp_do_sinh(fpu_extended x); |
941 |
|
|
#else |
942 |
|
|
PRIVATE inline fpu_extended fp_do_sinh(fpu_extended x) |
943 |
|
|
{ |
944 |
|
|
fpu_extended exm1 = fp_expm1(fp_fabs(x)); |
945 |
|
|
return 0.5 * (exm1 / (exm1 + 1.0) + exm1) * fp_sgn1(x); |
946 |
|
|
} |
947 |
|
|
#endif |
948 |
|
|
|
949 |
|
|
#undef fp_cosh |
950 |
|
|
#define fp_cosh fp_do_cosh |
951 |
|
|
|
952 |
|
|
#ifndef FPU_FAST_MATH |
953 |
|
|
// FIXME: unimplemented |
954 |
|
|
PRIVATE fpu_extended fp_do_cosh(fpu_extended x); |
955 |
|
|
#else |
956 |
|
|
PRIVATE inline fpu_extended fp_do_cosh(fpu_extended x) |
957 |
|
|
{ |
958 |
|
|
fpu_extended ex = fp_exp(x); |
959 |
|
|
return 0.5 * (ex + 1.0 / ex); |
960 |
|
|
} |
961 |
|
|
#endif |
962 |
|
|
|
963 |
|
|
#undef fp_tanh |
964 |
|
|
#define fp_tanh fp_do_tanh |
965 |
|
|
|
966 |
|
|
#ifndef FPU_FAST_MATH |
967 |
|
|
// FIXME: unimplemented |
968 |
|
|
PRIVATE fpu_extended fp_do_tanh(fpu_extended x); |
969 |
|
|
#else |
970 |
|
|
PRIVATE inline fpu_extended fp_do_tanh(fpu_extended x) |
971 |
|
|
{ |
972 |
|
|
fpu_extended exm1 = fp_expm1(-fp_fabs(x + x)); |
973 |
|
|
return exm1 / (exm1 + 2.0) * fp_sgn1(-x); |
974 |
|
|
} |
975 |
|
|
#endif |
976 |
|
|
|
977 |
|
|
#undef fp_atan2 |
978 |
|
|
#define fp_atan2 fp_do_atan2 |
979 |
|
|
|
980 |
|
|
PRIVATE inline fpu_extended fp_do_atan2(fpu_extended y, fpu_extended x) |
981 |
|
|
{ |
982 |
|
|
fpu_extended value; |
983 |
|
|
__asm__ __volatile__("fpatan" : "=t" (value) : "0" (x), "u" (y) : "st(1)"); |
984 |
|
|
return value; |
985 |
|
|
} |
986 |
|
|
|
987 |
|
|
#undef fp_asin |
988 |
|
|
#define fp_asin fp_do_asin |
989 |
|
|
|
990 |
|
|
PRIVATE inline fpu_extended fp_do_asin(fpu_extended x) |
991 |
|
|
{ |
992 |
|
|
return fp_atan2(x, fp_sqrt(1.0 - x * x)); |
993 |
|
|
} |
994 |
|
|
|
995 |
|
|
#undef fp_acos |
996 |
|
|
#define fp_acos fp_do_acos |
997 |
|
|
|
998 |
|
|
PRIVATE inline fpu_extended fp_do_acos(fpu_extended x) |
999 |
|
|
{ |
1000 |
|
|
return fp_atan2(fp_sqrt(1.0 - x * x), x); |
1001 |
|
|
} |
1002 |
|
|
|
1003 |
|
|
#undef fp_atan |
1004 |
|
|
#define fp_atan fp_do_atan |
1005 |
|
|
|
1006 |
|
|
PRIVATE inline fpu_extended fp_do_atan(fpu_extended x) |
1007 |
|
|
{ |
1008 |
|
|
fpu_extended value; |
1009 |
|
|
__asm__ __volatile__("fld1; fpatan" : "=t" (value) : "0" (x) : "st(1)"); |
1010 |
|
|
return value; |
1011 |
|
|
} |
1012 |
|
|
|
1013 |
|
|
#undef fp_log1p |
1014 |
|
|
#define fp_log1p fp_do_log1p |
1015 |
|
|
|
1016 |
|
|
// Returns: ln(1.0 + X) |
1017 |
|
|
PRIVATE fpu_extended fp_do_log1p(fpu_extended x); |
1018 |
|
|
|
1019 |
|
|
#undef fp_asinh |
1020 |
|
|
#define fp_asinh fp_do_asinh |
1021 |
|
|
|
1022 |
|
|
PRIVATE inline fpu_extended fp_do_asinh(fpu_extended x) |
1023 |
|
|
{ |
1024 |
|
|
fpu_extended y = fp_fabs(x); |
1025 |
|
|
return (fp_log1p(y * y / (fp_sqrt(y * y + 1.0) + 1.0) + y) * fp_sgn1(x)); |
1026 |
|
|
} |
1027 |
|
|
|
1028 |
|
|
#undef fp_acosh |
1029 |
|
|
#define fp_acosh fp_do_acosh |
1030 |
|
|
|
1031 |
|
|
PRIVATE inline fpu_extended fp_do_acosh(fpu_extended x) |
1032 |
|
|
{ |
1033 |
|
|
return fp_log(x + fp_sqrt(x - 1.0) * fp_sqrt(x + 1.0)); |
1034 |
|
|
} |
1035 |
|
|
|
1036 |
|
|
#undef fp_atanh |
1037 |
|
|
#define fp_atanh fp_do_atanh |
1038 |
|
|
|
1039 |
|
|
PRIVATE inline fpu_extended fp_do_atanh(fpu_extended x) |
1040 |
|
|
{ |
1041 |
|
|
fpu_extended y = fp_fabs(x); |
1042 |
|
|
return -0.5 * fp_log1p(-(y + y) / (1.0 + y)) * fp_sgn1(x); |
1043 |
|
|
} |
1044 |
|
|
|
1045 |
|
|
#undef fp_floor |
1046 |
|
|
#define fp_floor fp_do_floor |
1047 |
|
|
|
1048 |
|
|
PRIVATE inline fpu_extended fp_do_floor(fpu_extended x) |
1049 |
|
|
{ |
1050 |
|
|
volatile unsigned int cw; |
1051 |
|
|
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); |
1052 |
|
|
volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0400; // rounding down |
1053 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); |
1054 |
|
|
fpu_extended value; |
1055 |
|
|
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); |
1056 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw)); |
1057 |
|
|
return value; |
1058 |
|
|
} |
1059 |
|
|
|
1060 |
|
|
#undef fp_ceil |
1061 |
|
|
#define fp_ceil fp_do_ceil |
1062 |
|
|
|
1063 |
|
|
PRIVATE inline fpu_extended fp_do_ceil(fpu_extended x) |
1064 |
|
|
{ |
1065 |
|
|
volatile unsigned int cw; |
1066 |
|
|
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); |
1067 |
|
|
volatile unsigned int cw_temp = (cw & 0xf3ff) | 0x0800; // rounding up |
1068 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); |
1069 |
|
|
fpu_extended value; |
1070 |
|
|
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); |
1071 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw)); |
1072 |
|
|
return value; |
1073 |
|
|
} |
1074 |
|
|
|
1075 |
|
|
#define DEFINE_ROUND_FUNC(rounding_mode_str, rounding_mode) \ |
1076 |
|
|
PRIVATE inline fpu_extended fp_do_round_to_ ## rounding_mode_str(fpu_extended x) \ |
1077 |
|
|
{ \ |
1078 |
|
|
volatile unsigned int cw; \ |
1079 |
|
|
__asm__ __volatile__("fnstcw %0" : "=m" (cw)); \ |
1080 |
|
|
volatile unsigned int cw_temp = (cw & 0xf3ff) | (rounding_mode); \ |
1081 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw_temp)); \ |
1082 |
|
|
fpu_extended value; \ |
1083 |
|
|
__asm__ __volatile__("frndint" : "=t" (value) : "0" (x)); \ |
1084 |
|
|
__asm__ __volatile__("fldcw %0" : : "m" (cw)); \ |
1085 |
|
|
return value; \ |
1086 |
|
|
} |
1087 |
|
|
|
1088 |
|
|
#undef fp_round_to_minus_infinity |
1089 |
|
|
#define fp_round_to_minus_infinity fp_do_round_to_minus_infinity |
1090 |
|
|
|
1091 |
|
|
DEFINE_ROUND_FUNC(minus_infinity, 0x400) |
1092 |
|
|
|
1093 |
|
|
#undef fp_round_to_plus_infinity |
1094 |
|
|
#define fp_round_to_plus_infinity fp_do_round_to_plus_infinity |
1095 |
|
|
|
1096 |
|
|
DEFINE_ROUND_FUNC(plus_infinity, 0x800) |
1097 |
|
|
|
1098 |
|
|
#undef fp_round_to_zero |
1099 |
|
|
#define fp_round_to_zero fp_do_round_to_zero |
1100 |
|
|
|
1101 |
|
|
DEFINE_ROUND_FUNC(zero, 0xc00) |
1102 |
|
|
|
1103 |
|
|
#undef fp_round_to_nearest |
1104 |
|
|
#define fp_round_to_nearest fp_do_round_to_nearest |
1105 |
|
|
|
1106 |
|
|
DEFINE_ROUND_FUNC(nearest, 0x000) |
1107 |
|
|
|
1108 |
|
|
#endif /* X86_ASSEMBLY */ |
1109 |
|
|
|
1110 |
|
|
#ifndef fp_round_to_minus_infinity |
1111 |
|
|
#define fp_round_to_minus_infinity(x) fp_floor(x) |
1112 |
|
|
#endif |
1113 |
|
|
|
1114 |
|
|
#ifndef fp_round_to_plus_infinity |
1115 |
|
|
#define fp_round_to_plus_infinity(x) fp_ceil(x) |
1116 |
|
|
#endif |
1117 |
|
|
|
1118 |
|
|
#ifndef fp_round_to_zero |
1119 |
|
|
#define fp_round_to_zero(x) ((int)(x)) |
1120 |
|
|
#endif |
1121 |
|
|
|
1122 |
|
|
#ifndef fp_round_to_nearest |
1123 |
|
|
#define fp_round_to_nearest(x) ((int)((x) + 0.5)) |
1124 |
|
|
#endif |
1125 |
|
|
|
1126 |
|
|
#endif /* FPU_MATHLIB_H */ |