1 /*- 2 * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $ 27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/types.h> 31 #include "npx.h" 32 #include "fenv.h" 33 34 #define ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO) 35 36 /* 37 * As compared to the x87 control word, the SSE unit's control word 38 * has the rounding control bits offset by 3 and the exception mask 39 * bits offset by 7. 40 */ 41 #define _SSE_ROUND_SHIFT 3 42 #define _SSE_EMASK_SHIFT 7 43 44 const fenv_t __fe_dfl_env = { 45 __INITIAL_NPXCW__, /*__control*/ 46 0x0000, /*__mxcsr_hi*/ 47 0x0000, /*__status*/ 48 0x1f80, /*__mxcsr_lo*/ 49 0xffffffff, /*__tag*/ 50 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 51 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/ 52 }; 53 54 #define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw)) 55 #define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env)) 56 #define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \ 57 : "st", "st(1)", "st(2)", "st(3)", "st(4)", \ 58 "st(5)", "st(6)", "st(7)") 59 #define __fnclex() __asm __volatile("fnclex") 60 #define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env))) 61 #define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw))) 62 #define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw))) 63 #define __fwait() __asm __volatile("fwait") 64 #define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr)) 65 #define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr))) 66 67 /* After testing for SSE support once, we cache the result in __has_sse. */ 68 enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK }; 69 #ifdef __SSE__ 70 #define __HAS_SSE() 1 71 #else 72 #define __HAS_SSE() (__has_sse == __SSE_YES || \ 73 (__has_sse == __SSE_UNK && __test_sse())) 74 #endif 75 76 enum __sse_support __has_sse = 77 #ifdef __SSE__ 78 __SSE_YES; 79 #else 80 __SSE_UNK; 81 #endif 82 83 #ifndef __SSE__ 84 #define getfl(x) __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x))) 85 #define setfl(x) __asm __volatile("pushl %0\n\tpopfl" : : "g" (x)) 86 #define cpuid_dx(x) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t" \ 87 "cpuid\n\tpopl %%ebx" \ 88 : "=d" (*(x)) : : "eax", "ecx") 89 90 /* 91 * Test for SSE support on this processor. We need to do this because 92 * we need to use ldmxcsr/stmxcsr to get correct results if any part 93 * of the program was compiled to use SSE floating-point, but we can't 94 * use SSE on older processors. 95 */ 96 int 97 __test_sse(void) 98 { 99 int flag, nflag; 100 int dx_features; 101 102 /* Am I a 486? */ 103 getfl(&flag); 104 nflag = flag ^ 0x200000; 105 setfl(nflag); 106 getfl(&nflag); 107 if (flag != nflag) { 108 /* Not a 486, so CPUID should work. */ 109 cpuid_dx(&dx_features); 110 if (dx_features & 0x2000000) { 111 __has_sse = __SSE_YES; 112 return (1); 113 } 114 } 115 __has_sse = __SSE_NO; 116 return (0); 117 } 118 #endif /* __SSE__ */ 119 120 int 121 fesetexceptflag(const fexcept_t *flagp, int excepts) 122 { 123 fenv_t env; 124 __uint32_t mxcsr; 125 126 excepts &= FE_ALL_EXCEPT; 127 if (excepts) { /* Do nothing if excepts is 0 */ 128 __fnstenv(&env); 129 env.__status &= ~excepts; 130 env.__status |= *flagp & excepts; 131 __fnclex(); 132 __fldenv(env); 133 if (__HAS_SSE()) { 134 __stmxcsr(&mxcsr); 135 mxcsr &= ~excepts; 136 mxcsr |= *flagp & excepts; 137 __ldmxcsr(mxcsr); 138 } 139 } 140 141 return (0); 142 } 143 144 int 145 feraiseexcept(int excepts) 146 { 147 fexcept_t ex = excepts; 148 149 fesetexceptflag(&ex, excepts); 150 __fwait(); 151 return (0); 152 } 153 154 int 155 fegetenv(fenv_t *envp) 156 { 157 __uint32_t mxcsr; 158 159 __fnstenv(envp); 160 /* 161 * fnstenv masks all exceptions, so we need to restore 162 * the old control word to avoid this side effect. 163 */ 164 __fldcw(envp->__control); 165 if (__HAS_SSE()) { 166 __stmxcsr(&mxcsr); 167 envp->__mxcsr_hi = mxcsr >> 16; 168 envp->__mxcsr_lo = mxcsr & 0xffff; 169 } 170 return (0); 171 } 172 173 int 174 feholdexcept(fenv_t *envp) 175 { 176 __uint32_t mxcsr; 177 fenv_t env; 178 179 __fnstenv(&env); 180 *envp = env; 181 env.__status &= ~FE_ALL_EXCEPT; 182 env.__control |= FE_ALL_EXCEPT; 183 __fnclex(); 184 __fldenv(env); 185 if (__HAS_SSE()) { 186 __stmxcsr(&mxcsr); 187 envp->__mxcsr_hi = mxcsr >> 16; 188 envp->__mxcsr_lo = mxcsr & 0xffff; 189 mxcsr &= ~FE_ALL_EXCEPT; 190 mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT; 191 __ldmxcsr(mxcsr); 192 } 193 return (0); 194 } 195 196 int 197 feupdateenv(const fenv_t *envp) 198 { 199 __uint32_t mxcsr; 200 __uint16_t status; 201 202 __fnstsw(&status); 203 if (__HAS_SSE()) { 204 __stmxcsr(&mxcsr); 205 } else { 206 mxcsr = 0; 207 } 208 fesetenv(envp); 209 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); 210 return (0); 211 } 212 213 int 214 feenableexcept(int mask) 215 { 216 __uint32_t mxcsr; 217 __uint16_t control, omask; 218 219 mask &= FE_ALL_EXCEPT; 220 __fnstcw(&control); 221 if (__HAS_SSE()) { 222 __stmxcsr(&mxcsr); 223 } else { 224 mxcsr = 0; 225 } 226 omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 227 if (mask) { 228 control &= ~mask; 229 __fldcw(control); 230 if (__HAS_SSE()) { 231 mxcsr &= ~(mask << _SSE_EMASK_SHIFT); 232 __ldmxcsr(mxcsr); 233 } 234 } 235 return (omask); 236 } 237 238 int 239 fedisableexcept(int mask) 240 { 241 __uint32_t mxcsr; 242 __uint16_t control, omask; 243 244 mask &= FE_ALL_EXCEPT; 245 __fnstcw(&control); 246 if (__HAS_SSE()) { 247 __stmxcsr(&mxcsr); 248 } else { 249 mxcsr = 0; 250 } 251 omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 252 if (mask) { 253 control |= mask; 254 __fldcw(control); 255 if (__HAS_SSE()) { 256 mxcsr |= mask << _SSE_EMASK_SHIFT; 257 __ldmxcsr(mxcsr); 258 } 259 } 260 return (omask); 261 } 262 263 int 264 feclearexcept(int excepts) 265 { 266 fenv_t env; 267 __uint32_t mxcsr; 268 269 excepts &= FE_ALL_EXCEPT; 270 if (excepts) { /* Do nothing if excepts is 0 */ 271 __fnstenv(&env); 272 env.__status &= ~excepts; 273 __fnclex(); 274 __fldenv(env); 275 if (__HAS_SSE()) { 276 __stmxcsr(&mxcsr); 277 mxcsr &= ~excepts; 278 __ldmxcsr(mxcsr); 279 } 280 } 281 return (0); 282 } 283 284 int 285 fegetexceptflag(fexcept_t *flagp, int excepts) 286 { 287 __uint32_t mxcsr; 288 __uint16_t status; 289 290 excepts &= FE_ALL_EXCEPT; 291 __fnstsw(&status); 292 if (__HAS_SSE()) { 293 __stmxcsr(&mxcsr); 294 } else { 295 mxcsr = 0; 296 } 297 *flagp = (status | mxcsr) & excepts; 298 return (0); 299 } 300 301 int 302 fetestexcept(int excepts) 303 { 304 __uint32_t mxcsr; 305 __uint16_t status; 306 307 excepts &= FE_ALL_EXCEPT; 308 if (excepts) { /* Do nothing if excepts is 0 */ 309 __fnstsw(&status); 310 if (__HAS_SSE()) { 311 __stmxcsr(&mxcsr); 312 } else { 313 mxcsr = 0; 314 } 315 return ((status | mxcsr) & excepts); 316 } 317 return (0); 318 } 319 320 int 321 fegetround(void) 322 { 323 __uint16_t control; 324 325 /* 326 * We assume that the x87 and the SSE unit agree on the 327 * rounding mode. Reading the control word on the x87 turns 328 * out to be about 5 times faster than reading it on the SSE 329 * unit on an Opteron 244. 330 */ 331 __fnstcw(&control); 332 return (control & ROUND_MASK); 333 } 334 335 int 336 fesetround(int round) 337 { 338 __uint32_t mxcsr; 339 __uint16_t control; 340 341 if (round & ~ROUND_MASK) { 342 return (-1); 343 } else { 344 __fnstcw(&control); 345 control &= ~ROUND_MASK; 346 control |= round; 347 __fldcw(control); 348 if (__HAS_SSE()) { 349 __stmxcsr(&mxcsr); 350 mxcsr &= ~(ROUND_MASK << _SSE_ROUND_SHIFT); 351 mxcsr |= round << _SSE_ROUND_SHIFT; 352 __ldmxcsr(mxcsr); 353 } 354 return (0); 355 } 356 } 357 358 int 359 fesetenv(const fenv_t *envp) 360 { 361 fenv_t env = *envp; 362 __uint32_t mxcsr; 363 364 mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo); 365 env.__mxcsr_hi = 0xffff; 366 env.__mxcsr_lo = 0xffff; 367 /* 368 * XXX Using fldenvx() instead of fldenv() tells the compiler that this 369 * instruction clobbers the i387 register stack. This happens because 370 * we restore the tag word from the saved environment. Normally, this 371 * would happen anyway and we wouldn't care, because the ABI allows 372 * function calls to clobber the i387 regs. However, fesetenv() is 373 * inlined, so we need to be more careful. 374 */ 375 __fldenvx(env); 376 if (__HAS_SSE()) { 377 __ldmxcsr(mxcsr); 378 } 379 return (0); 380 } 381 382 int 383 fegetexcept(void) 384 { 385 __uint16_t control; 386 387 /* 388 * We assume that the masks for the x87 and the SSE unit are 389 * the same. 390 */ 391 __fnstcw(&control); 392 return (~control & FE_ALL_EXCEPT); 393 } 394