1 /*- 2 * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $ 27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/types.h> 31 #include "npx.h" 32 #include "fenv.h" 33 34 /* 35 * As compared to the x87 control word, the SSE unit's control word 36 * has the rounding control bits offset by 3 and the exception mask 37 * bits offset by 7. 38 */ 39 #define _SSE_ROUND_SHIFT 3 40 #define _SSE_EMASK_SHIFT 7 41 42 const fenv_t __fe_dfl_env = { 43 __INITIAL_NPXCW__, /*__control*/ 44 0x0000, /*__mxcsr_hi*/ 45 0x0000, /*__status*/ 46 0x1f80, /*__mxcsr_lo*/ 47 0xffffffff, /*__tag*/ 48 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 49 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/ 50 }; 51 52 #define __fldcw(__cw) __asm __volatile("fldcw %0" : : "m" (__cw)) 53 #define __fldenv(__env) __asm __volatile("fldenv %0" : : "m" (__env)) 54 #define __fldenvx(__env) __asm __volatile("fldenv %0" : : "m" (__env) \ 55 : "st", "st(1)", "st(2)", "st(3)", "st(4)", \ 56 "st(5)", "st(6)", "st(7)") 57 #define __fnclex() __asm __volatile("fnclex") 58 #define __fnstenv(__env) __asm __volatile("fnstenv %0" : "=m" (*(__env))) 59 #define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw))) 60 #define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw))) 61 #define __fwait() __asm __volatile("fwait") 62 #define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr)) 63 #define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr))) 64 65 /* After testing for SSE support once, we cache the result in __has_sse. */ 66 enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK }; 67 #ifdef __SSE__ 68 #define __HAS_SSE() 1 69 #else 70 #define __HAS_SSE() (__has_sse == __SSE_YES || \ 71 (__has_sse == __SSE_UNK && __test_sse())) 72 #endif 73 74 enum __sse_support __has_sse = 75 #ifdef __SSE__ 76 __SSE_YES; 77 #else 78 __SSE_UNK; 79 #endif 80 81 #ifndef __SSE__ 82 #define getfl(x) __asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x))) 83 #define setfl(x) __asm __volatile("pushl %0\n\tpopfl" : : "g" (x)) 84 #define cpuid_dx(x) __asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t" \ 85 "cpuid\n\tpopl %%ebx" \ 86 : "=d" (*(x)) : : "eax", "ecx") 87 88 /* 89 * Test for SSE support on this processor. We need to do this because 90 * we need to use ldmxcsr/stmxcsr to get correct results if any part 91 * of the program was compiled to use SSE floating-point, but we can't 92 * use SSE on older processors. 93 */ 94 int 95 __test_sse(void) 96 { 97 int flag, nflag; 98 int dx_features; 99 100 /* Am I a 486? */ 101 getfl(&flag); 102 nflag = flag ^ 0x200000; 103 setfl(nflag); 104 getfl(&nflag); 105 if (flag != nflag) { 106 /* Not a 486, so CPUID should work. */ 107 cpuid_dx(&dx_features); 108 if (dx_features & 0x2000000) { 109 __has_sse = __SSE_YES; 110 return (1); 111 } 112 } 113 __has_sse = __SSE_NO; 114 return (0); 115 } 116 #endif /* __SSE__ */ 117 118 int 119 fesetexceptflag(const fexcept_t *flagp, int excepts) 120 { 121 fenv_t env; 122 __uint32_t mxcsr; 123 124 excepts &= FE_ALL_EXCEPT; 125 if (excepts) { /* Do nothing if excepts is 0 */ 126 __fnstenv(&env); 127 env.__status &= ~excepts; 128 env.__status |= *flagp & excepts; 129 __fnclex(); 130 __fldenv(env); 131 if (__HAS_SSE()) { 132 __stmxcsr(&mxcsr); 133 mxcsr &= ~excepts; 134 mxcsr |= *flagp & excepts; 135 __ldmxcsr(mxcsr); 136 } 137 } 138 139 return (0); 140 } 141 142 int 143 feraiseexcept(int excepts) 144 { 145 fexcept_t ex = excepts; 146 147 fesetexceptflag(&ex, excepts); 148 __fwait(); 149 return (0); 150 } 151 152 int 153 fegetenv(fenv_t *envp) 154 { 155 __uint32_t mxcsr; 156 157 __fnstenv(envp); 158 /* 159 * fnstenv masks all exceptions, so we need to restore 160 * the old control word to avoid this side effect. 161 */ 162 __fldcw(envp->__control); 163 if (__HAS_SSE()) { 164 __stmxcsr(&mxcsr); 165 envp->__mxcsr_hi = mxcsr >> 16; 166 envp->__mxcsr_lo = mxcsr & 0xffff; 167 } 168 return (0); 169 } 170 171 int 172 feholdexcept(fenv_t *envp) 173 { 174 __uint32_t mxcsr; 175 fenv_t env; 176 177 __fnstenv(&env); 178 *envp = env; 179 env.__status &= ~FE_ALL_EXCEPT; 180 env.__control |= FE_ALL_EXCEPT; 181 __fnclex(); 182 __fldenv(env); 183 if (__HAS_SSE()) { 184 __stmxcsr(&mxcsr); 185 envp->__mxcsr_hi = mxcsr >> 16; 186 envp->__mxcsr_lo = mxcsr & 0xffff; 187 mxcsr &= ~FE_ALL_EXCEPT; 188 mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT; 189 __ldmxcsr(mxcsr); 190 } 191 return (0); 192 } 193 194 int 195 feupdateenv(const fenv_t *envp) 196 { 197 __uint32_t mxcsr; 198 __uint16_t status; 199 200 __fnstsw(&status); 201 if (__HAS_SSE()) { 202 __stmxcsr(&mxcsr); 203 } else { 204 mxcsr = 0; 205 } 206 fesetenv(envp); 207 feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT); 208 return (0); 209 } 210 211 int 212 feenableexcept(int mask) 213 { 214 __uint32_t mxcsr; 215 __uint16_t control, omask; 216 217 mask &= FE_ALL_EXCEPT; 218 __fnstcw(&control); 219 if (__HAS_SSE()) { 220 __stmxcsr(&mxcsr); 221 } else { 222 mxcsr = 0; 223 } 224 omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 225 if (mask) { 226 control &= ~mask; 227 __fldcw(control); 228 if (__HAS_SSE()) { 229 mxcsr &= ~(mask << _SSE_EMASK_SHIFT); 230 __ldmxcsr(mxcsr); 231 } 232 } 233 return (omask); 234 } 235 236 int 237 fedisableexcept(int mask) 238 { 239 __uint32_t mxcsr; 240 __uint16_t control, omask; 241 242 mask &= FE_ALL_EXCEPT; 243 __fnstcw(&control); 244 if (__HAS_SSE()) { 245 __stmxcsr(&mxcsr); 246 } else { 247 mxcsr = 0; 248 } 249 omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT; 250 if (mask) { 251 control |= mask; 252 __fldcw(control); 253 if (__HAS_SSE()) { 254 mxcsr |= mask << _SSE_EMASK_SHIFT; 255 __ldmxcsr(mxcsr); 256 } 257 } 258 return (omask); 259 } 260 261 int 262 feclearexcept(int excepts) 263 { 264 fenv_t env; 265 __uint32_t mxcsr; 266 267 excepts &= FE_ALL_EXCEPT; 268 if (excepts) { /* Do nothing if excepts is 0 */ 269 __fnstenv(&env); 270 env.__status &= ~excepts; 271 __fnclex(); 272 __fldenv(env); 273 if (__HAS_SSE()) { 274 __stmxcsr(&mxcsr); 275 mxcsr &= ~excepts; 276 __ldmxcsr(mxcsr); 277 } 278 } 279 return (0); 280 } 281 282 int 283 fegetexceptflag(fexcept_t *flagp, int excepts) 284 { 285 __uint32_t mxcsr; 286 __uint16_t status; 287 288 excepts &= FE_ALL_EXCEPT; 289 __fnstsw(&status); 290 if (__HAS_SSE()) { 291 __stmxcsr(&mxcsr); 292 } else { 293 mxcsr = 0; 294 } 295 *flagp = (status | mxcsr) & excepts; 296 return (0); 297 } 298 299 int 300 fetestexcept(int excepts) 301 { 302 __uint32_t mxcsr; 303 __uint16_t status; 304 305 excepts &= FE_ALL_EXCEPT; 306 if (excepts) { /* Do nothing if excepts is 0 */ 307 __fnstsw(&status); 308 if (__HAS_SSE()) { 309 __stmxcsr(&mxcsr); 310 } else { 311 mxcsr = 0; 312 } 313 return ((status | mxcsr) & excepts); 314 } 315 return (0); 316 } 317 318 int 319 fegetround(void) 320 { 321 __uint16_t control; 322 323 /* 324 * We assume that the x87 and the SSE unit agree on the 325 * rounding mode. Reading the control word on the x87 turns 326 * out to be about 5 times faster than reading it on the SSE 327 * unit on an Opteron 244. 328 */ 329 __fnstcw(&control); 330 return (control & _ROUND_MASK); 331 } 332 333 int 334 fesetround(int round) 335 { 336 __uint32_t mxcsr; 337 __uint16_t control; 338 339 if (round & ~_ROUND_MASK) { 340 return (-1); 341 } else { 342 __fnstcw(&control); 343 control &= ~_ROUND_MASK; 344 control |= round; 345 __fldcw(control); 346 if (__HAS_SSE()) { 347 __stmxcsr(&mxcsr); 348 mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT); 349 mxcsr |= round << _SSE_ROUND_SHIFT; 350 __ldmxcsr(mxcsr); 351 } 352 return (0); 353 } 354 } 355 356 int 357 fesetenv(const fenv_t *envp) 358 { 359 fenv_t env = *envp; 360 __uint32_t mxcsr; 361 362 mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo); 363 env.__mxcsr_hi = 0xffff; 364 env.__mxcsr_lo = 0xffff; 365 /* 366 * XXX Using fldenvx() instead of fldenv() tells the compiler that this 367 * instruction clobbers the i387 register stack. This happens because 368 * we restore the tag word from the saved environment. Normally, this 369 * would happen anyway and we wouldn't care, because the ABI allows 370 * function calls to clobber the i387 regs. However, fesetenv() is 371 * inlined, so we need to be more careful. 372 */ 373 __fldenvx(env); 374 if (__HAS_SSE()) { 375 __ldmxcsr(mxcsr); 376 } 377 return (0); 378 } 379 380 int 381 fegetexcept(void) 382 { 383 __uint16_t control; 384 385 /* 386 * We assume that the masks for the x87 and the SSE unit are 387 * the same. 388 */ 389 __fnstcw(&control); 390 return (~control & FE_ALL_EXCEPT); 391 } 392