Home | History | Annotate | Download | only in i387
      1 /*-
      2  * Copyright (c) 2004-2005 David Schultz <das (at) FreeBSD.ORG>
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     24  * SUCH DAMAGE.
     25  *
     26  * $FreeBSD: src/lib/msun/i387/fenv.c,v 1.2 2005/03/17 22:21:46 das Exp $
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #include <sys/types.h>
     31 #include "npx.h"
     32 #include "fenv.h"
     33 
/*
 * Bit offsets used to translate an x87 control-word field into the
 * matching MXCSR field.
 *
 * As compared to the x87 control word, the SSE unit's control word
 * has the rounding control bits offset by 3 and the exception mask
 * bits offset by 7.  Code in this file therefore left-shifts an x87
 * rounding or mask value by these amounts before merging it into MXCSR,
 * and right-shifts when going the other way.
 */
#define	_SSE_ROUND_SHIFT	3
#define	_SSE_EMASK_SHIFT	7
     41 
/*
 * Statically initialised floating-point environment; judging by the name
 * this is the default environment (presumably what FE_DFL_ENV refers to --
 * confirm against fenv.h).
 *
 * The initializer is positional, so the order of the values below must
 * match the member order of fenv_t.  The trailing comments name the member
 * each value is intended for.
 *
 *  - __control comes from __INITIAL_NPXCW__ (not defined in this file;
 *    presumably provided by npx.h).
 *  - __mxcsr_hi/__mxcsr_lo are the two 16-bit halves of MXCSR.  0x1f80
 *    sets the six bits at positions 7..12, i.e. the exception-mask field
 *    located by _SSE_EMASK_SHIFT, and leaves every other bit clear.
 *  - __status is zero: no exception flags raised.
 */
const fenv_t __fe_dfl_env = {
	__INITIAL_NPXCW__, /*__control*/
	0x0000,            /*__mxcsr_hi*/
	0x0000,            /*__status*/
	0x1f80,            /*__mxcsr_lo*/
	0xffffffff,        /*__tag*/
	{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff } /*__other*/
};
     51 
/*
 * One-instruction inline-assembly wrappers for the x87 and SSE control and
 * status instructions used in this file.
 *
 * Calling convention, as visible from the operand expressions:
 *  - "load" wrappers (__fldcw, __fldenv, __fldenvx, __ldmxcsr) take the
 *    object itself and use it as a memory input operand;
 *  - "store" wrappers (__fnstenv, __fnstcw, __fnstsw, __stmxcsr) take a
 *    POINTER and write through it (note the *(arg) in the output operand).
 *
 * Every statement is __volatile, so the compiler will not delete it or
 * treat it as a pure computation.
 */
/* Load the x87 control word from __cw. */
#define __fldcw(__cw)           __asm __volatile("fldcw %0" : : "m" (__cw))
/* Load the whole x87 environment from __env. */
#define __fldenv(__env)         __asm __volatile("fldenv %0" : : "m" (__env))
/*
 * Same instruction as __fldenv(), but additionally tells the compiler that
 * all eight x87 stack registers are clobbered.
 */
#define	__fldenvx(__env)	__asm __volatile("fldenv %0" : : "m" (__env)  \
				: "st", "st(1)", "st(2)", "st(3)", "st(4)",   \
				"st(5)", "st(6)", "st(7)")
/* Clear the x87 exception flags without first checking for pending ones. */
#define __fnclex()              __asm __volatile("fnclex")
/* Store the x87 environment into *__env. */
#define __fnstenv(__env)        __asm __volatile("fnstenv %0" : "=m" (*(__env)))
/* Store the x87 control word into *__cw. */
#define __fnstcw(__cw)          __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
/* Store the x87 status word into *__sw; the operand may be %ax or memory. */
#define __fnstsw(__sw)          __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
/* Wait for / check pending x87 exceptions. */
#define __fwait()               __asm __volatile("fwait")
/* Load MXCSR from __csr. */
#define __ldmxcsr(__csr)        __asm __volatile("ldmxcsr %0" : : "m" (__csr))
/* Store MXCSR into *__csr. */
#define __stmxcsr(__csr)        __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
     64 
/*
 * SSE availability tracking.
 *
 * After testing for SSE support once, we cache the result in __has_sse.
 *
 * When the compiler itself targets SSE (__SSE__ is defined) the answer is
 * known at build time: __HAS_SSE() is the constant 1 and __has_sse starts
 * out as __SSE_YES.  Otherwise __has_sse starts as __SSE_UNK and
 * __HAS_SSE() calls __test_sse() the first time it is evaluated; that
 * function records __SSE_YES or __SSE_NO, so later evaluations only read
 * the cached value.
 */
enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };
#ifdef __SSE__
#define __HAS_SSE()     1
#else
#define __HAS_SSE()     (__has_sse == __SSE_YES ||                      \
                         (__has_sse == __SSE_UNK && __test_sse()))
#endif

/* Cached probe result; has external linkage (not static). */
enum __sse_support __has_sse =
#ifdef __SSE__
	__SSE_YES;
#else
	__SSE_UNK;
#endif
     80 
     81 #ifndef __SSE__
/*
 * Helpers for the run-time SSE probe in __test_sse().
 */
/* Copy EFLAGS into *x by pushing it and popping it into the operand. */
#define	getfl(x)	__asm __volatile("pushfl\n\tpopl %0" : "=mr" (*(x)))
/* Load EFLAGS from x by pushing the operand and popping it into EFLAGS. */
#define	setfl(x)	__asm __volatile("pushl %0\n\tpopfl" : : "g" (x))
/*
 * Execute CPUID with %eax = 1 and store the resulting %edx in *x.
 * %ebx is saved and restored by hand around the instruction rather than
 * being listed as a clobber (presumably because %ebx may be reserved by
 * the compiler, e.g. for PIC -- confirm); %eax and %ecx are declared
 * clobbered.
 */
#define	cpuid_dx(x)	__asm __volatile("pushl %%ebx\n\tmovl $1, %%eax\n\t"  \
					 "cpuid\n\tpopl %%ebx"		      \
					: "=d" (*(x)) : : "eax", "ecx")
     87 
     88 /*
     89  * Test for SSE support on this processor.  We need to do this because
     90  * we need to use ldmxcsr/stmxcsr to get correct results if any part
     91  * of the program was compiled to use SSE floating-point, but we can't
     92  * use SSE on older processors.
     93  */
     94 int
     95 __test_sse(void)
     96 {
     97 	int flag, nflag;
     98 	int dx_features;
     99 
    100 	/* Am I a 486? */
    101 	getfl(&flag);
    102 	nflag = flag ^ 0x200000;
    103 	setfl(nflag);
    104 	getfl(&nflag);
    105 	if (flag != nflag) {
    106 		/* Not a 486, so CPUID should work. */
    107 		cpuid_dx(&dx_features);
    108 		if (dx_features & 0x2000000) {
    109 			__has_sse = __SSE_YES;
    110 			return (1);
    111 		}
    112 	}
    113 	__has_sse = __SSE_NO;
    114 	return (0);
    115 }
    116 #endif /* __SSE__ */
    117 
    118 int
    119 fesetexceptflag(const fexcept_t *flagp, int excepts)
    120 {
    121 	fenv_t env;
    122 	__uint32_t mxcsr;
    123 
    124 	excepts &= FE_ALL_EXCEPT;
    125 	if (excepts) { /* Do nothing if excepts is 0 */
    126 		__fnstenv(&env);
    127 		env.__status &= ~excepts;
    128 		env.__status |= *flagp & excepts;
    129 		__fnclex();
    130 		__fldenv(env);
    131 		if (__HAS_SSE()) {
    132 			__stmxcsr(&mxcsr);
    133 			mxcsr &= ~excepts;
    134 			mxcsr |= *flagp & excepts;
    135 			__ldmxcsr(mxcsr);
    136 		}
    137 	}
    138 
    139 	return (0);
    140 }
    141 
    142 int
    143 feraiseexcept(int excepts)
    144 {
    145 	fexcept_t ex = excepts;
    146 
    147 	fesetexceptflag(&ex, excepts);
    148 	__fwait();
    149 	return (0);
    150 }
    151 
    152 int
    153 fegetenv(fenv_t *envp)
    154 {
    155 	__uint32_t mxcsr;
    156 
    157 	__fnstenv(envp);
    158 	/*
    159 	 * fnstenv masks all exceptions, so we need to restore
    160 	 * the old control word to avoid this side effect.
    161 	 */
    162 	__fldcw(envp->__control);
    163 	if (__HAS_SSE()) {
    164 		__stmxcsr(&mxcsr);
    165 		envp->__mxcsr_hi = mxcsr >> 16;
    166 		envp->__mxcsr_lo = mxcsr & 0xffff;
    167 	}
    168 	return (0);
    169 }
    170 
    171 int
    172 feholdexcept(fenv_t *envp)
    173 {
    174 	__uint32_t mxcsr;
    175 	fenv_t env;
    176 
    177 	__fnstenv(&env);
    178 	*envp = env;
    179 	env.__status &= ~FE_ALL_EXCEPT;
    180 	env.__control |= FE_ALL_EXCEPT;
    181 	__fnclex();
    182 	__fldenv(env);
    183 	if (__HAS_SSE()) {
    184 		__stmxcsr(&mxcsr);
    185 		envp->__mxcsr_hi = mxcsr >> 16;
    186 		envp->__mxcsr_lo = mxcsr & 0xffff;
    187 		mxcsr &= ~FE_ALL_EXCEPT;
    188 		mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
    189 		__ldmxcsr(mxcsr);
    190 	}
    191 	return (0);
    192 }
    193 
    194 int
    195 feupdateenv(const fenv_t *envp)
    196 {
    197 	__uint32_t mxcsr;
    198 	__uint16_t status;
    199 
    200 	__fnstsw(&status);
    201 	if (__HAS_SSE()) {
    202 		__stmxcsr(&mxcsr);
    203 	} else {
    204 		mxcsr = 0;
    205 	}
    206 	fesetenv(envp);
    207 	feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
    208 	return (0);
    209 }
    210 
    211 int
    212 feenableexcept(int mask)
    213 {
    214 	__uint32_t mxcsr;
    215 	__uint16_t control, omask;
    216 
    217 	mask &= FE_ALL_EXCEPT;
    218 	__fnstcw(&control);
    219 	if (__HAS_SSE()) {
    220 		__stmxcsr(&mxcsr);
    221 	} else {
    222 		mxcsr = 0;
    223 	}
    224 	omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    225 	if (mask) {
    226 		control &= ~mask;
    227 		__fldcw(control);
    228 		if (__HAS_SSE()) {
    229 			mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
    230 			__ldmxcsr(mxcsr);
    231 		}
    232 	}
    233 	return (omask);
    234 }
    235 
    236 int
    237 fedisableexcept(int mask)
    238 {
    239 	__uint32_t mxcsr;
    240 	__uint16_t control, omask;
    241 
    242 	mask &= FE_ALL_EXCEPT;
    243 	__fnstcw(&control);
    244 	if (__HAS_SSE()) {
    245 		__stmxcsr(&mxcsr);
    246 	} else {
    247 		mxcsr = 0;
    248 	}
    249 	omask = ~(control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
    250 	if (mask) {
    251 		control |= mask;
    252 		__fldcw(control);
    253 		if (__HAS_SSE()) {
    254 			mxcsr |= mask << _SSE_EMASK_SHIFT;
    255 			__ldmxcsr(mxcsr);
    256 		}
    257 	}
    258 	return (omask);
    259 }
    260 
    261 int
    262 feclearexcept(int excepts)
    263 {
    264 	fenv_t env;
    265 	__uint32_t mxcsr;
    266 
    267 	excepts &= FE_ALL_EXCEPT;
    268 	if (excepts) { /* Do nothing if excepts is 0 */
    269 		__fnstenv(&env);
    270 		env.__status &= ~excepts;
    271 		__fnclex();
    272 		__fldenv(env);
    273 		if (__HAS_SSE()) {
    274 			__stmxcsr(&mxcsr);
    275 			mxcsr &= ~excepts;
    276 			__ldmxcsr(mxcsr);
    277 		}
    278 	}
    279 	return (0);
    280 }
    281 
    282 int
    283 fegetexceptflag(fexcept_t *flagp, int excepts)
    284 {
    285 	__uint32_t mxcsr;
    286 	__uint16_t status;
    287 
    288 	excepts &= FE_ALL_EXCEPT;
    289 	__fnstsw(&status);
    290 	if (__HAS_SSE()) {
    291 		__stmxcsr(&mxcsr);
    292 	} else {
    293 		mxcsr = 0;
    294 	}
    295 	*flagp = (status | mxcsr) & excepts;
    296 	return (0);
    297 }
    298 
    299 int
    300 fetestexcept(int excepts)
    301 {
    302 	__uint32_t mxcsr;
    303 	__uint16_t status;
    304 
    305 	excepts &= FE_ALL_EXCEPT;
    306 	if (excepts) { /* Do nothing if excepts is 0 */
    307 		__fnstsw(&status);
    308 		if (__HAS_SSE()) {
    309 			__stmxcsr(&mxcsr);
    310 		} else {
    311 			mxcsr = 0;
    312 		}
    313 		return ((status | mxcsr) & excepts);
    314 	}
    315 	return (0);
    316 }
    317 
    318 int
    319 fegetround(void)
    320 {
    321 	__uint16_t control;
    322 
    323 	/*
    324 	 * We assume that the x87 and the SSE unit agree on the
    325 	 * rounding mode.  Reading the control word on the x87 turns
    326 	 * out to be about 5 times faster than reading it on the SSE
    327 	 * unit on an Opteron 244.
    328 	 */
    329 	__fnstcw(&control);
    330 	return (control & _ROUND_MASK);
    331 }
    332 
    333 int
    334 fesetround(int round)
    335 {
    336 	__uint32_t mxcsr;
    337 	__uint16_t control;
    338 
    339 	if (round & ~_ROUND_MASK) {
    340 		return (-1);
    341 	} else {
    342 		__fnstcw(&control);
    343 		control &= ~_ROUND_MASK;
    344 		control |= round;
    345 		__fldcw(control);
    346 		if (__HAS_SSE()) {
    347 			__stmxcsr(&mxcsr);
    348 			mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT);
    349 			mxcsr |= round << _SSE_ROUND_SHIFT;
    350 			__ldmxcsr(mxcsr);
    351 		}
    352 		return (0);
    353 	}
    354 }
    355 
    356 int
    357 fesetenv(const fenv_t *envp)
    358 {
    359 	fenv_t env = *envp;
    360 	__uint32_t mxcsr;
    361 
    362 	mxcsr = (env.__mxcsr_hi << 16) | (env.__mxcsr_lo);
    363 	env.__mxcsr_hi = 0xffff;
    364 	env.__mxcsr_lo = 0xffff;
    365 	/*
    366 	 * XXX Using fldenvx() instead of fldenv() tells the compiler that this
    367 	 * instruction clobbers the i387 register stack.  This happens because
    368 	 * we restore the tag word from the saved environment.  Normally, this
    369 	 * would happen anyway and we wouldn't care, because the ABI allows
    370 	 * function calls to clobber the i387 regs.  However, fesetenv() is
    371 	 * inlined, so we need to be more careful.
    372 	 */
    373 	__fldenvx(env);
    374 	if (__HAS_SSE()) {
    375 		__ldmxcsr(mxcsr);
    376 	}
    377 	return (0);
    378 }
    379 
    380 int
    381 fegetexcept(void)
    382 {
    383 	__uint16_t control;
    384 
    385 	/*
    386 	 * We assume that the masks for the x87 and the SSE unit are
    387 	 * the same.
    388 	 */
    389 	__fnstcw(&control);
    390 	return (~control & FE_ALL_EXCEPT);
    391 }
    392