Home | History | Annotate | Download | only in gtx
      1 ///////////////////////////////////////////////////////////////////////////////////////////////////
      2 // OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
      3 ///////////////////////////////////////////////////////////////////////////////////////////////////
      4 // Created : 2009-05-07
      5 // Updated : 2009-05-07
      6 // Licence : This source is under MIT License
      7 // File    : glm/gtx/simd_vec4.inl
      8 ///////////////////////////////////////////////////////////////////////////////////////////////////
      9 
     10 namespace glm{
     11 namespace detail{
     12 
     13 template <int Value>
     14 struct mask
     15 {
     16 	enum{value = Value};
     17 };
     18 
     19 //////////////////////////////////////
     20 // Implicit basic constructors
     21 
     22 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD()
     23 #ifdef GLM_SIMD_ENABLE_DEFAULT_INIT
     24     : Data(_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))
     25 #endif
     26 {}
     27 
     28 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(__m128 const & Data) :
     29 	Data(Data)
     30 {}
     31 
     32 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(fvec4SIMD const & v) :
     33 	Data(v.Data)
     34 {}
     35 
     36 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec4 const & v) :
     37 	Data(_mm_set_ps(v.w, v.z, v.y, v.x))
     38 {}
     39 
     40 //////////////////////////////////////
     41 // Explicit basic constructors
     42 
     43 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s) :
     44 	Data(_mm_set1_ps(s))
     45 {}
     46 
     47 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & x, float const & y, float const & z, float const & w) :
     48 //		Data(_mm_setr_ps(x, y, z, w))
     49 	Data(_mm_set_ps(w, z, y, x))
     50 {}
     51 /*
     52 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const v[4]) :
     53 	Data(_mm_load_ps(v))
     54 {}
     55 */
     56 //////////////////////////////////////
     57 // Swizzle constructors
     58 
     59 //fvec4SIMD(ref4<float> const & r);
     60 
     61 //////////////////////////////////////
     62 // Conversion vector constructors
     63 
     64 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v, float const & s1, float const & s2) :
     65 	Data(_mm_set_ps(s2, s1, v.y, v.x))
     66 {}
     67 
     68 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, vec2 const & v, float const & s2) :
     69 	Data(_mm_set_ps(s2, v.y, v.x, s1))
     70 {}
     71 
     72 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, float const & s2, vec2 const & v) :
     73 	Data(_mm_set_ps(v.y, v.x, s2, s1))
     74 {}
     75 
     76 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec3 const & v, float const & s) :
     77 	Data(_mm_set_ps(s, v.z, v.y, v.x))
     78 {}
     79 
     80 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s, vec3 const & v) :
     81 	Data(_mm_set_ps(v.z, v.y, v.x, s))
     82 {}
     83 
     84 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v1, vec2 const & v2) :
     85 	Data(_mm_set_ps(v2.y, v2.x, v1.y, v1.x))
     86 {}
     87 
     88 //GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(ivec4SIMD const & v) :
     89 //	Data(_mm_cvtepi32_ps(v.Data))
     90 //{}
     91 
     92 //////////////////////////////////////
     93 // Unary arithmetic operators
     94 
     95 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator=(fvec4SIMD const & v)
     96 {
     97 	this->Data = v.Data;
     98 	return *this;
     99 }
    100 
    101 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(float const & s)
    102 {
    103 	this->Data = _mm_add_ps(Data, _mm_set_ps1(s));
    104 	return *this;
    105 }
    106 
    107 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(fvec4SIMD const & v)
    108 {
    109 	this->Data = _mm_add_ps(this->Data , v.Data);
    110 	return *this;
    111 }
    112 
    113 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(float const & s)
    114 {
    115 	this->Data = _mm_sub_ps(Data, _mm_set_ps1(s));
    116 	return *this;
    117 }
    118 
    119 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(fvec4SIMD const & v)
    120 {
    121 	this->Data = _mm_sub_ps(this->Data , v.Data);
    122 	return *this;
    123 }
    124 
    125 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(float const & s)
    126 {
    127 	this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
    128 	return *this;
    129 }
    130 
    131 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(fvec4SIMD const & v)
    132 {
    133 	this->Data = _mm_mul_ps(this->Data , v.Data);
    134 	return *this;
    135 }
    136 
    137 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(float const & s)
    138 {
    139 	this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
    140 	return *this;
    141 }
    142 
    143 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(fvec4SIMD const & v)
    144 {
    145 	this->Data = _mm_div_ps(this->Data , v.Data);
    146 	return *this;
    147 }
    148 
    149 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator++()
    150 {
    151 	this->Data = _mm_add_ps(this->Data , glm::detail::one);
    152 	return *this;
    153 }
    154 
    155 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator--()
    156 {
    157 	this->Data = _mm_sub_ps(this->Data, glm::detail::one);
    158 	return *this;
    159 }
    160 
    161 //////////////////////////////////////
    162 // Swizzle operators
    163 
    164 template <comp X, comp Y, comp Z, comp W>
    165 GLM_FUNC_QUALIFIER fvec4SIMD fvec4SIMD::swizzle() const
    166 {
    167 	__m128 Data = _mm_shuffle_ps(
    168 		this->Data, this->Data, 
    169 		mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value);
    170 	return fvec4SIMD(Data);
    171 }
    172 
    173 template <comp X, comp Y, comp Z, comp W>
    174 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::swizzle()
    175 {
    176 	this->Data = _mm_shuffle_ps(
    177 		this->Data, this->Data, 
    178 		mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value);
    179 	return *this;
    180 }
    181 
    182 // operator+
    183 GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v, float s)
    184 {
    185 	return fvec4SIMD(_mm_add_ps(v.Data, _mm_set1_ps(s)));
    186 }
    187 
    188 GLM_FUNC_QUALIFIER fvec4SIMD operator+ (float s, fvec4SIMD const & v)
    189 {
    190 	return fvec4SIMD(_mm_add_ps(_mm_set1_ps(s), v.Data));
    191 }
    192 
    193 GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v1, fvec4SIMD const & v2)
    194 {
    195 	return fvec4SIMD(_mm_add_ps(v1.Data, v2.Data));
    196 }
    197 
    198 //operator-
    199 GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v, float s)
    200 {
    201 	return fvec4SIMD(_mm_sub_ps(v.Data, _mm_set1_ps(s)));
    202 }
    203 
    204 GLM_FUNC_QUALIFIER fvec4SIMD operator- (float s, fvec4SIMD const & v)
    205 {
    206 	return fvec4SIMD(_mm_sub_ps(_mm_set1_ps(s), v.Data));
    207 }
    208 
    209 GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v1, fvec4SIMD const & v2)
    210 {
    211 	return fvec4SIMD(_mm_sub_ps(v1.Data, v2.Data));
    212 }
    213 
    214 //operator*
    215 GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, float s)
    216 {
    217 	__m128 par0 = v.Data;
    218 	__m128 par1 = _mm_set1_ps(s);
    219 	return fvec4SIMD(_mm_mul_ps(par0, par1));
    220 }
    221 
    222 GLM_FUNC_QUALIFIER fvec4SIMD operator* (float s, fvec4SIMD const & v)
    223 {
    224 	__m128 par0 = _mm_set1_ps(s);
    225 	__m128 par1 = v.Data;
    226 	return fvec4SIMD(_mm_mul_ps(par0, par1));
    227 }
    228 
    229 GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v1, fvec4SIMD const & v2)
    230 {
    231 	return fvec4SIMD(_mm_mul_ps(v1.Data, v2.Data));
    232 }
    233 
    234 //operator/
    235 GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v, float s)
    236 {
    237 	__m128 par0 = v.Data;
    238 	__m128 par1 = _mm_set1_ps(s);
    239 	return fvec4SIMD(_mm_div_ps(par0, par1));
    240 }
    241 
    242 GLM_FUNC_QUALIFIER fvec4SIMD operator/ (float s, fvec4SIMD const & v)
    243 {
    244 	__m128 par0 = _mm_set1_ps(s);
    245 	__m128 par1 = v.Data;
    246 	return fvec4SIMD(_mm_div_ps(par0, par1));
    247 }
    248 
    249 GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v1, fvec4SIMD const & v2)
    250 {
    251 	return fvec4SIMD(_mm_div_ps(v1.Data, v2.Data));
    252 }
    253 
    254 // Unary constant operators
    255 GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v)
    256 {
    257 	return fvec4SIMD(_mm_sub_ps(_mm_setzero_ps(), v.Data));
    258 }
    259 
    260 GLM_FUNC_QUALIFIER fvec4SIMD operator++ (fvec4SIMD const & v, int)
    261 {
    262 	return fvec4SIMD(_mm_add_ps(v.Data, glm::detail::one));
    263 }
    264 
    265 GLM_FUNC_QUALIFIER fvec4SIMD operator-- (fvec4SIMD const & v, int)
    266 {
    267 	return fvec4SIMD(_mm_sub_ps(v.Data, glm::detail::one));
    268 }
    269 
    270 }//namespace detail
    271 
    272 GLM_FUNC_QUALIFIER vec4 vec4_cast
    273 (
    274 	detail::fvec4SIMD const & x
    275 )
    276 {
    277 	GLM_ALIGN(16) vec4 Result;
    278 	_mm_store_ps(&Result[0], x.Data);
    279 	return Result;
    280 }
    281 
    282 // Other possible implementation
    283 //float abs(float a)
    284 //{
    285 //  return max(-a, a);
    286 //}
    287 GLM_FUNC_QUALIFIER detail::fvec4SIMD abs
    288 (
    289 	detail::fvec4SIMD const & x
    290 )
    291 {
    292 	return detail::sse_abs_ps(x.Data);
    293 }
    294 
    295 GLM_FUNC_QUALIFIER detail::fvec4SIMD sign
    296 (
    297 	detail::fvec4SIMD const & x
    298 )
    299 {
    300 	return detail::sse_sgn_ps(x.Data);
    301 }
    302 
    303 GLM_FUNC_QUALIFIER detail::fvec4SIMD floor
    304 (
    305 	detail::fvec4SIMD const & x
    306 )
    307 {
    308 	return detail::sse_flr_ps(x.Data);
    309 }
    310 
    311 GLM_FUNC_QUALIFIER detail::fvec4SIMD trunc
    312 (
    313 	detail::fvec4SIMD const & x
    314 )
    315 {
    316     //return x < 0 ? -floor(-x) : floor(x);
    317 
    318 	__m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data));
    319 	__m128 Sub0 = _mm_sub_ps(Flr0, x.Data);
    320 	__m128 Flr1 = detail::sse_flr_ps(x.Data);
    321 
    322 	__m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero);
    323 	__m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero);
    324 
    325 	__m128 And0 = _mm_and_ps(Sub0, Cmp0);
    326 	__m128 And1 = _mm_and_ps(Flr1, Cmp1);
    327 
    328 	return _mm_or_ps(And0, And1);
    329 }
    330 
    331 GLM_FUNC_QUALIFIER detail::fvec4SIMD round
    332 (
    333 	detail::fvec4SIMD const & x
    334 )
    335 {
    336 	return detail::sse_rnd_ps(x.Data);
    337 }
    338 
    339 //GLM_FUNC_QUALIFIER detail::fvec4SIMD roundEven
    340 //(
    341 //	detail::fvec4SIMD const & x
    342 //)
    343 //{
    344 
    345 //}
    346 
    347 GLM_FUNC_QUALIFIER detail::fvec4SIMD ceil
    348 (
    349 	detail::fvec4SIMD const & x
    350 )
    351 {
    352 	return detail::sse_ceil_ps(x.Data);
    353 }
    354 
    355 GLM_FUNC_QUALIFIER detail::fvec4SIMD fract
    356 (
    357 	detail::fvec4SIMD const & x
    358 )
    359 {
    360 	return detail::sse_frc_ps(x.Data);
    361 }
    362 
    363 GLM_FUNC_QUALIFIER detail::fvec4SIMD mod
    364 (
    365 	detail::fvec4SIMD const & x, 
    366 	detail::fvec4SIMD const & y
    367 )
    368 {
    369 	return detail::sse_mod_ps(x.Data, y.Data);
    370 }
    371 
    372 GLM_FUNC_QUALIFIER detail::fvec4SIMD mod
    373 (
    374 	detail::fvec4SIMD const & x, 
    375 	float const & y
    376 )
    377 {
    378 	return detail::sse_mod_ps(x.Data, _mm_set1_ps(y));
    379 }
    380 
    381 //GLM_FUNC_QUALIFIER detail::fvec4SIMD modf
    382 //(
    383 //	detail::fvec4SIMD const & x, 
    384 //	detail::fvec4SIMD & i
    385 //)
    386 //{
    387 
    388 //}
    389 
    390 GLM_FUNC_QUALIFIER detail::fvec4SIMD min
    391 (
    392 	detail::fvec4SIMD const & x, 
    393 	detail::fvec4SIMD const & y
    394 )
    395 {
    396 	return _mm_min_ps(x.Data, y.Data);
    397 }
    398 
    399 GLM_FUNC_QUALIFIER detail::fvec4SIMD min
    400 (
    401 	detail::fvec4SIMD const & x, 
    402 	float const & y
    403 )
    404 {
    405 	return _mm_min_ps(x.Data, _mm_set1_ps(y));
    406 }
    407 
    408 GLM_FUNC_QUALIFIER detail::fvec4SIMD max
    409 (
    410 	detail::fvec4SIMD const & x, 
    411 	detail::fvec4SIMD const & y
    412 )
    413 {
    414 	return _mm_max_ps(x.Data, y.Data);
    415 }
    416 
    417 GLM_FUNC_QUALIFIER detail::fvec4SIMD max
    418 (
    419 	detail::fvec4SIMD const & x, 
    420 	float const & y
    421 )
    422 {
    423 	return _mm_max_ps(x.Data, _mm_set1_ps(y));
    424 }
    425 
    426 GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp
    427 (
    428 	detail::fvec4SIMD const & x, 
    429 	detail::fvec4SIMD const & minVal, 
    430 	detail::fvec4SIMD const & maxVal
    431 )
    432 {
    433 	return detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data);
    434 }
    435 
    436 GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp
    437 (
    438 	detail::fvec4SIMD const & x, 
    439 	float const & minVal, 
    440 	float const & maxVal
    441 ) 
    442 {
    443 	return detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal));
    444 }
    445 
    446 GLM_FUNC_QUALIFIER detail::fvec4SIMD mix
    447 (
    448 	detail::fvec4SIMD const & x, 
    449 	detail::fvec4SIMD const & y, 
    450 	detail::fvec4SIMD const & a
    451 )
    452 {
    453 	__m128 Sub0 = _mm_sub_ps(y.Data, x.Data);
    454 	__m128 Mul0 = _mm_mul_ps(a.Data, Sub0);
    455 	return _mm_add_ps(x.Data, Mul0);
    456 }
    457 
    458 GLM_FUNC_QUALIFIER detail::fvec4SIMD step
    459 (
    460 	detail::fvec4SIMD const & edge, 
    461 	detail::fvec4SIMD const & x
    462 )
    463 {
    464 	__m128 cmp0 = _mm_cmpngt_ps(x.Data, edge.Data);
    465 	return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one);
    466 }
    467 
    468 GLM_FUNC_QUALIFIER detail::fvec4SIMD step
    469 (
    470 	float const & edge, 
    471 	detail::fvec4SIMD const & x
    472 )
    473 {
    474 	__m128 cmp0 = _mm_cmpngt_ps(x.Data, _mm_set1_ps(edge));
    475 	return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one);
    476 }
    477 
    478 GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep
    479 (
    480 	detail::fvec4SIMD const & edge0, 
    481 	detail::fvec4SIMD const & edge1, 
    482 	detail::fvec4SIMD const & x
    483 )
    484 {
    485 	return detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data);
    486 }
    487 
    488 GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep
    489 (
    490 	float const & edge0, 
    491 	float const & edge1, 
    492 	detail::fvec4SIMD const & x
    493 )
    494 {
    495 	return detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data);
    496 }
    497 
    498 //GLM_FUNC_QUALIFIER bvec4 isnan(detail::fvec4SIMD const & x)
    499 //{
    500 
    501 //}
    502 
    503 //GLM_FUNC_QUALIFIER bvec4 isinf(detail::fvec4SIMD const & x)
    504 //{
    505 
    506 //}
    507 
    508 //GLM_FUNC_QUALIFIER detail::ivec4SIMD floatBitsToInt
    509 //(
    510 //	detail::fvec4SIMD const & value
    511 //)
    512 //{
    513 
    514 //}
    515 
    516 //GLM_FUNC_QUALIFIER detail::fvec4SIMD intBitsToFloat
    517 //(
    518 //	detail::ivec4SIMD const & value
    519 //)
    520 //{
    521 
    522 //}
    523 
    524 GLM_FUNC_QUALIFIER detail::fvec4SIMD fma
    525 (
    526 	detail::fvec4SIMD const & a, 
    527 	detail::fvec4SIMD const & b, 
    528 	detail::fvec4SIMD const & c
    529 )
    530 {
    531 	return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data);
    532 }
    533 
    534 GLM_FUNC_QUALIFIER float length
    535 (
    536 	detail::fvec4SIMD const & x
    537 )
    538 {
    539 	detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
    540 	detail::fvec4SIMD sqt0 = sqrt(dot0);
    541 	float Result = 0;
    542 	_mm_store_ss(&Result, sqt0.Data);
    543 	return Result;
    544 }
    545 
    546 GLM_FUNC_QUALIFIER float fastLength
    547 (
    548 	detail::fvec4SIMD const & x
    549 )
    550 {
    551 	detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
    552 	detail::fvec4SIMD sqt0 = fastSqrt(dot0);
    553 	float Result = 0;
    554 	_mm_store_ss(&Result, sqt0.Data);
    555 	return Result;
    556 }
    557 
    558 GLM_FUNC_QUALIFIER float niceLength
    559 (
    560 	detail::fvec4SIMD const & x
    561 )
    562 {
    563 	detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
    564 	detail::fvec4SIMD sqt0 = niceSqrt(dot0);
    565 	float Result = 0;
    566 	_mm_store_ss(&Result, sqt0.Data);
    567 	return Result;
    568 }
    569 
    570 GLM_FUNC_QUALIFIER detail::fvec4SIMD length4
    571 (
    572 	detail::fvec4SIMD const & x
    573 )
    574 {
    575 	return sqrt(dot4(x, x));
    576 }
    577 
    578 GLM_FUNC_QUALIFIER detail::fvec4SIMD fastLength4
    579 (
    580 	detail::fvec4SIMD const & x
    581 )
    582 {
    583 	return fastSqrt(dot4(x, x));
    584 }
    585 
    586 GLM_FUNC_QUALIFIER detail::fvec4SIMD niceLength4
    587 (
    588 	detail::fvec4SIMD const & x
    589 )
    590 {
    591 	return niceSqrt(dot4(x, x));
    592 }
    593 
    594 GLM_FUNC_QUALIFIER float distance
    595 (
    596 	detail::fvec4SIMD const & p0,
    597 	detail::fvec4SIMD const & p1
    598 )
    599 {
    600 	float Result = 0;
    601 	_mm_store_ss(&Result, detail::sse_dst_ps(p0.Data, p1.Data));
    602 	return Result;
    603 }
    604 
    605 GLM_FUNC_QUALIFIER detail::fvec4SIMD distance4
    606 (
    607 	detail::fvec4SIMD const & p0,
    608 	detail::fvec4SIMD const & p1
    609 )
    610 {
    611 	return detail::sse_dst_ps(p0.Data, p1.Data);
    612 }
    613 
    614 GLM_FUNC_QUALIFIER float dot
    615 (
    616 	detail::fvec4SIMD const & x,
    617 	detail::fvec4SIMD const & y
    618 )
    619 {
    620 	float Result = 0;
    621 	_mm_store_ss(&Result, detail::sse_dot_ss(x.Data, y.Data));
    622 	return Result;
    623 }
    624 
    625 GLM_FUNC_QUALIFIER detail::fvec4SIMD dot4
    626 (
    627 	detail::fvec4SIMD const & x,
    628 	detail::fvec4SIMD const & y
    629 )
    630 {
    631 	return detail::sse_dot_ps(x.Data, y.Data);
    632 }
    633 
    634 GLM_FUNC_QUALIFIER detail::fvec4SIMD cross
    635 (
    636 	detail::fvec4SIMD const & x,
    637 	detail::fvec4SIMD const & y
    638 )
    639 {
    640 	return detail::sse_xpd_ps(x.Data, y.Data);
    641 }
    642 
    643 GLM_FUNC_QUALIFIER detail::fvec4SIMD normalize
    644 (
    645 	detail::fvec4SIMD const & x
    646 )
    647 {
    648 	__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
    649 	__m128 isr0 = inversesqrt(detail::fvec4SIMD(dot0)).Data;
    650 	__m128 mul0 = _mm_mul_ps(x.Data, isr0);
    651 	return mul0;
    652 }
    653 
    654 GLM_FUNC_QUALIFIER detail::fvec4SIMD fastNormalize
    655 (
    656 	detail::fvec4SIMD const & x
    657 )
    658 {
    659 	__m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
    660 	__m128 isr0 = fastInversesqrt(dot0).Data;
    661 	__m128 mul0 = _mm_mul_ps(x.Data, isr0);
    662 	return mul0;
    663 }
    664 
    665 GLM_FUNC_QUALIFIER detail::fvec4SIMD faceforward
    666 (
    667 	detail::fvec4SIMD const & N,
    668 	detail::fvec4SIMD const & I,
    669 	detail::fvec4SIMD const & Nref
    670 )
    671 {
    672 	return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data);
    673 }
    674 
    675 GLM_FUNC_QUALIFIER detail::fvec4SIMD reflect
    676 (
    677 	detail::fvec4SIMD const & I,
    678 	detail::fvec4SIMD const & N
    679 )
    680 {
    681 	return detail::sse_rfe_ps(I.Data, N.Data);
    682 }
    683 
    684 GLM_FUNC_QUALIFIER detail::fvec4SIMD refract
    685 (
    686 	detail::fvec4SIMD const & I,
    687 	detail::fvec4SIMD const & N,
    688 	float const & eta
    689 )
    690 {
    691 	return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
    692 }
    693 
    694 GLM_FUNC_QUALIFIER detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x)
    695 {
    696 	return _mm_mul_ps(inversesqrt(x).Data, x.Data);
    697 }
    698 
    699 GLM_FUNC_QUALIFIER detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x)
    700 {
    701 	return _mm_sqrt_ps(x.Data);
    702 }
    703 
    704 GLM_FUNC_QUALIFIER detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x)
    705 {
    706 	return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data);
    707 }
    708 
    709 // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
    710 // By Elan Ruskin, http://assemblyrequired.crashworks.org/
    711 GLM_FUNC_QUALIFIER detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x)
    712 {
    713 	GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
    714 	GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5};
    715 
    716 	__m128 recip = _mm_rsqrt_ps(x.Data);  // "estimate" opcode
    717 	__m128 halfrecip = _mm_mul_ps(half, recip);
    718 	__m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
    719 	return _mm_mul_ps(halfrecip, threeminus_xrr);
    720 }
    721 
    722 GLM_FUNC_QUALIFIER detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x)
    723 {
    724 	return _mm_rsqrt_ps(x.Data);
    725 }
    726 
    727 }//namespace glm
    728