Home | History | Annotate | Download | only in gtx
      1 ///////////////////////////////////////////////////////////////////////////////////////////////////
      2 // OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
      3 ///////////////////////////////////////////////////////////////////////////////////////////////////
      4 // Created : 2013-04-22
      5 // Updated : 2013-04-22
      6 // Licence : This source is under MIT License
      7 // File    : glm/gtx/simd_quat.inl
      8 ///////////////////////////////////////////////////////////////////////////////////////////////////
      9 
     10 
     11 namespace glm{
     12 namespace detail{
     13 
     14 
     15 //////////////////////////////////////
     16 // Debugging
     17 #if 0
     18 void print(__m128 v)
     19 {
     20     GLM_ALIGN(16) float result[4];
     21     _mm_store_ps(result, v);
     22 
     23     printf("__m128:    %f %f %f %f\n", result[0], result[1], result[2], result[3]);
     24 }
     25 
     26 void print(const fvec4SIMD &v)
     27 {
     28     printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);
     29 }
     30 #endif
     31 
     32 
     33 //////////////////////////////////////
     34 // Implicit basic constructors
     35 
     36 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()
     37 #ifdef GLM_SIMD_ENABLE_DEFAULT_INIT
     38     : Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))
     39 #endif
     40 {}
     41 
     42 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) :
     43 	Data(Data)
     44 {}
     45 
     46 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) :
     47 	Data(q.Data)
     48 {}
     49 
     50 
     51 //////////////////////////////////////
     52 // Explicit basic constructors
     53 
     54 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :
     55 	Data(_mm_set_ps(w, z, y, x))
     56 {}
     57 
     58 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :
     59 	Data(_mm_set_ps(q.w, q.z, q.y, q.x))
     60 {}
     61 
     62 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)
     63 {
     64     vec3 c = glm::cos(eulerAngles * 0.5f);
     65 	vec3 s = glm::sin(eulerAngles * 0.5f);
     66 
     67     Data = _mm_set_ps(
     68         (c.x * c.y * c.z) + (s.x * s.y * s.z),
     69         (c.x * c.y * s.z) - (s.x * s.y * c.z),
     70         (c.x * s.y * c.z) + (s.x * c.y * s.z),
     71         (s.x * c.y * c.z) - (c.x * s.y * s.z));
     72 }
     73 
     74 
     75 //////////////////////////////////////
     76 // Unary arithmetic operators
     77 
     78 GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)
     79 {
     80     this->Data = q.Data;
     81     return *this;
     82 }
     83 
     84 GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)
     85 {
     86 	this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
     87 	return *this;
     88 }
     89 
     90 GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)
     91 {
     92 	this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
     93 	return *this;
     94 }
     95 
     96 
     97 
     98 // negate operator
     99 GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)
    100 {
    101     return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f)));
    102 }
    103 
    104 // operator+
    105 GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)
    106 {
    107 	return fquatSIMD(_mm_add_ps(q1.Data, q2.Data));
    108 }
    109 
    110 //operator*
    111 GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)
    112 {
    113     // SSE2 STATS:
    114     //    11 shuffle
    115     //    8  mul
    116     //    8  add
    117     
    118     // SSE4 STATS:
    119     //    3 shuffle
    120     //    4 mul
    121     //    4 dpps
    122 
    123     __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
    124     __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
    125     __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
    126     __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);
    127 
    128 #   if((GLM_ARCH & GLM_ARCH_SSE4))
    129     __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f), 0xff);
    130     __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f), 0xff);
    131     __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f), 0xff);
    132     __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff);
    133 #   else
    134            mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f));
    135     __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));
    136            add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));
    137 
    138            mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f));
    139     __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));
    140            add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));
    141 
    142            mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f));
    143     __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));
    144            add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));
    145 
    146            mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));
    147     __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));
    148            add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));
    149 #endif
    150 
    151 
    152     // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than
    153     // the final code below. I'll keep this here for reference - maybe somebody else can do something better...
    154     //
    155     //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0));
    156     //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0));
    157     //
    158     //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0));
    159     
    160     float x;
    161     float y;
    162     float z;
    163     float w;
    164 
    165     _mm_store_ss(&x, add0);
    166     _mm_store_ss(&y, add1);
    167     _mm_store_ss(&z, add2);
    168     _mm_store_ss(&w, add3);
    169 
    170     return detail::fquatSIMD(w, x, y, z);
    171 }
    172 
    173 GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)
    174 {
    175     static const __m128 two = _mm_set1_ps(2.0f);
    176 
    177     __m128 q_wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
    178     __m128 q_swp0  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
    179 	__m128 q_swp1  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
    180 	__m128 v_swp0  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1));
    181 	__m128 v_swp1  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2));
    182 	
    183 	__m128 uv      = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
    184     __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
    185     __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
    186     __m128 uuv     = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));
    187 
    188     
    189     uv  = _mm_mul_ps(uv,  _mm_mul_ps(q_wwww, two));
    190     uuv = _mm_mul_ps(uuv, two);
    191 
    192     return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
    193 }
    194 
    195 GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)
    196 {
    197 	return glm::inverse(q) * v;
    198 }
    199 
    200 GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)
    201 {
    202 	return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s)));
    203 }
    204 
    205 GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)
    206 {
    207 	return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data));
    208 }
    209 
    210 
    211 //operator/
    212 GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)
    213 {
    214 	return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s)));
    215 }
    216 
    217 
    218 }//namespace detail
    219 
    220 
    221 GLM_FUNC_QUALIFIER quat quat_cast
    222 (
    223 	detail::fquatSIMD const & x
    224 )
    225 {
    226 	GLM_ALIGN(16) quat Result;
    227 	_mm_store_ps(&Result[0], x.Data);
    228 
    229 	return Result;
    230 }
    231 
    232 template <typename T>
    233 GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[])
    234 {
    235     T trace = m0[0] + m1[1] + m2[2] + T(1.0);
    236     if (trace > T(0))
    237     {
    238         T s = static_cast<T>(0.5) / sqrt(trace);
    239 
    240         return _mm_set_ps(
    241             static_cast<float>(T(0.25) / s),
    242             static_cast<float>((m0[1] - m1[0]) * s),
    243             static_cast<float>((m2[0] - m0[2]) * s),
    244             static_cast<float>((m1[2] - m2[1]) * s));
    245     }
    246     else
    247     {
    248         if (m0[0] > m1[1])
    249         {
    250             if (m0[0] > m2[2])
    251             {
    252                 // X is biggest.
    253                 T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5);
    254 
    255                 return _mm_set_ps(
    256                     static_cast<float>((m1[2] - m2[1]) * s),
    257                     static_cast<float>((m2[0] + m0[2]) * s),
    258                     static_cast<float>((m0[1] + m1[0]) * s),
    259                     static_cast<float>(T(0.5)          * s));
    260             }
    261         }
    262         else
    263         {
    264             if (m1[1] > m2[2])
    265             {
    266                 // Y is biggest.
    267                 T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5);
    268 
    269                 return _mm_set_ps(
    270                     static_cast<float>((m2[0] - m0[2]) * s),
    271                     static_cast<float>((m1[2] + m2[1]) * s),
    272                     static_cast<float>(T(0.5)          * s),
    273                     static_cast<float>((m0[1] + m1[0]) * s));
    274             }
    275         }
    276 
    277         // Z is biggest.
    278         T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5);
    279 
    280         return _mm_set_ps(
    281             static_cast<float>((m0[1] - m1[0]) * s),
    282             static_cast<float>(T(0.5)          * s),
    283             static_cast<float>((m1[2] + m2[1]) * s),
    284             static_cast<float>((m2[0] + m0[2]) * s));
    285     }
    286 }
    287 
    288 GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
    289 (
    290 	detail::fmat4x4SIMD const & m
    291 )
    292 {
    293     // Scalar implementation for now.
    294     GLM_ALIGN(16) float m0[4];
    295     GLM_ALIGN(16) float m1[4];
    296     GLM_ALIGN(16) float m2[4];
    297 
    298     _mm_store_ps(m0, m[0].Data);
    299     _mm_store_ps(m1, m[1].Data);
    300     _mm_store_ps(m2, m[2].Data);
    301 
    302     return quatSIMD_cast_impl(m0, m1, m2);
    303 }
    304 
    305 template <typename T, precision P>
    306 GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
    307 (
    308     detail::tmat4x4<T, P> const & m
    309 )
    310 {
    311     return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
    312 }
    313 
    314 template <typename T, precision P>
    315 GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
    316 (
    317     detail::tmat3x3<T, P> const & m
    318 )
    319 {
    320     return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
    321 }
    322 
    323 
    324 GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast
    325 (
    326 	detail::fquatSIMD const & q
    327 )
    328 {
    329     detail::fmat4x4SIMD result;
    330 
    331     __m128 _wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
    332     __m128 _xyzw  = q.Data;
    333     __m128 _zxyw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
    334     __m128 _yzxw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
    335 
    336     __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);
    337     __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));
    338     __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));
    339     
    340     __m128 _tmp0  = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));
    341            _tmp0  = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));
    342 
    343     __m128 _tmp1  = _mm_mul_ps(_yzxw2, _xyzw);
    344            _tmp1  = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));
    345 
    346     __m128 _tmp2  = _mm_mul_ps(_zxyw2, _xyzw);
    347            _tmp2  = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));
    348 
    349 
    350     // There's probably a better, more politically correct way of doing this...
    351     result[0].Data = _mm_set_ps(
    352         0.0f,
    353         reinterpret_cast<float*>(&_tmp2)[0],
    354         reinterpret_cast<float*>(&_tmp1)[0],
    355         reinterpret_cast<float*>(&_tmp0)[0]);
    356 
    357     result[1].Data = _mm_set_ps(
    358         0.0f,
    359         reinterpret_cast<float*>(&_tmp1)[1],
    360         reinterpret_cast<float*>(&_tmp0)[1],
    361         reinterpret_cast<float*>(&_tmp2)[1]);
    362 
    363     result[2].Data = _mm_set_ps(
    364         0.0f,
    365         reinterpret_cast<float*>(&_tmp0)[2],
    366         reinterpret_cast<float*>(&_tmp2)[2],
    367         reinterpret_cast<float*>(&_tmp1)[2]);
    368 
    369    result[3].Data = _mm_set_ps(
    370         1.0f,
    371         0.0f,
    372         0.0f,
    373         0.0f);
    374 
    375 
    376     return result;
    377 }
    378 
    379 GLM_FUNC_QUALIFIER mat4 mat4_cast
    380 (
    381 	detail::fquatSIMD const & q
    382 )
    383 {
    384     return mat4_cast(mat4SIMD_cast(q));
    385 }
    386 
    387 
    388 
    389 GLM_FUNC_QUALIFIER float length
    390 (
    391 	detail::fquatSIMD const & q
    392 )
    393 {
    394     return glm::sqrt(dot(q, q));
    395 }
    396 
    397 GLM_FUNC_QUALIFIER detail::fquatSIMD normalize
    398 (
    399 	detail::fquatSIMD const & q
    400 )
    401 {
    402     return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q)));
    403 }
    404 
    405 GLM_FUNC_QUALIFIER float dot
    406 (
    407 	detail::fquatSIMD const & q1,
    408 	detail::fquatSIMD const & q2
    409 )
    410 {
    411     float result;
    412     _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data));
    413 
    414     return result;
    415 }
    416 
    417 GLM_FUNC_QUALIFIER detail::fquatSIMD mix
    418 (
    419 	detail::fquatSIMD const & x, 
    420 	detail::fquatSIMD const & y, 
    421 	float const & a
    422 )
    423 {
    424 	float cosTheta = dot(x, y);
    425 
    426     if (cosTheta > 1.0f - glm::epsilon<float>())
    427     {
    428 	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
    429     }
    430     else
    431     {
    432         float angle = glm::acos(cosTheta);
    433         
    434         
    435         float s0 = glm::sin((1.0f - a) * angle);
    436         float s1 = glm::sin(a * angle);
    437         float d  = 1.0f / glm::sin(angle);
    438 
    439         return (s0 * x + s1 * y) * d;
    440     }
    441 }
    442 
    443 GLM_FUNC_QUALIFIER detail::fquatSIMD lerp
    444 (
    445 	detail::fquatSIMD const & x, 
    446 	detail::fquatSIMD const & y, 
    447 	float const & a
    448 )
    449 {
    450 	// Lerp is only defined in [0, 1]
    451 	assert(a >= 0.0f);
    452 	assert(a <= 1.0f);
    453 
    454     return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
    455 }
    456 
    457 GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
    458 (
    459 	detail::fquatSIMD const & x, 
    460 	detail::fquatSIMD const & y, 
    461 	float const & a
    462 )
    463 {
    464 	detail::fquatSIMD z = y;
    465 
    466 	float cosTheta = dot(x, y);
    467 
    468 	// If cosTheta < 0, the interpolation will take the long way around the sphere. 
    469 	// To fix this, one quat must be negated.
    470 	if (cosTheta < 0.0f)
    471 	{
    472 		z        = -y;
    473 		cosTheta = -cosTheta;
    474 	}
    475 
    476 	// Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator
    477 	if(cosTheta > 1.0f - epsilon<float>())
    478 	{
    479 		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
    480 	}
    481 	else
    482 	{
    483         float angle = glm::acos(cosTheta);
    484 
    485 
    486 		float s0 = glm::sin((1.0f - a) * angle);
    487         float s1 = glm::sin(a * angle);
    488         float d  = 1.0f / glm::sin(angle);
    489 
    490         return (s0 * x + s1 * y) * d;
    491 	}
    492 }
    493 
    494 
    495 GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
    496 (
    497 	detail::fquatSIMD const & x, 
    498 	detail::fquatSIMD const & y, 
    499 	float const & a
    500 )
    501 {
    502 	float cosTheta = dot(x, y);
    503 
    504     if (cosTheta > 1.0f - glm::epsilon<float>())
    505     {
    506 	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
    507     }
    508     else
    509     {
    510         float angle = glm::fastAcos(cosTheta);
    511 
    512 
    513         __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
    514 
    515         __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
    516         __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
    517         __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
    518         
    519         return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
    520     }
    521 }
    522 
    523 GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
    524 (
    525 	detail::fquatSIMD const & x, 
    526 	detail::fquatSIMD const & y, 
    527 	float const & a
    528 )
    529 {
    530 	detail::fquatSIMD z = y;
    531 
    532 	float cosTheta = dot(x, y);
    533 	if (cosTheta < 0.0f)
    534 	{
    535 		z        = -y;
    536 		cosTheta = -cosTheta;
    537 	}
    538 
    539 
    540 	if(cosTheta > 1.0f - epsilon<float>())
    541 	{
    542 		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
    543 	}
    544 	else
    545 	{
    546         float angle = glm::fastAcos(cosTheta);
    547 
    548 
    549         __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));
    550 
    551         __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
    552         __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
    553         __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
    554         
    555         return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
    556 	}
    557 }
    558 
    559 
    560 
    561 GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
    562 (
    563 	detail::fquatSIMD const & q
    564 )
    565 {
    566 	return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)));
    567 }
    568 
    569 GLM_FUNC_QUALIFIER detail::fquatSIMD inverse
    570 (
    571 	detail::fquatSIMD const & q
    572 )
    573 {
    574 	return conjugate(q) / dot(q, q);
    575 }
    576 
    577 
    578 GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
    579 (
    580 	float const & angle,
    581 	vec3 const & v
    582 )
    583 {
    584 #ifdef GLM_FORCE_RADIANS
    585 	float a(angle);
    586 #else
    587 #	pragma message("GLM: rotateZ function taking degrees as parameters is deprecated. #define GLM_FORCE_RADIANS before including GLM headers to remove this message.")
    588 	float a(glm::radians(angle));
    589 #endif
    590 	float s = glm::sin(a * 0.5f);
    591 
    592 	return _mm_set_ps(
    593 		glm::cos(a * 0.5f),
    594 		v.z * s,
    595 		v.y * s,
    596 		v.x * s);
    597 }
    598 
    599 GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
    600 (
    601 	float const & angle, 
    602 	float const & x, 
    603 	float const & y, 
    604 	float const & z
    605 )
    606 {
    607 	return angleAxisSIMD(angle, vec3(x, y, z));
    608 }
    609 
    610 
    611 GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
    612 {
    613     static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
    614     static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
    615     static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);
    616 
    617     __m128 x3 = _mm_mul_ps(x,  _mm_mul_ps(x, x));
    618     __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
    619     __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));
    620 
    621     __m128 y0 = _mm_mul_ps(x3, c0);
    622     __m128 y1 = _mm_mul_ps(x5, c1);
    623     __m128 y2 = _mm_mul_ps(x7, c2);
    624         
    625     return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
    626 }
    627 
    628 
    629 }//namespace glm
    630