1 /////////////////////////////////////////////////////////////////////////////////////////////////// 2 // OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net) 3 /////////////////////////////////////////////////////////////////////////////////////////////////// 4 // Created : 2013-04-22 5 // Updated : 2013-04-22 6 // Licence : This source is under MIT License 7 // File : glm/gtx/simd_quat.inl 8 /////////////////////////////////////////////////////////////////////////////////////////////////// 9 10 11 namespace glm{ 12 namespace detail{ 13 14 15 ////////////////////////////////////// 16 // Debugging 17 #if 0 18 void print(__m128 v) 19 { 20 GLM_ALIGN(16) float result[4]; 21 _mm_store_ps(result, v); 22 23 printf("__m128: %f %f %f %f\n", result[0], result[1], result[2], result[3]); 24 } 25 26 void print(const fvec4SIMD &v) 27 { 28 printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w); 29 } 30 #endif 31 32 33 ////////////////////////////////////// 34 // Implicit basic constructors 35 36 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD() 37 #ifdef GLM_SIMD_ENABLE_DEFAULT_INIT 38 : Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f)) 39 #endif 40 {} 41 42 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) : 43 Data(Data) 44 {} 45 46 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) : 47 Data(q.Data) 48 {} 49 50 51 ////////////////////////////////////// 52 // Explicit basic constructors 53 54 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) : 55 Data(_mm_set_ps(w, z, y, x)) 56 {} 57 58 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) : 59 Data(_mm_set_ps(q.w, q.z, q.y, q.x)) 60 {} 61 62 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles) 63 { 64 vec3 c = glm::cos(eulerAngles * 0.5f); 65 vec3 s = glm::sin(eulerAngles * 0.5f); 66 67 Data = _mm_set_ps( 68 (c.x * c.y * c.z) + (s.x * s.y * s.z), 69 (c.x * c.y * s.z) - (s.x * s.y * c.z), 70 (c.x * s.y * c.z) + (s.x * c.y * s.z), 71 (s.x * c.y * c.z) - (c.x * s.y * s.z)); 72 } 73 74 75 ////////////////////////////////////// 76 // Unary arithmetic operators 77 78 GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q) 79 { 80 this->Data = q.Data; 81 return *this; 82 } 83 84 GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s) 85 { 86 this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s)); 87 return *this; 88 } 89 90 GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s) 91 { 92 this->Data = _mm_div_ps(Data, _mm_set1_ps(s)); 93 return *this; 94 } 95 96 97 98 // negate operator 99 GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q) 100 { 101 return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f))); 102 } 103 104 // operator+ 105 GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2) 106 { 107 return fquatSIMD(_mm_add_ps(q1.Data, q2.Data)); 108 } 109 110 //operator* 111 GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2) 112 { 113 // SSE2 STATS: 114 // 11 shuffle 115 // 8 mul 116 // 8 add 117 118 // SSE4 STATS: 119 // 3 shuffle 120 // 4 mul 121 // 4 dpps 122 123 __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3))); 124 __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2))); 125 __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1))); 126 __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data); 127 128 # if((GLM_ARCH & GLM_ARCH_SSE4)) 129 __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff); 130 __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff); 131 __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff); 132 __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff); 133 # else 134 mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f)); 135 __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0)); 136 add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1)); 137 138 mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f)); 139 __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1)); 140 add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1)); 141 142 mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f)); 143 __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2)); 144 add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1)); 145 146 mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)); 147 __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3)); 148 add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1)); 149 #endif 150 151 152 // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than 153 // the final code below. I'll keep this here for reference - maybe somebody else can do something better... 154 // 155 //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0)); 156 //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0)); 157 // 158 //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0)); 159 160 float x; 161 float y; 162 float z; 163 float w; 164 165 _mm_store_ss(&x, add0); 166 _mm_store_ss(&y, add1); 167 _mm_store_ss(&z, add2); 168 _mm_store_ss(&w, add3); 169 170 return detail::fquatSIMD(w, x, y, z); 171 } 172 173 GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v) 174 { 175 static const __m128 two = _mm_set1_ps(2.0f); 176 177 __m128 q_wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)); 178 __m128 q_swp0 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1)); 179 __m128 q_swp1 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2)); 180 __m128 v_swp0 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1)); 181 __m128 v_swp1 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2)); 182 183 __m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0)); 184 __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1)); 185 __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2)); 186 __m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0)); 187 188 189 uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two)); 190 uuv = _mm_mul_ps(uuv, two); 191 192 return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv)); 193 } 194 195 GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q) 196 { 197 return glm::inverse(q) * v; 198 } 199 200 GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s) 201 { 202 return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s))); 203 } 204 205 GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q) 206 { 207 return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data)); 208 } 209 210 211 //operator/ 212 GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s) 213 { 214 return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s))); 215 } 216 217 218 }//namespace detail 219 220 221 GLM_FUNC_QUALIFIER quat quat_cast 222 ( 223 detail::fquatSIMD const & x 224 ) 225 { 226 GLM_ALIGN(16) quat Result; 227 _mm_store_ps(&Result[0], x.Data); 228 229 return Result; 230 } 231 232 template <typename T> 233 GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[]) 234 { 235 T trace = m0[0] + m1[1] + m2[2] + T(1.0); 236 if (trace > T(0)) 237 { 238 T s = static_cast<T>(0.5) / sqrt(trace); 239 240 return _mm_set_ps( 241 static_cast<float>(T(0.25) / s), 242 static_cast<float>((m0[1] - m1[0]) * s), 243 static_cast<float>((m2[0] - m0[2]) * s), 244 static_cast<float>((m1[2] - m2[1]) * s)); 245 } 246 else 247 { 248 if (m0[0] > m1[1]) 249 { 250 if (m0[0] > m2[2]) 251 { 252 // X is biggest. 253 T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5); 254 255 return _mm_set_ps( 256 static_cast<float>((m1[2] - m2[1]) * s), 257 static_cast<float>((m2[0] + m0[2]) * s), 258 static_cast<float>((m0[1] + m1[0]) * s), 259 static_cast<float>(T(0.5) * s)); 260 } 261 } 262 else 263 { 264 if (m1[1] > m2[2]) 265 { 266 // Y is biggest. 267 T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5); 268 269 return _mm_set_ps( 270 static_cast<float>((m2[0] - m0[2]) * s), 271 static_cast<float>((m1[2] + m2[1]) * s), 272 static_cast<float>(T(0.5) * s), 273 static_cast<float>((m0[1] + m1[0]) * s)); 274 } 275 } 276 277 // Z is biggest. 278 T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5); 279 280 return _mm_set_ps( 281 static_cast<float>((m0[1] - m1[0]) * s), 282 static_cast<float>(T(0.5) * s), 283 static_cast<float>((m1[2] + m2[1]) * s), 284 static_cast<float>((m2[0] + m0[2]) * s)); 285 } 286 } 287 288 GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 289 ( 290 detail::fmat4x4SIMD const & m 291 ) 292 { 293 // Scalar implementation for now. 294 GLM_ALIGN(16) float m0[4]; 295 GLM_ALIGN(16) float m1[4]; 296 GLM_ALIGN(16) float m2[4]; 297 298 _mm_store_ps(m0, m[0].Data); 299 _mm_store_ps(m1, m[1].Data); 300 _mm_store_ps(m2, m[2].Data); 301 302 return quatSIMD_cast_impl(m0, m1, m2); 303 } 304 305 template <typename T, precision P> 306 GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 307 ( 308 detail::tmat4x4<T, P> const & m 309 ) 310 { 311 return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]); 312 } 313 314 template <typename T, precision P> 315 GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 316 ( 317 detail::tmat3x3<T, P> const & m 318 ) 319 { 320 return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]); 321 } 322 323 324 GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast 325 ( 326 detail::fquatSIMD const & q 327 ) 328 { 329 detail::fmat4x4SIMD result; 330 331 __m128 _wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)); 332 __m128 _xyzw = q.Data; 333 __m128 _zxyw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2)); 334 __m128 _yzxw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1)); 335 336 __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw); 337 __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2)); 338 __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1)); 339 340 __m128 _tmp0 = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw)); 341 _tmp0 = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw)); 342 343 __m128 _tmp1 = _mm_mul_ps(_yzxw2, _xyzw); 344 _tmp1 = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww)); 345 346 __m128 _tmp2 = _mm_mul_ps(_zxyw2, _xyzw); 347 _tmp2 = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww)); 348 349 350 // There's probably a better, more politically correct way of doing this... 351 result[0].Data = _mm_set_ps( 352 0.0f, 353 reinterpret_cast<float*>(&_tmp2)[0], 354 reinterpret_cast<float*>(&_tmp1)[0], 355 reinterpret_cast<float*>(&_tmp0)[0]); 356 357 result[1].Data = _mm_set_ps( 358 0.0f, 359 reinterpret_cast<float*>(&_tmp1)[1], 360 reinterpret_cast<float*>(&_tmp0)[1], 361 reinterpret_cast<float*>(&_tmp2)[1]); 362 363 result[2].Data = _mm_set_ps( 364 0.0f, 365 reinterpret_cast<float*>(&_tmp0)[2], 366 reinterpret_cast<float*>(&_tmp2)[2], 367 reinterpret_cast<float*>(&_tmp1)[2]); 368 369 result[3].Data = _mm_set_ps( 370 1.0f, 371 0.0f, 372 0.0f, 373 0.0f); 374 375 376 return result; 377 } 378 379 GLM_FUNC_QUALIFIER mat4 mat4_cast 380 ( 381 detail::fquatSIMD const & q 382 ) 383 { 384 return mat4_cast(mat4SIMD_cast(q)); 385 } 386 387 388 389 GLM_FUNC_QUALIFIER float length 390 ( 391 detail::fquatSIMD const & q 392 ) 393 { 394 return glm::sqrt(dot(q, q)); 395 } 396 397 GLM_FUNC_QUALIFIER detail::fquatSIMD normalize 398 ( 399 detail::fquatSIMD const & q 400 ) 401 { 402 return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q))); 403 } 404 405 GLM_FUNC_QUALIFIER float dot 406 ( 407 detail::fquatSIMD const & q1, 408 detail::fquatSIMD const & q2 409 ) 410 { 411 float result; 412 _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data)); 413 414 return result; 415 } 416 417 GLM_FUNC_QUALIFIER detail::fquatSIMD mix 418 ( 419 detail::fquatSIMD const & x, 420 detail::fquatSIMD const & y, 421 float const & a 422 ) 423 { 424 float cosTheta = dot(x, y); 425 426 if (cosTheta > 1.0f - glm::epsilon<float>()) 427 { 428 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 429 } 430 else 431 { 432 float angle = glm::acos(cosTheta); 433 434 435 float s0 = glm::sin((1.0f - a) * angle); 436 float s1 = glm::sin(a * angle); 437 float d = 1.0f / glm::sin(angle); 438 439 return (s0 * x + s1 * y) * d; 440 } 441 } 442 443 GLM_FUNC_QUALIFIER detail::fquatSIMD lerp 444 ( 445 detail::fquatSIMD const & x, 446 detail::fquatSIMD const & y, 447 float const & a 448 ) 449 { 450 // Lerp is only defined in [0, 1] 451 assert(a >= 0.0f); 452 assert(a <= 1.0f); 453 454 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 455 } 456 457 GLM_FUNC_QUALIFIER detail::fquatSIMD slerp 458 ( 459 detail::fquatSIMD const & x, 460 detail::fquatSIMD const & y, 461 float const & a 462 ) 463 { 464 detail::fquatSIMD z = y; 465 466 float cosTheta = dot(x, y); 467 468 // If cosTheta < 0, the interpolation will take the long way around the sphere. 469 // To fix this, one quat must be negated. 470 if (cosTheta < 0.0f) 471 { 472 z = -y; 473 cosTheta = -cosTheta; 474 } 475 476 // Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator 477 if(cosTheta > 1.0f - epsilon<float>()) 478 { 479 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 480 } 481 else 482 { 483 float angle = glm::acos(cosTheta); 484 485 486 float s0 = glm::sin((1.0f - a) * angle); 487 float s1 = glm::sin(a * angle); 488 float d = 1.0f / glm::sin(angle); 489 490 return (s0 * x + s1 * y) * d; 491 } 492 } 493 494 495 GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix 496 ( 497 detail::fquatSIMD const & x, 498 detail::fquatSIMD const & y, 499 float const & a 500 ) 501 { 502 float cosTheta = dot(x, y); 503 504 if (cosTheta > 1.0f - glm::epsilon<float>()) 505 { 506 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 507 } 508 else 509 { 510 float angle = glm::fastAcos(cosTheta); 511 512 513 __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f)); 514 515 __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3)); 516 __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2)); 517 __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))); 518 519 return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d); 520 } 521 } 522 523 GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp 524 ( 525 detail::fquatSIMD const & x, 526 detail::fquatSIMD const & y, 527 float const & a 528 ) 529 { 530 detail::fquatSIMD z = y; 531 532 float cosTheta = dot(x, y); 533 if (cosTheta < 0.0f) 534 { 535 z = -y; 536 cosTheta = -cosTheta; 537 } 538 539 540 if(cosTheta > 1.0f - epsilon<float>()) 541 { 542 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 543 } 544 else 545 { 546 float angle = glm::fastAcos(cosTheta); 547 548 549 __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f)); 550 551 __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3)); 552 __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2)); 553 __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))); 554 555 return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d); 556 } 557 } 558 559 560 561 GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate 562 ( 563 detail::fquatSIMD const & q 564 ) 565 { 566 return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f))); 567 } 568 569 GLM_FUNC_QUALIFIER detail::fquatSIMD inverse 570 ( 571 detail::fquatSIMD const & q 572 ) 573 { 574 return conjugate(q) / dot(q, q); 575 } 576 577 578 GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD 579 ( 580 float const & angle, 581 vec3 const & v 582 ) 583 { 584 #ifdef GLM_FORCE_RADIANS 585 float a(angle); 586 #else 587 # pragma message("GLM: rotateZ function taking degrees as parameters is deprecated. #define GLM_FORCE_RADIANS before including GLM headers to remove this message.") 588 float a(glm::radians(angle)); 589 #endif 590 float s = glm::sin(a * 0.5f); 591 592 return _mm_set_ps( 593 glm::cos(a * 0.5f), 594 v.z * s, 595 v.y * s, 596 v.x * s); 597 } 598 599 GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD 600 ( 601 float const & angle, 602 float const & x, 603 float const & y, 604 float const & z 605 ) 606 { 607 return angleAxisSIMD(angle, vec3(x, y, z)); 608 } 609 610 611 GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x) 612 { 613 static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f); 614 static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f); 615 static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f); 616 617 __m128 x3 = _mm_mul_ps(x, _mm_mul_ps(x, x)); 618 __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x)); 619 __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x)); 620 621 __m128 y0 = _mm_mul_ps(x3, c0); 622 __m128 y1 = _mm_mul_ps(x5, c1); 623 __m128 y2 = _mm_mul_ps(x7, c2); 624 625 return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2); 626 } 627 628 629 }//namespace glm 630