1 /////////////////////////////////////////////////////////////////////////////////////////////////// 2 // OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net) 3 /////////////////////////////////////////////////////////////////////////////////////////////////// 4 // Created : 2009-05-07 5 // Updated : 2009-05-07 6 // Licence : This source is under MIT License 7 // File : glm/gtx/simd_vec4.inl 8 /////////////////////////////////////////////////////////////////////////////////////////////////// 9 10 namespace glm{ 11 namespace detail{ 12 13 template <int Value> 14 struct mask 15 { 16 enum{value = Value}; 17 }; 18 19 ////////////////////////////////////// 20 // Implicit basic constructors 21 22 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD() 23 #ifdef GLM_SIMD_ENABLE_DEFAULT_INIT 24 : Data(_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f)) 25 #endif 26 {} 27 28 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(__m128 const & Data) : 29 Data(Data) 30 {} 31 32 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(fvec4SIMD const & v) : 33 Data(v.Data) 34 {} 35 36 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec4 const & v) : 37 Data(_mm_set_ps(v.w, v.z, v.y, v.x)) 38 {} 39 40 ////////////////////////////////////// 41 // Explicit basic constructors 42 43 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s) : 44 Data(_mm_set1_ps(s)) 45 {} 46 47 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & x, float const & y, float const & z, float const & w) : 48 // Data(_mm_setr_ps(x, y, z, w)) 49 Data(_mm_set_ps(w, z, y, x)) 50 {} 51 /* 52 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const v[4]) : 53 Data(_mm_load_ps(v)) 54 {} 55 */ 56 ////////////////////////////////////// 57 // Swizzle constructors 58 59 //fvec4SIMD(ref4<float> const & r); 60 61 ////////////////////////////////////// 62 // Conversion vector constructors 63 64 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v, float const & s1, float const & s2) : 65 Data(_mm_set_ps(s2, s1, v.y, v.x)) 66 {} 67 68 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, vec2 const & v, float const & s2) : 69 Data(_mm_set_ps(s2, v.y, v.x, s1)) 70 {} 71 72 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, float const & s2, vec2 const & v) : 73 Data(_mm_set_ps(v.y, v.x, s2, s1)) 74 {} 75 76 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec3 const & v, float const & s) : 77 Data(_mm_set_ps(s, v.z, v.y, v.x)) 78 {} 79 80 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s, vec3 const & v) : 81 Data(_mm_set_ps(v.z, v.y, v.x, s)) 82 {} 83 84 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v1, vec2 const & v2) : 85 Data(_mm_set_ps(v2.y, v2.x, v1.y, v1.x)) 86 {} 87 88 //GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(ivec4SIMD const & v) : 89 // Data(_mm_cvtepi32_ps(v.Data)) 90 //{} 91 92 ////////////////////////////////////// 93 // Unary arithmetic operators 94 95 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator=(fvec4SIMD const & v) 96 { 97 this->Data = v.Data; 98 return *this; 99 } 100 101 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(float const & s) 102 { 103 this->Data = _mm_add_ps(Data, _mm_set_ps1(s)); 104 return *this; 105 } 106 107 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(fvec4SIMD const & v) 108 { 109 this->Data = _mm_add_ps(this->Data , v.Data); 110 return *this; 111 } 112 113 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(float const & s) 114 { 115 this->Data = _mm_sub_ps(Data, _mm_set_ps1(s)); 116 return *this; 117 } 118 119 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(fvec4SIMD const & v) 120 { 121 this->Data = _mm_sub_ps(this->Data , v.Data); 122 return *this; 123 } 124 125 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(float const & s) 126 { 127 this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s)); 128 return *this; 129 } 130 131 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(fvec4SIMD const & v) 132 { 133 this->Data = _mm_mul_ps(this->Data , v.Data); 134 return *this; 135 } 136 137 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(float const & s) 138 { 139 this->Data = _mm_div_ps(Data, _mm_set1_ps(s)); 140 return *this; 141 } 142 143 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(fvec4SIMD const & v) 144 { 145 this->Data = _mm_div_ps(this->Data , v.Data); 146 return *this; 147 } 148 149 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator++() 150 { 151 this->Data = _mm_add_ps(this->Data , glm::detail::one); 152 return *this; 153 } 154 155 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator--() 156 { 157 this->Data = _mm_sub_ps(this->Data, glm::detail::one); 158 return *this; 159 } 160 161 ////////////////////////////////////// 162 // Swizzle operators 163 164 template <comp X, comp Y, comp Z, comp W> 165 GLM_FUNC_QUALIFIER fvec4SIMD fvec4SIMD::swizzle() const 166 { 167 __m128 Data = _mm_shuffle_ps( 168 this->Data, this->Data, 169 mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); 170 return fvec4SIMD(Data); 171 } 172 173 template <comp X, comp Y, comp Z, comp W> 174 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::swizzle() 175 { 176 this->Data = _mm_shuffle_ps( 177 this->Data, this->Data, 178 mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); 179 return *this; 180 } 181 182 // operator+ 183 GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v, float s) 184 { 185 return fvec4SIMD(_mm_add_ps(v.Data, _mm_set1_ps(s))); 186 } 187 188 GLM_FUNC_QUALIFIER fvec4SIMD operator+ (float s, fvec4SIMD const & v) 189 { 190 return fvec4SIMD(_mm_add_ps(_mm_set1_ps(s), v.Data)); 191 } 192 193 GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v1, fvec4SIMD const & v2) 194 { 195 return fvec4SIMD(_mm_add_ps(v1.Data, v2.Data)); 196 } 197 198 //operator- 199 GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v, float s) 200 { 201 return fvec4SIMD(_mm_sub_ps(v.Data, _mm_set1_ps(s))); 202 } 203 204 GLM_FUNC_QUALIFIER fvec4SIMD operator- (float s, fvec4SIMD const & v) 205 { 206 return fvec4SIMD(_mm_sub_ps(_mm_set1_ps(s), v.Data)); 207 } 208 209 GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v1, fvec4SIMD const & v2) 210 { 211 return fvec4SIMD(_mm_sub_ps(v1.Data, v2.Data)); 212 } 213 214 //operator* 215 GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, float s) 216 { 217 __m128 par0 = v.Data; 218 __m128 par1 = _mm_set1_ps(s); 219 return fvec4SIMD(_mm_mul_ps(par0, par1)); 220 } 221 222 GLM_FUNC_QUALIFIER fvec4SIMD operator* (float s, fvec4SIMD const & v) 223 { 224 __m128 par0 = _mm_set1_ps(s); 225 __m128 par1 = v.Data; 226 return fvec4SIMD(_mm_mul_ps(par0, par1)); 227 } 228 229 GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v1, fvec4SIMD const & v2) 230 { 231 return fvec4SIMD(_mm_mul_ps(v1.Data, v2.Data)); 232 } 233 234 //operator/ 235 GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v, float s) 236 { 237 __m128 par0 = v.Data; 238 __m128 par1 = _mm_set1_ps(s); 239 return fvec4SIMD(_mm_div_ps(par0, par1)); 240 } 241 242 GLM_FUNC_QUALIFIER fvec4SIMD operator/ (float s, fvec4SIMD const & v) 243 { 244 __m128 par0 = _mm_set1_ps(s); 245 __m128 par1 = v.Data; 246 return fvec4SIMD(_mm_div_ps(par0, par1)); 247 } 248 249 GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v1, fvec4SIMD const & v2) 250 { 251 return fvec4SIMD(_mm_div_ps(v1.Data, v2.Data)); 252 } 253 254 // Unary constant operators 255 GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v) 256 { 257 return fvec4SIMD(_mm_sub_ps(_mm_setzero_ps(), v.Data)); 258 } 259 260 GLM_FUNC_QUALIFIER fvec4SIMD operator++ (fvec4SIMD const & v, int) 261 { 262 return fvec4SIMD(_mm_add_ps(v.Data, glm::detail::one)); 263 } 264 265 GLM_FUNC_QUALIFIER fvec4SIMD operator-- (fvec4SIMD const & v, int) 266 { 267 return fvec4SIMD(_mm_sub_ps(v.Data, glm::detail::one)); 268 } 269 270 }//namespace detail 271 272 GLM_FUNC_QUALIFIER vec4 vec4_cast 273 ( 274 detail::fvec4SIMD const & x 275 ) 276 { 277 GLM_ALIGN(16) vec4 Result; 278 _mm_store_ps(&Result[0], x.Data); 279 return Result; 280 } 281 282 // Other possible implementation 283 //float abs(float a) 284 //{ 285 // return max(-a, a); 286 //} 287 GLM_FUNC_QUALIFIER detail::fvec4SIMD abs 288 ( 289 detail::fvec4SIMD const & x 290 ) 291 { 292 return detail::sse_abs_ps(x.Data); 293 } 294 295 GLM_FUNC_QUALIFIER detail::fvec4SIMD sign 296 ( 297 detail::fvec4SIMD const & x 298 ) 299 { 300 return detail::sse_sgn_ps(x.Data); 301 } 302 303 GLM_FUNC_QUALIFIER detail::fvec4SIMD floor 304 ( 305 detail::fvec4SIMD const & x 306 ) 307 { 308 return detail::sse_flr_ps(x.Data); 309 } 310 311 GLM_FUNC_QUALIFIER detail::fvec4SIMD trunc 312 ( 313 detail::fvec4SIMD const & x 314 ) 315 { 316 //return x < 0 ? -floor(-x) : floor(x); 317 318 __m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data)); 319 __m128 Sub0 = _mm_sub_ps(Flr0, x.Data); 320 __m128 Flr1 = detail::sse_flr_ps(x.Data); 321 322 __m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero); 323 __m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero); 324 325 __m128 And0 = _mm_and_ps(Sub0, Cmp0); 326 __m128 And1 = _mm_and_ps(Flr1, Cmp1); 327 328 return _mm_or_ps(And0, And1); 329 } 330 331 GLM_FUNC_QUALIFIER detail::fvec4SIMD round 332 ( 333 detail::fvec4SIMD const & x 334 ) 335 { 336 return detail::sse_rnd_ps(x.Data); 337 } 338 339 //GLM_FUNC_QUALIFIER detail::fvec4SIMD roundEven 340 //( 341 // detail::fvec4SIMD const & x 342 //) 343 //{ 344 345 //} 346 347 GLM_FUNC_QUALIFIER detail::fvec4SIMD ceil 348 ( 349 detail::fvec4SIMD const & x 350 ) 351 { 352 return detail::sse_ceil_ps(x.Data); 353 } 354 355 GLM_FUNC_QUALIFIER detail::fvec4SIMD fract 356 ( 357 detail::fvec4SIMD const & x 358 ) 359 { 360 return detail::sse_frc_ps(x.Data); 361 } 362 363 GLM_FUNC_QUALIFIER detail::fvec4SIMD mod 364 ( 365 detail::fvec4SIMD const & x, 366 detail::fvec4SIMD const & y 367 ) 368 { 369 return detail::sse_mod_ps(x.Data, y.Data); 370 } 371 372 GLM_FUNC_QUALIFIER detail::fvec4SIMD mod 373 ( 374 detail::fvec4SIMD const & x, 375 float const & y 376 ) 377 { 378 return detail::sse_mod_ps(x.Data, _mm_set1_ps(y)); 379 } 380 381 //GLM_FUNC_QUALIFIER detail::fvec4SIMD modf 382 //( 383 // detail::fvec4SIMD const & x, 384 // detail::fvec4SIMD & i 385 //) 386 //{ 387 388 //} 389 390 GLM_FUNC_QUALIFIER detail::fvec4SIMD min 391 ( 392 detail::fvec4SIMD const & x, 393 detail::fvec4SIMD const & y 394 ) 395 { 396 return _mm_min_ps(x.Data, y.Data); 397 } 398 399 GLM_FUNC_QUALIFIER detail::fvec4SIMD min 400 ( 401 detail::fvec4SIMD const & x, 402 float const & y 403 ) 404 { 405 return _mm_min_ps(x.Data, _mm_set1_ps(y)); 406 } 407 408 GLM_FUNC_QUALIFIER detail::fvec4SIMD max 409 ( 410 detail::fvec4SIMD const & x, 411 detail::fvec4SIMD const & y 412 ) 413 { 414 return _mm_max_ps(x.Data, y.Data); 415 } 416 417 GLM_FUNC_QUALIFIER detail::fvec4SIMD max 418 ( 419 detail::fvec4SIMD const & x, 420 float const & y 421 ) 422 { 423 return _mm_max_ps(x.Data, _mm_set1_ps(y)); 424 } 425 426 GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp 427 ( 428 detail::fvec4SIMD const & x, 429 detail::fvec4SIMD const & minVal, 430 detail::fvec4SIMD const & maxVal 431 ) 432 { 433 return detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data); 434 } 435 436 GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp 437 ( 438 detail::fvec4SIMD const & x, 439 float const & minVal, 440 float const & maxVal 441 ) 442 { 443 return detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal)); 444 } 445 446 GLM_FUNC_QUALIFIER detail::fvec4SIMD mix 447 ( 448 detail::fvec4SIMD const & x, 449 detail::fvec4SIMD const & y, 450 detail::fvec4SIMD const & a 451 ) 452 { 453 __m128 Sub0 = _mm_sub_ps(y.Data, x.Data); 454 __m128 Mul0 = _mm_mul_ps(a.Data, Sub0); 455 return _mm_add_ps(x.Data, Mul0); 456 } 457 458 GLM_FUNC_QUALIFIER detail::fvec4SIMD step 459 ( 460 detail::fvec4SIMD const & edge, 461 detail::fvec4SIMD const & x 462 ) 463 { 464 __m128 cmp0 = _mm_cmpngt_ps(x.Data, edge.Data); 465 return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one); 466 } 467 468 GLM_FUNC_QUALIFIER detail::fvec4SIMD step 469 ( 470 float const & edge, 471 detail::fvec4SIMD const & x 472 ) 473 { 474 __m128 cmp0 = _mm_cmpngt_ps(x.Data, _mm_set1_ps(edge)); 475 return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one); 476 } 477 478 GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep 479 ( 480 detail::fvec4SIMD const & edge0, 481 detail::fvec4SIMD const & edge1, 482 detail::fvec4SIMD const & x 483 ) 484 { 485 return detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data); 486 } 487 488 GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep 489 ( 490 float const & edge0, 491 float const & edge1, 492 detail::fvec4SIMD const & x 493 ) 494 { 495 return detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data); 496 } 497 498 //GLM_FUNC_QUALIFIER bvec4 isnan(detail::fvec4SIMD const & x) 499 //{ 500 501 //} 502 503 //GLM_FUNC_QUALIFIER bvec4 isinf(detail::fvec4SIMD const & x) 504 //{ 505 506 //} 507 508 //GLM_FUNC_QUALIFIER detail::ivec4SIMD floatBitsToInt 509 //( 510 // detail::fvec4SIMD const & value 511 //) 512 //{ 513 514 //} 515 516 //GLM_FUNC_QUALIFIER detail::fvec4SIMD intBitsToFloat 517 //( 518 // detail::ivec4SIMD const & value 519 //) 520 //{ 521 522 //} 523 524 GLM_FUNC_QUALIFIER detail::fvec4SIMD fma 525 ( 526 detail::fvec4SIMD const & a, 527 detail::fvec4SIMD const & b, 528 detail::fvec4SIMD const & c 529 ) 530 { 531 return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data); 532 } 533 534 GLM_FUNC_QUALIFIER float length 535 ( 536 detail::fvec4SIMD const & x 537 ) 538 { 539 detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); 540 detail::fvec4SIMD sqt0 = sqrt(dot0); 541 float Result = 0; 542 _mm_store_ss(&Result, sqt0.Data); 543 return Result; 544 } 545 546 GLM_FUNC_QUALIFIER float fastLength 547 ( 548 detail::fvec4SIMD const & x 549 ) 550 { 551 detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); 552 detail::fvec4SIMD sqt0 = fastSqrt(dot0); 553 float Result = 0; 554 _mm_store_ss(&Result, sqt0.Data); 555 return Result; 556 } 557 558 GLM_FUNC_QUALIFIER float niceLength 559 ( 560 detail::fvec4SIMD const & x 561 ) 562 { 563 detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); 564 detail::fvec4SIMD sqt0 = niceSqrt(dot0); 565 float Result = 0; 566 _mm_store_ss(&Result, sqt0.Data); 567 return Result; 568 } 569 570 GLM_FUNC_QUALIFIER detail::fvec4SIMD length4 571 ( 572 detail::fvec4SIMD const & x 573 ) 574 { 575 return sqrt(dot4(x, x)); 576 } 577 578 GLM_FUNC_QUALIFIER detail::fvec4SIMD fastLength4 579 ( 580 detail::fvec4SIMD const & x 581 ) 582 { 583 return fastSqrt(dot4(x, x)); 584 } 585 586 GLM_FUNC_QUALIFIER detail::fvec4SIMD niceLength4 587 ( 588 detail::fvec4SIMD const & x 589 ) 590 { 591 return niceSqrt(dot4(x, x)); 592 } 593 594 GLM_FUNC_QUALIFIER float distance 595 ( 596 detail::fvec4SIMD const & p0, 597 detail::fvec4SIMD const & p1 598 ) 599 { 600 float Result = 0; 601 _mm_store_ss(&Result, detail::sse_dst_ps(p0.Data, p1.Data)); 602 return Result; 603 } 604 605 GLM_FUNC_QUALIFIER detail::fvec4SIMD distance4 606 ( 607 detail::fvec4SIMD const & p0, 608 detail::fvec4SIMD const & p1 609 ) 610 { 611 return detail::sse_dst_ps(p0.Data, p1.Data); 612 } 613 614 GLM_FUNC_QUALIFIER float dot 615 ( 616 detail::fvec4SIMD const & x, 617 detail::fvec4SIMD const & y 618 ) 619 { 620 float Result = 0; 621 _mm_store_ss(&Result, detail::sse_dot_ss(x.Data, y.Data)); 622 return Result; 623 } 624 625 GLM_FUNC_QUALIFIER detail::fvec4SIMD dot4 626 ( 627 detail::fvec4SIMD const & x, 628 detail::fvec4SIMD const & y 629 ) 630 { 631 return detail::sse_dot_ps(x.Data, y.Data); 632 } 633 634 GLM_FUNC_QUALIFIER detail::fvec4SIMD cross 635 ( 636 detail::fvec4SIMD const & x, 637 detail::fvec4SIMD const & y 638 ) 639 { 640 return detail::sse_xpd_ps(x.Data, y.Data); 641 } 642 643 GLM_FUNC_QUALIFIER detail::fvec4SIMD normalize 644 ( 645 detail::fvec4SIMD const & x 646 ) 647 { 648 __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); 649 __m128 isr0 = inversesqrt(detail::fvec4SIMD(dot0)).Data; 650 __m128 mul0 = _mm_mul_ps(x.Data, isr0); 651 return mul0; 652 } 653 654 GLM_FUNC_QUALIFIER detail::fvec4SIMD fastNormalize 655 ( 656 detail::fvec4SIMD const & x 657 ) 658 { 659 __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); 660 __m128 isr0 = fastInversesqrt(dot0).Data; 661 __m128 mul0 = _mm_mul_ps(x.Data, isr0); 662 return mul0; 663 } 664 665 GLM_FUNC_QUALIFIER detail::fvec4SIMD faceforward 666 ( 667 detail::fvec4SIMD const & N, 668 detail::fvec4SIMD const & I, 669 detail::fvec4SIMD const & Nref 670 ) 671 { 672 return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data); 673 } 674 675 GLM_FUNC_QUALIFIER detail::fvec4SIMD reflect 676 ( 677 detail::fvec4SIMD const & I, 678 detail::fvec4SIMD const & N 679 ) 680 { 681 return detail::sse_rfe_ps(I.Data, N.Data); 682 } 683 684 GLM_FUNC_QUALIFIER detail::fvec4SIMD refract 685 ( 686 detail::fvec4SIMD const & I, 687 detail::fvec4SIMD const & N, 688 float const & eta 689 ) 690 { 691 return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta)); 692 } 693 694 GLM_FUNC_QUALIFIER detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x) 695 { 696 return _mm_mul_ps(inversesqrt(x).Data, x.Data); 697 } 698 699 GLM_FUNC_QUALIFIER detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x) 700 { 701 return _mm_sqrt_ps(x.Data); 702 } 703 704 GLM_FUNC_QUALIFIER detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x) 705 { 706 return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data); 707 } 708 709 // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration 710 // By Elan Ruskin, http://assemblyrequired.crashworks.org/ 711 GLM_FUNC_QUALIFIER detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x) 712 { 713 GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load 714 GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5}; 715 716 __m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode 717 __m128 halfrecip = _mm_mul_ps(half, recip); 718 __m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip))); 719 return _mm_mul_ps(halfrecip, threeminus_xrr); 720 } 721 722 GLM_FUNC_QUALIFIER detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x) 723 { 724 return _mm_rsqrt_ps(x.Data); 725 } 726 727 }//namespace glm 728