1 /* 2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/ 3 4 This software is provided 'as-is', without any express or implied warranty. 5 In no event will the authors be held liable for any damages arising from the use of this software. 6 Permission is granted to anyone to use this software for any purpose, 7 including commercial applications, and to alter it and redistribute it freely, 8 subject to the following restrictions: 9 10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 12 3. This notice may not be removed or altered from any source distribution. 13 */ 14 15 16 17 #ifndef BT_SIMD__QUATERNION_H_ 18 #define BT_SIMD__QUATERNION_H_ 19 20 21 #include "btVector3.h" 22 #include "btQuadWord.h" 23 24 25 #ifdef BT_USE_DOUBLE_PRECISION 26 #define btQuaternionData btQuaternionDoubleData 27 #define btQuaternionDataName "btQuaternionDoubleData" 28 #else 29 #define btQuaternionData btQuaternionFloatData 30 #define btQuaternionDataName "btQuaternionFloatData" 31 #endif //BT_USE_DOUBLE_PRECISION 32 33 34 35 #ifdef BT_USE_SSE 36 37 //const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f}; 38 #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f)) 39 40 #endif 41 42 #if defined(BT_USE_SSE) 43 44 #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f)) 45 #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f)) 46 47 #elif defined(BT_USE_NEON) 48 49 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f}; 50 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f}; 51 52 #endif 53 54 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */ 55 class btQuaternion : public btQuadWord { 56 public: 57 /**@brief No initialization constructor */ 58 btQuaternion() {} 59 60 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON) 61 // Set Vector 62 SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec) 63 { 64 mVec128 = vec; 65 } 66 67 // Copy constructor 68 SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs) 69 { 70 mVec128 = rhs.mVec128; 71 } 72 73 // Assignment Operator 74 SIMD_FORCE_INLINE btQuaternion& 75 operator=(const btQuaternion& v) 76 { 77 mVec128 = v.mVec128; 78 79 return *this; 80 } 81 82 #endif 83 84 // template <typename btScalar> 85 // explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {} 86 /**@brief Constructor from scalars */ 87 btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w) 88 : btQuadWord(_x, _y, _z, _w) 89 {} 90 /**@brief Axis angle Constructor 91 * @param axis The axis which the rotation is around 92 * @param angle The magnitude of the rotation around the angle (Radians) */ 93 btQuaternion(const btVector3& _axis, const btScalar& _angle) 94 { 95 setRotation(_axis, _angle); 96 } 97 /**@brief Constructor from Euler angles 98 * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z 99 * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y 100 * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */ 101 btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll) 102 { 103 #ifndef BT_EULER_DEFAULT_ZYX 104 setEuler(yaw, pitch, roll); 105 #else 106 setEulerZYX(yaw, pitch, roll); 107 #endif 108 } 109 /**@brief Set the rotation using axis angle notation 110 * @param axis The axis around which to rotate 111 * @param angle The magnitude of the rotation in Radians */ 112 void setRotation(const btVector3& axis, const btScalar& _angle) 113 { 114 btScalar d = axis.length(); 115 btAssert(d != btScalar(0.0)); 116 btScalar s = btSin(_angle * btScalar(0.5)) / d; 117 setValue(axis.x() * s, axis.y() * s, axis.z() * s, 118 btCos(_angle * btScalar(0.5))); 119 } 120 /**@brief Set the quaternion using Euler angles 121 * @param yaw Angle around Y 122 * @param pitch Angle around X 123 * @param roll Angle around Z */ 124 void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll) 125 { 126 btScalar halfYaw = btScalar(yaw) * btScalar(0.5); 127 btScalar halfPitch = btScalar(pitch) * btScalar(0.5); 128 btScalar halfRoll = btScalar(roll) * btScalar(0.5); 129 btScalar cosYaw = btCos(halfYaw); 130 btScalar sinYaw = btSin(halfYaw); 131 btScalar cosPitch = btCos(halfPitch); 132 btScalar sinPitch = btSin(halfPitch); 133 btScalar cosRoll = btCos(halfRoll); 134 btScalar sinRoll = btSin(halfRoll); 135 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, 136 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, 137 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, 138 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); 139 } 140 /**@brief Set the quaternion using euler angles 141 * @param yaw Angle around Z 142 * @param pitch Angle around Y 143 * @param roll Angle around X */ 144 void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll) 145 { 146 btScalar halfYaw = btScalar(yaw) * btScalar(0.5); 147 btScalar halfPitch = btScalar(pitch) * btScalar(0.5); 148 btScalar halfRoll = btScalar(roll) * btScalar(0.5); 149 btScalar cosYaw = btCos(halfYaw); 150 btScalar sinYaw = btSin(halfYaw); 151 btScalar cosPitch = btCos(halfPitch); 152 btScalar sinPitch = btSin(halfPitch); 153 btScalar cosRoll = btCos(halfRoll); 154 btScalar sinRoll = btSin(halfRoll); 155 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x 156 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y 157 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z 158 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx 159 } 160 /**@brief Add two quaternions 161 * @param q The quaternion to add to this one */ 162 SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q) 163 { 164 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 165 mVec128 = _mm_add_ps(mVec128, q.mVec128); 166 #elif defined(BT_USE_NEON) 167 mVec128 = vaddq_f32(mVec128, q.mVec128); 168 #else 169 m_floats[0] += q.x(); 170 m_floats[1] += q.y(); 171 m_floats[2] += q.z(); 172 m_floats[3] += q.m_floats[3]; 173 #endif 174 return *this; 175 } 176 177 /**@brief Subtract out a quaternion 178 * @param q The quaternion to subtract from this one */ 179 btQuaternion& operator-=(const btQuaternion& q) 180 { 181 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 182 mVec128 = _mm_sub_ps(mVec128, q.mVec128); 183 #elif defined(BT_USE_NEON) 184 mVec128 = vsubq_f32(mVec128, q.mVec128); 185 #else 186 m_floats[0] -= q.x(); 187 m_floats[1] -= q.y(); 188 m_floats[2] -= q.z(); 189 m_floats[3] -= q.m_floats[3]; 190 #endif 191 return *this; 192 } 193 194 /**@brief Scale this quaternion 195 * @param s The scalar to scale by */ 196 btQuaternion& operator*=(const btScalar& s) 197 { 198 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 199 __m128 vs = _mm_load_ss(&s); // (S 0 0 0) 200 vs = bt_pshufd_ps(vs, 0); // (S S S S) 201 mVec128 = _mm_mul_ps(mVec128, vs); 202 #elif defined(BT_USE_NEON) 203 mVec128 = vmulq_n_f32(mVec128, s); 204 #else 205 m_floats[0] *= s; 206 m_floats[1] *= s; 207 m_floats[2] *= s; 208 m_floats[3] *= s; 209 #endif 210 return *this; 211 } 212 213 /**@brief Multiply this quaternion by q on the right 214 * @param q The other quaternion 215 * Equivilant to this = this * q */ 216 btQuaternion& operator*=(const btQuaternion& q) 217 { 218 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 219 __m128 vQ2 = q.get128(); 220 221 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0)); 222 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); 223 224 A1 = A1 * B1; 225 226 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1)); 227 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); 228 229 A2 = A2 * B2; 230 231 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2)); 232 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); 233 234 B1 = B1 * B2; // A3 *= B3 235 236 mVec128 = bt_splat_ps(mVec128, 3); // A0 237 mVec128 = mVec128 * vQ2; // A0 * B0 238 239 A1 = A1 + A2; // AB12 240 mVec128 = mVec128 - B1; // AB03 = AB0 - AB3 241 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element 242 mVec128 = mVec128+ A1; // AB03 + AB12 243 244 #elif defined(BT_USE_NEON) 245 246 float32x4_t vQ1 = mVec128; 247 float32x4_t vQ2 = q.get128(); 248 float32x4_t A0, A1, B1, A2, B2, A3, B3; 249 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; 250 251 { 252 float32x2x2_t tmp; 253 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y} 254 vQ1zx = tmp.val[0]; 255 256 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y} 257 vQ2zx = tmp.val[0]; 258 } 259 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 260 261 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); 262 263 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); 264 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); 265 266 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x 267 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X 268 269 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); 270 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); 271 272 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z 273 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z 274 275 A1 = vmulq_f32(A1, B1); 276 A2 = vmulq_f32(A2, B2); 277 A3 = vmulq_f32(A3, B3); // A3 *= B3 278 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0 279 280 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 281 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3 282 283 // change the sign of the last element 284 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM); 285 A0 = vaddq_f32(A0, A1); // AB03 + AB12 286 287 mVec128 = A0; 288 #else 289 setValue( 290 m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(), 291 m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(), 292 m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(), 293 m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z()); 294 #endif 295 return *this; 296 } 297 /**@brief Return the dot product between this quaternion and another 298 * @param q The other quaternion */ 299 btScalar dot(const btQuaternion& q) const 300 { 301 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 302 __m128 vd; 303 304 vd = _mm_mul_ps(mVec128, q.mVec128); 305 306 __m128 t = _mm_movehl_ps(vd, vd); 307 vd = _mm_add_ps(vd, t); 308 t = _mm_shuffle_ps(vd, vd, 0x55); 309 vd = _mm_add_ss(vd, t); 310 311 return _mm_cvtss_f32(vd); 312 #elif defined(BT_USE_NEON) 313 float32x4_t vd = vmulq_f32(mVec128, q.mVec128); 314 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd)); 315 x = vpadd_f32(x, x); 316 return vget_lane_f32(x, 0); 317 #else 318 return m_floats[0] * q.x() + 319 m_floats[1] * q.y() + 320 m_floats[2] * q.z() + 321 m_floats[3] * q.m_floats[3]; 322 #endif 323 } 324 325 /**@brief Return the length squared of the quaternion */ 326 btScalar length2() const 327 { 328 return dot(*this); 329 } 330 331 /**@brief Return the length of the quaternion */ 332 btScalar length() const 333 { 334 return btSqrt(length2()); 335 } 336 337 /**@brief Normalize the quaternion 338 * Such that x^2 + y^2 + z^2 +w^2 = 1 */ 339 btQuaternion& normalize() 340 { 341 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 342 __m128 vd; 343 344 vd = _mm_mul_ps(mVec128, mVec128); 345 346 __m128 t = _mm_movehl_ps(vd, vd); 347 vd = _mm_add_ps(vd, t); 348 t = _mm_shuffle_ps(vd, vd, 0x55); 349 vd = _mm_add_ss(vd, t); 350 351 vd = _mm_sqrt_ss(vd); 352 vd = _mm_div_ss(vOnes, vd); 353 vd = bt_pshufd_ps(vd, 0); // splat 354 mVec128 = _mm_mul_ps(mVec128, vd); 355 356 return *this; 357 #else 358 return *this /= length(); 359 #endif 360 } 361 362 /**@brief Return a scaled version of this quaternion 363 * @param s The scale factor */ 364 SIMD_FORCE_INLINE btQuaternion 365 operator*(const btScalar& s) const 366 { 367 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 368 __m128 vs = _mm_load_ss(&s); // (S 0 0 0) 369 vs = bt_pshufd_ps(vs, 0x00); // (S S S S) 370 371 return btQuaternion(_mm_mul_ps(mVec128, vs)); 372 #elif defined(BT_USE_NEON) 373 return btQuaternion(vmulq_n_f32(mVec128, s)); 374 #else 375 return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s); 376 #endif 377 } 378 379 /**@brief Return an inversely scaled versionof this quaternion 380 * @param s The inverse scale factor */ 381 btQuaternion operator/(const btScalar& s) const 382 { 383 btAssert(s != btScalar(0.0)); 384 return *this * (btScalar(1.0) / s); 385 } 386 387 /**@brief Inversely scale this quaternion 388 * @param s The scale factor */ 389 btQuaternion& operator/=(const btScalar& s) 390 { 391 btAssert(s != btScalar(0.0)); 392 return *this *= btScalar(1.0) / s; 393 } 394 395 /**@brief Return a normalized version of this quaternion */ 396 btQuaternion normalized() const 397 { 398 return *this / length(); 399 } 400 /**@brief Return the ***half*** angle between this quaternion and the other 401 * @param q The other quaternion */ 402 btScalar angle(const btQuaternion& q) const 403 { 404 btScalar s = btSqrt(length2() * q.length2()); 405 btAssert(s != btScalar(0.0)); 406 return btAcos(dot(q) / s); 407 } 408 409 /**@brief Return the angle between this quaternion and the other along the shortest path 410 * @param q The other quaternion */ 411 btScalar angleShortestPath(const btQuaternion& q) const 412 { 413 btScalar s = btSqrt(length2() * q.length2()); 414 btAssert(s != btScalar(0.0)); 415 if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp 416 return btAcos(dot(-q) / s) * btScalar(2.0); 417 else 418 return btAcos(dot(q) / s) * btScalar(2.0); 419 } 420 421 /**@brief Return the angle of rotation represented by this quaternion */ 422 btScalar getAngle() const 423 { 424 btScalar s = btScalar(2.) * btAcos(m_floats[3]); 425 return s; 426 } 427 428 /**@brief Return the angle of rotation represented by this quaternion along the shortest path*/ 429 btScalar getAngleShortestPath() const 430 { 431 btScalar s; 432 if (dot(*this) < 0) 433 s = btScalar(2.) * btAcos(m_floats[3]); 434 else 435 s = btScalar(2.) * btAcos(-m_floats[3]); 436 437 return s; 438 } 439 440 441 /**@brief Return the axis of the rotation represented by this quaternion */ 442 btVector3 getAxis() const 443 { 444 btScalar s_squared = 1.f-m_floats[3]*m_floats[3]; 445 446 if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero 447 return btVector3(1.0, 0.0, 0.0); // Arbitrary 448 btScalar s = 1.f/btSqrt(s_squared); 449 return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s); 450 } 451 452 /**@brief Return the inverse of this quaternion */ 453 btQuaternion inverse() const 454 { 455 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 456 return btQuaternion(_mm_xor_ps(mVec128, vQInv)); 457 #elif defined(BT_USE_NEON) 458 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv)); 459 #else 460 return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]); 461 #endif 462 } 463 464 /**@brief Return the sum of this quaternion and the other 465 * @param q2 The other quaternion */ 466 SIMD_FORCE_INLINE btQuaternion 467 operator+(const btQuaternion& q2) const 468 { 469 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 470 return btQuaternion(_mm_add_ps(mVec128, q2.mVec128)); 471 #elif defined(BT_USE_NEON) 472 return btQuaternion(vaddq_f32(mVec128, q2.mVec128)); 473 #else 474 const btQuaternion& q1 = *this; 475 return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]); 476 #endif 477 } 478 479 /**@brief Return the difference between this quaternion and the other 480 * @param q2 The other quaternion */ 481 SIMD_FORCE_INLINE btQuaternion 482 operator-(const btQuaternion& q2) const 483 { 484 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 485 return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128)); 486 #elif defined(BT_USE_NEON) 487 return btQuaternion(vsubq_f32(mVec128, q2.mVec128)); 488 #else 489 const btQuaternion& q1 = *this; 490 return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]); 491 #endif 492 } 493 494 /**@brief Return the negative of this quaternion 495 * This simply negates each element */ 496 SIMD_FORCE_INLINE btQuaternion operator-() const 497 { 498 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 499 return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask)); 500 #elif defined(BT_USE_NEON) 501 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) ); 502 #else 503 const btQuaternion& q2 = *this; 504 return btQuaternion( - q2.x(), - q2.y(), - q2.z(), - q2.m_floats[3]); 505 #endif 506 } 507 /**@todo document this and it's use */ 508 SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const 509 { 510 btQuaternion diff,sum; 511 diff = *this - qd; 512 sum = *this + qd; 513 if( diff.dot(diff) > sum.dot(sum) ) 514 return qd; 515 return (-qd); 516 } 517 518 /**@todo document this and it's use */ 519 SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const 520 { 521 btQuaternion diff,sum; 522 diff = *this - qd; 523 sum = *this + qd; 524 if( diff.dot(diff) < sum.dot(sum) ) 525 return qd; 526 return (-qd); 527 } 528 529 530 /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion 531 * @param q The other quaternion to interpolate with 532 * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q. 533 * Slerp interpolates assuming constant velocity. */ 534 btQuaternion slerp(const btQuaternion& q, const btScalar& t) const 535 { 536 btScalar magnitude = btSqrt(length2() * q.length2()); 537 btAssert(magnitude > btScalar(0)); 538 539 btScalar product = dot(q) / magnitude; 540 if (btFabs(product) < btScalar(1)) 541 { 542 // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp 543 const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1); 544 545 const btScalar theta = btAcos(sign * product); 546 const btScalar s1 = btSin(sign * t * theta); 547 const btScalar d = btScalar(1.0) / btSin(theta); 548 const btScalar s0 = btSin((btScalar(1.0) - t) * theta); 549 550 return btQuaternion( 551 (m_floats[0] * s0 + q.x() * s1) * d, 552 (m_floats[1] * s0 + q.y() * s1) * d, 553 (m_floats[2] * s0 + q.z() * s1) * d, 554 (m_floats[3] * s0 + q.m_floats[3] * s1) * d); 555 } 556 else 557 { 558 return *this; 559 } 560 } 561 562 static const btQuaternion& getIdentity() 563 { 564 static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.)); 565 return identityQuat; 566 } 567 568 SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; } 569 570 SIMD_FORCE_INLINE void serialize(struct btQuaternionData& dataOut) const; 571 572 SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionData& dataIn); 573 574 SIMD_FORCE_INLINE void serializeFloat(struct btQuaternionFloatData& dataOut) const; 575 576 SIMD_FORCE_INLINE void deSerializeFloat(const struct btQuaternionFloatData& dataIn); 577 578 SIMD_FORCE_INLINE void serializeDouble(struct btQuaternionDoubleData& dataOut) const; 579 580 SIMD_FORCE_INLINE void deSerializeDouble(const struct btQuaternionDoubleData& dataIn); 581 582 }; 583 584 585 586 587 588 /**@brief Return the product of two quaternions */ 589 SIMD_FORCE_INLINE btQuaternion 590 operator*(const btQuaternion& q1, const btQuaternion& q2) 591 { 592 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 593 __m128 vQ1 = q1.get128(); 594 __m128 vQ2 = q2.get128(); 595 __m128 A0, A1, B1, A2, B2; 596 597 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x // vtrn 598 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X // vdup vext 599 600 A1 = A1 * B1; 601 602 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z X Y // vext 603 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup 604 605 A2 = A2 * B2; 606 607 B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext 608 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn 609 610 B1 = B1 * B2; // A3 *= B3 611 612 A0 = bt_splat_ps(vQ1, 3); // A0 613 A0 = A0 * vQ2; // A0 * B0 614 615 A1 = A1 + A2; // AB12 616 A0 = A0 - B1; // AB03 = AB0 - AB3 617 618 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element 619 A0 = A0 + A1; // AB03 + AB12 620 621 return btQuaternion(A0); 622 623 #elif defined(BT_USE_NEON) 624 625 float32x4_t vQ1 = q1.get128(); 626 float32x4_t vQ2 = q2.get128(); 627 float32x4_t A0, A1, B1, A2, B2, A3, B3; 628 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; 629 630 { 631 float32x2x2_t tmp; 632 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y} 633 vQ1zx = tmp.val[0]; 634 635 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y} 636 vQ2zx = tmp.val[0]; 637 } 638 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 639 640 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); 641 642 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); 643 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); 644 645 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x 646 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X 647 648 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); 649 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); 650 651 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z 652 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z 653 654 A1 = vmulq_f32(A1, B1); 655 A2 = vmulq_f32(A2, B2); 656 A3 = vmulq_f32(A3, B3); // A3 *= B3 657 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0 658 659 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 660 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3 661 662 // change the sign of the last element 663 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM); 664 A0 = vaddq_f32(A0, A1); // AB03 + AB12 665 666 return btQuaternion(A0); 667 668 #else 669 return btQuaternion( 670 q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(), 671 q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(), 672 q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(), 673 q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z()); 674 #endif 675 } 676 677 SIMD_FORCE_INLINE btQuaternion 678 operator*(const btQuaternion& q, const btVector3& w) 679 { 680 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 681 __m128 vQ1 = q.get128(); 682 __m128 vQ2 = w.get128(); 683 __m128 A1, B1, A2, B2, A3, B3; 684 685 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0)); 686 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0)); 687 688 A1 = A1 * B1; 689 690 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); 691 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); 692 693 A2 = A2 * B2; 694 695 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); 696 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); 697 698 A3 = A3 * B3; // A3 *= B3 699 700 A1 = A1 + A2; // AB12 701 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element 702 A1 = A1 - A3; // AB123 = AB12 - AB3 703 704 return btQuaternion(A1); 705 706 #elif defined(BT_USE_NEON) 707 708 float32x4_t vQ1 = q.get128(); 709 float32x4_t vQ2 = w.get128(); 710 float32x4_t A1, B1, A2, B2, A3, B3; 711 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz; 712 713 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1); 714 { 715 float32x2x2_t tmp; 716 717 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y} 718 vQ2zx = tmp.val[0]; 719 720 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y} 721 vQ1zx = tmp.val[0]; 722 } 723 724 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); 725 726 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); 727 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); 728 729 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X 730 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x 731 732 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); 733 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); 734 735 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z 736 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z 737 738 A1 = vmulq_f32(A1, B1); 739 A2 = vmulq_f32(A2, B2); 740 A3 = vmulq_f32(A3, B3); // A3 *= B3 741 742 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 743 744 // change the sign of the last element 745 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM); 746 747 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3 748 749 return btQuaternion(A1); 750 751 #else 752 return btQuaternion( 753 q.w() * w.x() + q.y() * w.z() - q.z() * w.y(), 754 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(), 755 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(), 756 -q.x() * w.x() - q.y() * w.y() - q.z() * w.z()); 757 #endif 758 } 759 760 SIMD_FORCE_INLINE btQuaternion 761 operator*(const btVector3& w, const btQuaternion& q) 762 { 763 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 764 __m128 vQ1 = w.get128(); 765 __m128 vQ2 = q.get128(); 766 __m128 A1, B1, A2, B2, A3, B3; 767 768 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x 769 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X 770 771 A1 = A1 * B1; 772 773 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); 774 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); 775 776 A2 = A2 *B2; 777 778 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); 779 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); 780 781 A3 = A3 * B3; // A3 *= B3 782 783 A1 = A1 + A2; // AB12 784 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element 785 A1 = A1 - A3; // AB123 = AB12 - AB3 786 787 return btQuaternion(A1); 788 789 #elif defined(BT_USE_NEON) 790 791 float32x4_t vQ1 = w.get128(); 792 float32x4_t vQ2 = q.get128(); 793 float32x4_t A1, B1, A2, B2, A3, B3; 794 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz; 795 796 { 797 float32x2x2_t tmp; 798 799 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y} 800 vQ1zx = tmp.val[0]; 801 802 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y} 803 vQ2zx = tmp.val[0]; 804 } 805 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 806 807 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1); 808 809 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1); 810 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1); 811 812 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x 813 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X 814 815 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1)); 816 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1)); 817 818 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z 819 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z 820 821 A1 = vmulq_f32(A1, B1); 822 A2 = vmulq_f32(A2, B2); 823 A3 = vmulq_f32(A3, B3); // A3 *= B3 824 825 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2 826 827 // change the sign of the last element 828 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM); 829 830 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3 831 832 return btQuaternion(A1); 833 834 #else 835 return btQuaternion( 836 +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(), 837 +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(), 838 +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(), 839 -w.x() * q.x() - w.y() * q.y() - w.z() * q.z()); 840 #endif 841 } 842 843 /**@brief Calculate the dot product between two quaternions */ 844 SIMD_FORCE_INLINE btScalar 845 dot(const btQuaternion& q1, const btQuaternion& q2) 846 { 847 return q1.dot(q2); 848 } 849 850 851 /**@brief Return the length of a quaternion */ 852 SIMD_FORCE_INLINE btScalar 853 length(const btQuaternion& q) 854 { 855 return q.length(); 856 } 857 858 /**@brief Return the angle between two quaternions*/ 859 SIMD_FORCE_INLINE btScalar 860 btAngle(const btQuaternion& q1, const btQuaternion& q2) 861 { 862 return q1.angle(q2); 863 } 864 865 /**@brief Return the inverse of a quaternion*/ 866 SIMD_FORCE_INLINE btQuaternion 867 inverse(const btQuaternion& q) 868 { 869 return q.inverse(); 870 } 871 872 /**@brief Return the result of spherical linear interpolation betwen two quaternions 873 * @param q1 The first quaternion 874 * @param q2 The second quaternion 875 * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2 876 * Slerp assumes constant velocity between positions. */ 877 SIMD_FORCE_INLINE btQuaternion 878 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t) 879 { 880 return q1.slerp(q2, t); 881 } 882 883 SIMD_FORCE_INLINE btVector3 884 quatRotate(const btQuaternion& rotation, const btVector3& v) 885 { 886 btQuaternion q = rotation * v; 887 q *= rotation.inverse(); 888 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 889 return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask)); 890 #elif defined(BT_USE_NEON) 891 return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask)); 892 #else 893 return btVector3(q.getX(),q.getY(),q.getZ()); 894 #endif 895 } 896 897 SIMD_FORCE_INLINE btQuaternion 898 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized 899 { 900 btVector3 c = v0.cross(v1); 901 btScalar d = v0.dot(v1); 902 903 if (d < -1.0 + SIMD_EPSILON) 904 { 905 btVector3 n,unused; 906 btPlaneSpace1(v0,n,unused); 907 return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0 908 } 909 910 btScalar s = btSqrt((1.0f + d) * 2.0f); 911 btScalar rs = 1.0f / s; 912 913 return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f); 914 } 915 916 SIMD_FORCE_INLINE btQuaternion 917 shortestArcQuatNormalize2(btVector3& v0,btVector3& v1) 918 { 919 v0.normalize(); 920 v1.normalize(); 921 return shortestArcQuat(v0,v1); 922 } 923 924 925 926 927 struct btQuaternionFloatData 928 { 929 float m_floats[4]; 930 }; 931 932 struct btQuaternionDoubleData 933 { 934 double m_floats[4]; 935 936 }; 937 938 SIMD_FORCE_INLINE void btQuaternion::serializeFloat(struct btQuaternionFloatData& dataOut) const 939 { 940 ///could also do a memcpy, check if it is worth it 941 for (int i=0;i<4;i++) 942 dataOut.m_floats[i] = float(m_floats[i]); 943 } 944 945 SIMD_FORCE_INLINE void btQuaternion::deSerializeFloat(const struct btQuaternionFloatData& dataIn) 946 { 947 for (int i=0;i<4;i++) 948 m_floats[i] = btScalar(dataIn.m_floats[i]); 949 } 950 951 952 SIMD_FORCE_INLINE void btQuaternion::serializeDouble(struct btQuaternionDoubleData& dataOut) const 953 { 954 ///could also do a memcpy, check if it is worth it 955 for (int i=0;i<4;i++) 956 dataOut.m_floats[i] = double(m_floats[i]); 957 } 958 959 SIMD_FORCE_INLINE void btQuaternion::deSerializeDouble(const struct btQuaternionDoubleData& dataIn) 960 { 961 for (int i=0;i<4;i++) 962 m_floats[i] = btScalar(dataIn.m_floats[i]); 963 } 964 965 966 SIMD_FORCE_INLINE void btQuaternion::serialize(struct btQuaternionData& dataOut) const 967 { 968 ///could also do a memcpy, check if it is worth it 969 for (int i=0;i<4;i++) 970 dataOut.m_floats[i] = m_floats[i]; 971 } 972 973 SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionData& dataIn) 974 { 975 for (int i=0;i<4;i++) 976 m_floats[i] = dataIn.m_floats[i]; 977 } 978 979 980 #endif //BT_SIMD__QUATERNION_H_ 981 982 983 984