/*
Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef BT_VECTOR3_H
#define BT_VECTOR3_H

//#include <stdint.h>
#include "btScalar.h"
#include "btMinMax.h"
#include "btAlignedAllocator.h"

#ifdef BT_USE_DOUBLE_PRECISION
#define btVector3Data btVector3DoubleData
#define btVector3DataName "btVector3DoubleData"
#else
#define btVector3Data btVector3FloatData
#define btVector3DataName "btVector3FloatData"
#endif  //BT_USE_DOUBLE_PRECISION

#if defined BT_USE_SSE

//typedef uint32_t __m128i __attribute__ ((vector_size(16)));

#ifdef _MSC_VER
#pragma warning(disable: 4556)  // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
#endif

#define BT_SHUFFLE(x, y, z, w) ((w) << 6 | (z) << 4 | (y) << 2 | (x))
//#define bt_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
#define bt_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
#define bt_splat3_ps(_a, _i) bt_pshufd_ps((_a), BT_SHUFFLE(_i, _i, _i, 3))
#define bt_splat_ps(_a, _i) bt_pshufd_ps((_a), BT_SHUFFLE(_i, _i, _i, _i))

#define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
#define btvAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
#define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
#define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
#define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
#define btvxyzMaskf btvFFF0fMask
#define btvAbsfMask btCastiTo128f(btvAbsMask)

//there is an issue with XCode 3.2 (LCx errors)
#define btvMzeroMask (_mm_set_ps(-0.0f, -0.0f, -0.0f, -0.0f))
#define v1110 (_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f))
#define vHalf (_mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f))
#define v1_5 (_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f))

//const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
//const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
//const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
//const __m128 ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};

#endif

#ifdef BT_USE_NEON

const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){static_cast<int32_t>(0xFFFFFFFF),
	static_cast<int32_t>(0xFFFFFFFF), static_cast<int32_t>(0xFFFFFFFF), 0x0};
const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) =
	(int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};

#endif

/**@brief btVector3 can be used to represent 3D points and vectors.
 * It has an unused w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by the user.
 * Ideally, this class should be replaced by a platform-optimized SIMD version that keeps the data in registers.
 */
ATTRIBUTE_ALIGNED16(class) btVector3
{
public:
	BT_DECLARE_ALIGNED_ALLOCATOR();

#if defined(__SPU__) && defined(__CELLOS_LV2__)
	btScalar m_floats[4];

public:
	SIMD_FORCE_INLINE const vec_float4& get128() const
	{
		return *((const vec_float4*)&m_floats[0]);
	}

public:
#else  //__CELLOS_LV2__ __SPU__
#if defined(BT_USE_SSE) || defined(BT_USE_NEON)  // _WIN32 || ARM
	union {
		btSimdFloat4 mVec128;
		btScalar m_floats[4];
	};
	SIMD_FORCE_INLINE btSimdFloat4 get128() const
	{
		return mVec128;
	}
	SIMD_FORCE_INLINE void set128(btSimdFloat4 v128)
	{
		mVec128 = v128;
	}
#else
	btScalar m_floats[4];
#endif
#endif  //__CELLOS_LV2__ __SPU__

public:
	/**@brief No initialization constructor */
	SIMD_FORCE_INLINE btVector3()
	{
	}

	/**@brief Constructor from scalars
	 * @param x X value
	 * @param y Y value
	 * @param z Z value
	 */
	SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
	{
		m_floats[0] = _x;
		m_floats[1] = _y;
		m_floats[2] = _z;
		m_floats[3] = btScalar(0.f);
	}

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// Set Vector
	SIMD_FORCE_INLINE btVector3(btSimdFloat4 v)
	{
		mVec128 = v;
	}

	// Copy constructor
	SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
	{
		mVec128 = rhs.mVec128;
	}

	// Assignment Operator
	SIMD_FORCE_INLINE btVector3&
	operator=(const btVector3& v)
	{
		mVec128 = v.mVec128;

		return *this;
	}
#endif  // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)

	/**@brief Add a vector to this one
	 * @param v The vector to add to this one */
	SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_add_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vaddq_f32(mVec128, v.mVec128);
#else
		m_floats[0] += v.m_floats[0];
		m_floats[1] += v.m_floats[1];
		m_floats[2] += v.m_floats[2];
#endif
		return *this;
	}

	/**@brief Subtract a vector from this one
	 * @param v The vector to subtract */
	SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v)
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_sub_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vsubq_f32(mVec128, v.mVec128);
#else
		m_floats[0] -= v.m_floats[0];
		m_floats[1] -= v.m_floats[1];
		m_floats[2] -= v.m_floats[2];
#endif
		return *this;
	}

	/**@brief Scale the vector
	 * @param s Scale factor */
	SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
		vs = bt_pshufd_ps(vs, 0x80);  //	(S S S 0.0)
		mVec128 = _mm_mul_ps(mVec128, vs);
#elif defined(BT_USE_NEON)
		mVec128 = vmulq_n_f32(mVec128, s);
#else
		m_floats[0] *= s;
		m_floats[1] *= s;
		m_floats[2] *= s;
#endif
		return *this;
	}

	/**@brief Inversely scale the vector
	 * @param s Scale factor to divide by */
	SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s)
	{
		btFullAssert(s != btScalar(0.0));

#if 0  //defined(BT_USE_SSE_IN_API)
		// this code is not faster !
		__m128 vs = _mm_load_ss(&s);
		vs = _mm_div_ss(v1110, vs);
		vs = bt_pshufd_ps(vs, 0x00);  //	(S S S S)

		mVec128 = _mm_mul_ps(mVec128, vs);

		return *this;
#else
		return *this *= btScalar(1.0) / s;
#endif
	}

	/**@brief Return the dot product
	 * @param v The other vector in the dot product */
	SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
		__m128 z = _mm_movehl_ps(vd, vd);
		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, y);
		vd = _mm_add_ss(vd, z);
		return _mm_cvtss_f32(vd);
#elif defined(BT_USE_NEON)
		float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
		x = vadd_f32(x, vget_high_f32(vd));
		return vget_lane_f32(x, 0);
#else
		return m_floats[0] * v.m_floats[0] +
			   m_floats[1] * v.m_floats[1] +
			   m_floats[2] * v.m_floats[2];
#endif
	}

	/**@brief Return the length of the vector squared */
	SIMD_FORCE_INLINE btScalar length2() const
	{
		return dot(*this);
	}

	/**@brief Return the length of the vector */
	SIMD_FORCE_INLINE btScalar length() const
	{
		return btSqrt(length2());
	}

	/**@brief Return the norm (length) of the vector */
	SIMD_FORCE_INLINE btScalar norm() const
	{
		return length();
	}

	/**@brief Return the distance squared between the ends of this and another vector
	 * This is semantically treating the vector like a point */
	SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;

	/**@brief Return the distance between the ends of this and another vector
	 * This is semantically treating the vector like a point */
	SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;

	SIMD_FORCE_INLINE btVector3& safeNormalize()
	{
		btVector3 absVec = this->absolute();
		int maxIndex = absVec.maxAxis();
		if (absVec[maxIndex] > 0)
		{
			*this /= absVec[maxIndex];
			return *this /= length();
		}
		setValue(1, 0, 0);
		return *this;
	}

	/**@brief Normalize this vector
	 * x^2 + y^2 + z^2 = 1 */
	SIMD_FORCE_INLINE btVector3& normalize()
	{
		btAssert(!fuzzyZero());

#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		// dot product first
		__m128 vd = _mm_mul_ps(mVec128, mVec128);
		__m128 z = _mm_movehl_ps(vd, vd);
		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, y);
		vd = _mm_add_ss(vd, z);

#if 0
		vd = _mm_sqrt_ss(vd);
		vd = _mm_div_ss(v1110, vd);
		vd = bt_splat_ps(vd, 0x80);
		mVec128 = _mm_mul_ps(mVec128, vd);
#else

		// NR step 1/sqrt(x) - vd is x, y is output
		y = _mm_rsqrt_ss(vd);  // estimate

		// one step NR
		z = v1_5;
		vd = _mm_mul_ss(vd, vHalf);  // vd * 0.5
		//x2 = vd;
		vd = _mm_mul_ss(vd, y);  // vd * 0.5 * y0
		vd = _mm_mul_ss(vd, y);  // vd * 0.5 * y0 * y0
		z = _mm_sub_ss(z, vd);   // 1.5 - vd * 0.5 * y0 * y0

		y = _mm_mul_ss(y, z);  // y0 * (1.5 - vd * 0.5 * y0 * y0)

		y = bt_splat_ps(y, 0x80);
		mVec128 = _mm_mul_ps(mVec128, y);

#endif

		return *this;
#else
		return *this /= length();
#endif
	}

	/**@brief Return a normalized version of this vector */
	SIMD_FORCE_INLINE btVector3 normalized() const;

	/**@brief Return a rotated version of this vector
	 * @param wAxis The axis to rotate about
	 * @param angle The angle to rotate by */
	SIMD_FORCE_INLINE btVector3 rotate(const btVector3& wAxis, const btScalar angle) const;

	/**@brief Return the angle between this and another vector
	 * @param v The other vector */
	SIMD_FORCE_INLINE btScalar angle(const btVector3& v) const
	{
		btScalar s = btSqrt(length2() * v.length2());
		btFullAssert(s != btScalar(0.0));
		return btAcos(dot(v) / s);
	}

	/**@brief Return a vector with the absolute values of each element */
	SIMD_FORCE_INLINE btVector3 absolute() const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
#elif defined(BT_USE_NEON)
		return btVector3(vabsq_f32(mVec128));
#else
		return btVector3(
			btFabs(m_floats[0]),
			btFabs(m_floats[1]),
			btFabs(m_floats[2]));
#endif
	}

	/**@brief Return the cross product between this and another vector
	 * @param v The other vector */
	SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 T, V;

		T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3));    //	(Y Z X 0)
		V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)

		V = _mm_mul_ps(V, mVec128);
		T = _mm_mul_ps(T, v.mVec128);
		V = _mm_sub_ps(V, T);

		V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
		return btVector3(V);
#elif defined(BT_USE_NEON)
		float32x4_t T, V;
		// form (Y, Z, X, _) of mVec128 and v.mVec128
		float32x2_t Tlow = vget_low_f32(mVec128);
		float32x2_t Vlow = vget_low_f32(v.mVec128);
		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);

		V = vmulq_f32(V, mVec128);
		T = vmulq_f32(T, v.mVec128);
		V = vsubq_f32(V, T);
		Vlow = vget_low_f32(V);
		// form (Y, Z, X, _);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
		V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);

		return btVector3(V);
#else
		return btVector3(
			m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
#endif
	}

	SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		// cross:
		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)
		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)

		V = _mm_mul_ps(V, v1.mVec128);
		T = _mm_mul_ps(T, v2.mVec128);
		V = _mm_sub_ps(V, T);

		V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));

		// dot:
		V = _mm_mul_ps(V, mVec128);
		__m128 z = _mm_movehl_ps(V, V);
		__m128 y = _mm_shuffle_ps(V, V, 0x55);
		V = _mm_add_ss(V, y);
		V = _mm_add_ss(V, z);
		return _mm_cvtss_f32(V);

#elif defined(BT_USE_NEON)
		// cross:
		float32x4_t T, V;
		// form (Y, Z, X, _) of v1.mVec128 and v2.mVec128
		float32x2_t Tlow = vget_low_f32(v1.mVec128);
		float32x2_t Vlow = vget_low_f32(v2.mVec128);
		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);

		V = vmulq_f32(V, v1.mVec128);
		T = vmulq_f32(T, v2.mVec128);
		V = vsubq_f32(V, T);
		Vlow = vget_low_f32(V);
		// form (Y, Z, X, _);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);

		// dot:
		V = vmulq_f32(mVec128, V);
		float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
		x = vadd_f32(x, vget_high_f32(V));
		return vget_lane_f32(x, 0);
#else
		return
			m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
			m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
			m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
#endif
	}

	/**@brief Return the axis with the smallest value
	 * Note return values are 0,1,2 for x, y, or z */
	SIMD_FORCE_INLINE int minAxis() const
	{
		return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2);
	}

	/**@brief Return the axis with the largest value
	 * Note return values are 0,1,2 for x, y, or z */
	SIMD_FORCE_INLINE int maxAxis() const
	{
		return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0);
	}

	SIMD_FORCE_INLINE int furthestAxis() const
	{
		return absolute().minAxis();
	}

	SIMD_FORCE_INLINE int closestAxis() const
	{
		return absolute().maxAxis();
	}

	SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 vrt = _mm_load_ss(&rt);  //	(rt 0 0 0)
		btScalar s = btScalar(1.0) - rt;
		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
		vs = bt_pshufd_ps(vs, 0x80);  //	(S S S 0.0)
		__m128 r0 = _mm_mul_ps(v0.mVec128, vs);
		vrt = bt_pshufd_ps(vrt, 0x80);  //	(rt rt rt 0.0)
		__m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
		__m128 tmp3 = _mm_add_ps(r0, r1);
		mVec128 = tmp3;
#elif defined(BT_USE_NEON)
		float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
		vl = vmulq_n_f32(vl, rt);
		mVec128 = vaddq_f32(vl, v0.mVec128);
#else
		btScalar s = btScalar(1.0) - rt;
		m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
		m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
		m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
		//don't do the unused w component
		//		m_co[3] = s * v0[3] + rt * v1[3];
#endif
	}

	/**@brief Return the linear interpolation between this and another vector
	 * @param v The other vector
	 * @param t The interpolation ratio (t = 0 => return this, t = 1 => return other) */
	SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 vt = _mm_load_ss(&t);  //	(t 0 0 0)
		vt = bt_pshufd_ps(vt, 0x80);  //	(rt rt rt 0.0)
		__m128 vl = _mm_sub_ps(v.mVec128, mVec128);
		vl = _mm_mul_ps(vl, vt);
		vl = _mm_add_ps(vl, mVec128);

		return btVector3(vl);
#elif defined(BT_USE_NEON)
		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
		vl = vmulq_n_f32(vl, t);
		vl = vaddq_f32(vl, mVec128);

		return btVector3(vl);
#else
		return
			btVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
					  m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
					  m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
#endif
	}

	/**@brief Elementwise multiply this vector by the other
	 * @param v The other vector */
	SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_mul_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vmulq_f32(mVec128, v.mVec128);
#else
		m_floats[0] *= v.m_floats[0];
		m_floats[1] *= v.m_floats[1];
		m_floats[2] *= v.m_floats[2];
#endif
		return *this;
	}

	/**@brief Return the x value */
	SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
	/**@brief Return the y value */
	SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
	/**@brief Return the z value */
	SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
	/**@brief Set the x value */
	SIMD_FORCE_INLINE void setX(btScalar _x) { m_floats[0] = _x; };
	/**@brief Set the y value */
	SIMD_FORCE_INLINE void setY(btScalar _y) { m_floats[1] = _y; };
	/**@brief Set the z value */
	SIMD_FORCE_INLINE void setZ(btScalar _z) { m_floats[2] = _z; };
	/**@brief Set the w value */
	SIMD_FORCE_INLINE void setW(btScalar _w) { m_floats[3] = _w; };
	/**@brief Return the x value */
	SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
	/**@brief Return the y value */
	SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
	/**@brief Return the z value */
	SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
	/**@brief Return the w value */
	SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }

	//SIMD_FORCE_INLINE btScalar&       operator[](int i)       { return (&m_floats[0])[i]; }
	//SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
	///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
	SIMD_FORCE_INLINE operator btScalar*() { return &m_floats[0]; }
	SIMD_FORCE_INLINE operator const btScalar*() const { return &m_floats[0]; }

	SIMD_FORCE_INLINE bool operator==(const btVector3& other) const
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
#else
		return ((m_floats[3] == other.m_floats[3]) &&
				(m_floats[2] == other.m_floats[2]) &&
				(m_floats[1] == other.m_floats[1]) &&
				(m_floats[0] == other.m_floats[0]));
#endif
	}

	SIMD_FORCE_INLINE bool operator!=(const btVector3& other) const
	{
		return !(*this == other);
	}

	/**@brief Set each element to the max of the current values and the values of another btVector3
	 * @param other The other btVector3 to compare with
	 */
	SIMD_FORCE_INLINE void setMax(const btVector3& other)
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_max_ps(mVec128, other.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vmaxq_f32(mVec128, other.mVec128);
#else
		btSetMax(m_floats[0], other.m_floats[0]);
		btSetMax(m_floats[1], other.m_floats[1]);
		btSetMax(m_floats[2], other.m_floats[2]);
		btSetMax(m_floats[3], other.w());
#endif
	}

	/**@brief Set each element to the min of the current values and the values of another btVector3
	 * @param other The other btVector3 to compare with
	 */
	SIMD_FORCE_INLINE void setMin(const btVector3& other)
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_min_ps(mVec128, other.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vminq_f32(mVec128, other.mVec128);
#else
		btSetMin(m_floats[0], other.m_floats[0]);
		btSetMin(m_floats[1], other.m_floats[1]);
		btSetMin(m_floats[2], other.m_floats[2]);
		btSetMin(m_floats[3], other.w());
#endif
	}

	SIMD_FORCE_INLINE void setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
	{
		m_floats[0] = _x;
		m_floats[1] = _y;
		m_floats[2] = _z;
		m_floats[3] = btScalar(0.f);
	}

	void getSkewSymmetricMatrix(btVector3* v0, btVector3* v1, btVector3* v2) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)

		__m128 V = _mm_and_ps(mVec128, btvFFF0fMask);
		__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
		__m128 V2 = _mm_movelh_ps(V0, V);

		__m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);

		V0 = _mm_shuffle_ps(V0, V, 0xDB);
		V2 = _mm_shuffle_ps(V2, V, 0xF9);

		v0->mVec128 = V0;
		v1->mVec128 = V1;
		v2->mVec128 = V2;
#else
		v0->setValue(0., -z(), y());
		v1->setValue(z(), 0., -x());
		v2->setValue(-y(), x(), 0.);
#endif
	}

	void setZero()
	{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
#elif defined(BT_USE_NEON)
		int32x4_t vi = vdupq_n_s32(0);
		mVec128 = vreinterpretq_f32_s32(vi);
#else
		setValue(btScalar(0.), btScalar(0.), btScalar(0.));
#endif
	}

	SIMD_FORCE_INLINE bool isZero() const
	{
		return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
	}

	SIMD_FORCE_INLINE bool fuzzyZero() const
	{
		return length2() < SIMD_EPSILON * SIMD_EPSILON;
	}

	SIMD_FORCE_INLINE void serialize(struct btVector3Data& dataOut) const;

	SIMD_FORCE_INLINE void deSerialize(const struct btVector3Data& dataIn);

	SIMD_FORCE_INLINE void serializeFloat(struct btVector3FloatData& dataOut) const;

	SIMD_FORCE_INLINE void deSerializeFloat(const struct btVector3FloatData& dataIn);

	SIMD_FORCE_INLINE void serializeDouble(struct btVector3DoubleData& dataOut) const;

	SIMD_FORCE_INLINE void deSerializeDouble(const struct btVector3DoubleData& dataIn);

	/**@brief returns index of maximum dot product between this and vectors in array[]
	 * @param array The other vectors
	 * @param array_count The number of other vectors
	 * @param dotOut The maximum dot product */
	SIMD_FORCE_INLINE long maxDot(const btVector3* array, long array_count, btScalar& dotOut) const;

	/**@brief returns index of minimum dot product between this and vectors in array[]
	 * @param array The other vectors
	 * @param array_count The number of other vectors
	 * @param dotOut The minimum dot product */
	SIMD_FORCE_INLINE long minDot(const btVector3* array, long array_count, btScalar& dotOut) const;

	/* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 )) */
	SIMD_FORCE_INLINE btVector3 dot3(const btVector3& v0, const btVector3& v1, const btVector3& v2) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)

		__m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128);
		__m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128);
		__m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128);
		__m128 b0 = _mm_unpacklo_ps(a0, a1);
		__m128 b1 = _mm_unpackhi_ps(a0, a1);
		__m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps());
		__m128 r = _mm_movelh_ps(b0, b2);
		r = _mm_add_ps(r, _mm_movehl_ps(b2, b0));
		a2 = _mm_and_ps(a2, btvxyzMaskf);
		r = _mm_add_ps(r, btCastdTo128f(_mm_move_sd(btCastfTo128d(a2), btCastfTo128d(b1))));
		return btVector3(r);

#elif defined(BT_USE_NEON)
		static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
		float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128);
		float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128);
		float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128);
		float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1));
		a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask);
		float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]);
		float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
		return btVector3(vcombine_f32(b0, b1));
#else
		return btVector3(dot(v0), dot(v1), dot(v2));
#endif
	}
};
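
/* Usage sketch (illustrative only; `exampleUsage` below is a hypothetical helper,
   not part of this header). It shows the member API defined above together with
   the free operators and helpers declared below (operator+, btDot, btCross, lerp, ...);
   indexing goes through the implicit operator btScalar*().

       void exampleUsage()
       {
           btVector3 a(btScalar(1.), btScalar(0.), btScalar(0.));
           btVector3 b(btScalar(0.), btScalar(1.), btScalar(0.));

           btScalar  d = btDot(a, b);                 // 0
           btVector3 n = btCross(a, b).normalized();  // (0, 0, 1)
           btVector3 m = lerp(a, b, btScalar(0.5));   // (0.5, 0.5, 0)
           btScalar  z = n[2];                        // 1, via operator btScalar*()
           (void)d; (void)m; (void)z;
       }
*/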

/**@brief Return the sum of two vectors (Point semantics) */
SIMD_FORCE_INLINE btVector3
operator+(const btVector3& v1, const btVector3& v2)
{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
#elif defined(BT_USE_NEON)
	return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
#else
	return btVector3(
		v1.m_floats[0] + v2.m_floats[0],
		v1.m_floats[1] + v2.m_floats[1],
		v1.m_floats[2] + v2.m_floats[2]);
#endif
}

/**@brief Return the elementwise product of two vectors */
SIMD_FORCE_INLINE btVector3
operator*(const btVector3& v1, const btVector3& v2)
{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
#elif defined(BT_USE_NEON)
	return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
#else
	return btVector3(
		v1.m_floats[0] * v2.m_floats[0],
		v1.m_floats[1] * v2.m_floats[1],
		v1.m_floats[2] * v2.m_floats[2]);
#endif
}

/**@brief Return the difference between two vectors */
SIMD_FORCE_INLINE btVector3
operator-(const btVector3& v1, const btVector3& v2)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))

	//	without _mm_and_ps this code causes slowdown in Concave moving
	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
	return btVector3(_mm_and_ps(r, btvFFF0fMask));
#elif defined(BT_USE_NEON)
	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
#else
	return btVector3(
		v1.m_floats[0] - v2.m_floats[0],
		v1.m_floats[1] - v2.m_floats[1],
		v1.m_floats[2] - v2.m_floats[2]);
#endif
}

/**@brief Return the negative of the vector */
SIMD_FORCE_INLINE btVector3
operator-(const btVector3& v)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
	return btVector3(_mm_and_ps(r, btvFFF0fMask));
#elif defined(BT_USE_NEON)
	return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
#else
	return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
#endif
}

/**@brief Return the vector scaled by s */
SIMD_FORCE_INLINE btVector3
operator*(const btVector3& v, const btScalar& s)
{
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
	vs = bt_pshufd_ps(vs, 0x80);  //	(S S S 0.0)
	return btVector3(_mm_mul_ps(v.mVec128, vs));
#elif defined(BT_USE_NEON)
	float32x4_t r = vmulq_n_f32(v.mVec128, s);
	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
#else
	return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
#endif
}

/**@brief Return the vector scaled by s */
SIMD_FORCE_INLINE btVector3
operator*(const btScalar& s, const btVector3& v)
{
	return v * s;
}

/**@brief Return the vector inversely scaled by s */
SIMD_FORCE_INLINE btVector3
operator/(const btVector3& v, const btScalar& s)
{
	btFullAssert(s != btScalar(0.0));
#if 0  //defined(BT_USE_SSE_IN_API)
	// this code is not faster !
	__m128 vs = _mm_load_ss(&s);
	vs = _mm_div_ss(v1110, vs);
	vs = bt_pshufd_ps(vs, 0x00);  //	(S S S S)

	return btVector3(_mm_mul_ps(v.mVec128, vs));
#else
	return v * (btScalar(1.0) / s);
#endif
}

/**@brief Return the elementwise division of two vectors */
SIMD_FORCE_INLINE btVector3
operator/(const btVector3& v1, const btVector3& v2)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
	vec = _mm_and_ps(vec, btvFFF0fMask);
	return btVector3(vec);
#elif defined(BT_USE_NEON)
	float32x4_t x, y, v, m;

	x = v1.mVec128;
	y = v2.mVec128;

	v = vrecpeq_f32(y);     // v ~ 1/y
	m = vrecpsq_f32(y, v);  // m = (2-v*y)
	v = vmulq_f32(v, m);    // vv = v*m ~~ 1/y
	m = vrecpsq_f32(y, v);  // mm = (2-vv*y)
	v = vmulq_f32(v, x);    // x*vv
	v = vmulq_f32(v, m);    // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y

	return btVector3(v);
#else
	return btVector3(
		v1.m_floats[0] / v2.m_floats[0],
		v1.m_floats[1] / v2.m_floats[1],
		v1.m_floats[2] / v2.m_floats[2]);
#endif
}

/**@brief Return the dot product between two vectors */
SIMD_FORCE_INLINE btScalar
btDot(const btVector3& v1, const btVector3& v2)
{
	return v1.dot(v2);
}

/**@brief Return the distance squared between two vectors */
SIMD_FORCE_INLINE btScalar
btDistance2(const btVector3& v1, const btVector3& v2)
{
	return v1.distance2(v2);
}

/**@brief Return the distance between two vectors */
SIMD_FORCE_INLINE btScalar
btDistance(const btVector3& v1, const btVector3& v2)
{
	return v1.distance(v2);
}

/**@brief Return the angle between two vectors */
SIMD_FORCE_INLINE btScalar
btAngle(const btVector3& v1, const btVector3& v2)
{
	return v1.angle(v2);
}

/**@brief Return the cross product of two vectors */
SIMD_FORCE_INLINE btVector3
btCross(const btVector3& v1, const btVector3& v2)
{
	return v1.cross(v2);
}

SIMD_FORCE_INLINE btScalar
btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
{
	return v1.triple(v2, v3);
}

/**@brief Return the linear interpolation between two vectors
 * @param v1 One vector
 * @param v2 The other vector
 * @param t The interpolation ratio (t = 0 => return v1, t = 1 => return v2) */
SIMD_FORCE_INLINE btVector3
lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
{
	return v1.lerp(v2, t);
}

SIMD_FORCE_INLINE btScalar btVector3::distance2(const btVector3& v) const
{
	return (v - *this).length2();
}

SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
{
	return (v - *this).length();
}

SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
{
	btVector3 nrm = *this;

	return nrm.normalize();
}

SIMD_FORCE_INLINE btVector3 btVector3::rotate(const btVector3& wAxis, const btScalar _angle) const
{
	// wAxis must be a unit length vector

#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)

	__m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
	btScalar ssin = btSin(_angle);
	__m128 C = wAxis.cross(mVec128).mVec128;
	O = _mm_and_ps(O, btvFFF0fMask);
	btScalar scos = btCos(_angle);

	__m128 vsin = _mm_load_ss(&ssin);  //	(S 0 0 0)
	__m128 vcos = _mm_load_ss(&scos);  //	(S 0 0 0)

	__m128 Y = bt_pshufd_ps(O, 0xC9);  //	(Y Z X 0)
	__m128 Z = bt_pshufd_ps(O, 0xD2);  //	(Z X Y 0)
	O = _mm_add_ps(O, Y);
	vsin = bt_pshufd_ps(vsin, 0x80);  //	(S S S 0)
	O = _mm_add_ps(O, Z);
	vcos = bt_pshufd_ps(vcos, 0x80);  //	(S S S 0)

	vsin = vsin * C;
	O = O * wAxis.mVec128;
	__m128 X = mVec128 - O;

	O = O + vsin;
	vcos = vcos * X;
	O = O + vcos;

	return btVector3(O);
#else
	btVector3 o = wAxis * wAxis.dot(*this);
	btVector3 _x = *this - o;
	btVector3 _y;

	_y = wAxis.cross(*this);

	return (o + _x * btCos(_angle) + _y * btSin(_angle));
#endif
}

SIMD_FORCE_INLINE long btVector3::maxDot(const btVector3* array, long array_count, btScalar& dotOut) const
{
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#if defined _WIN32 || defined(BT_USE_SSE)
	const long scalar_cutoff = 10;
	long _maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
#elif defined BT_USE_NEON
	const long scalar_cutoff = 4;
	extern long (*_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
#endif
	if (array_count < scalar_cutoff)
#endif
	{
		btScalar maxDot1 = -SIMD_INFINITY;
		int i = 0;
		int ptIndex = -1;
		for (i = 0; i < array_count; i++)
		{
			btScalar dot = array[i].dot(*this);

			if (dot > maxDot1)
			{
				maxDot1 = dot;
				ptIndex = i;
			}
		}

		dotOut = maxDot1;
		return ptIndex;
	}
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
	return _maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
#endif
}

SIMD_FORCE_INLINE long btVector3::minDot(const btVector3* array, long array_count, btScalar& dotOut) const
{
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#if defined BT_USE_SSE
	const long scalar_cutoff = 10;
	long _mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
#elif defined BT_USE_NEON
	const long scalar_cutoff = 4;
	extern long (*_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
#else
#error unhandled arch!
#endif

	if (array_count < scalar_cutoff)
#endif
	{
		btScalar minDot = SIMD_INFINITY;
		int i = 0;
		int ptIndex = -1;

		for (i = 0; i < array_count; i++)
		{
			btScalar dot = array[i].dot(*this);

			if (dot < minDot)
			{
				minDot = dot;
				ptIndex = i;
			}
		}

		dotOut = minDot;

		return ptIndex;
	}
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
	return _mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
#endif  //BT_USE_SIMD_VECTOR3
}

class btVector4 : public btVector3
{
public:
	SIMD_FORCE_INLINE btVector4() {}

	SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
		: btVector3(_x, _y, _z)
	{
		m_floats[3] = _w;
	}

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)
	{
		mVec128 = vec;
	}

	SIMD_FORCE_INLINE btVector4(const btVector3& rhs)
	{
		mVec128 = rhs.mVec128;
	}

	SIMD_FORCE_INLINE btVector4&
	operator=(const btVector4& v)
	{
		mVec128 = v.mVec128;
		return *this;
	}
#endif  // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)

	SIMD_FORCE_INLINE btVector4 absolute4() const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
		return btVector4(vabsq_f32(mVec128));
#else
		return btVector4(
			btFabs(m_floats[0]),
			btFabs(m_floats[1]),
			btFabs(m_floats[2]),
			btFabs(m_floats[3]));
#endif
	}

	btScalar getW() const { return m_floats[3]; }

	SIMD_FORCE_INLINE int maxAxis4() const
	{
		int maxIndex = -1;
		btScalar maxVal = btScalar(-BT_LARGE_FLOAT);
		if (m_floats[0] > maxVal)
		{
			maxIndex = 0;
			maxVal = m_floats[0];
		}
		if (m_floats[1] > maxVal)
		{
			maxIndex = 1;
			maxVal = m_floats[1];
		}
		if (m_floats[2] > maxVal)
		{
			maxIndex = 2;
			maxVal = m_floats[2];
		}
		if (m_floats[3] > maxVal)
		{
			maxIndex = 3;
			maxVal = m_floats[3];
		}

		return maxIndex;
	}

	SIMD_FORCE_INLINE int minAxis4() const
	{
		int minIndex = -1;
		btScalar minVal = btScalar(BT_LARGE_FLOAT);
		if (m_floats[0] < minVal)
		{
			minIndex = 0;
			minVal = m_floats[0];
		}
		if (m_floats[1] < minVal)
		{
			minIndex = 1;
			minVal = m_floats[1];
		}
		if (m_floats[2] < minVal)
		{
			minIndex = 2;
			minVal = m_floats[2];
		}
		if (m_floats[3] < minVal)
		{
			minIndex = 3;
			minVal = m_floats[3];
		}

		return minIndex;
	}

	SIMD_FORCE_INLINE int closestAxis4() const
	{
		return absolute4().maxAxis4();
	}

	/**@brief Set x,y,z and zero w
	 * @param x Value of x
	 * @param y Value of y
	 * @param z Value of z
	 */

	/*		void getValue(btScalar *m) const
		{
			m[0] = m_floats[0];
			m[1] = m_floats[1];
			m[2] = m_floats[2];
		}
	*/
	/**@brief Set the values
	 * @param x Value of x
	 * @param y Value of y
	 * @param z Value of z
	 * @param w Value of w
	 */
	SIMD_FORCE_INLINE void setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
	{
		m_floats[0] = _x;
		m_floats[1] = _y;
		m_floats[2] = _z;
		m_floats[3] = _w;
	}
};

///btSwapScalarEndian swaps scalar endianness, useful for network and cross-platform serialization
SIMD_FORCE_INLINE void btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
{
#ifdef BT_USE_DOUBLE_PRECISION
	unsigned char* dest = (unsigned char*)&destVal;
	unsigned char* src = (unsigned char*)&sourceVal;
	dest[0] = src[7];
	dest[1] = src[6];
	dest[2] = src[5];
	dest[3] = src[4];
	dest[4] = src[3];
	dest[5] = src[2];
	dest[6] = src[1];
	dest[7] = src[0];
#else
	unsigned char* dest = (unsigned char*)&destVal;
	unsigned char* src = (unsigned char*)&sourceVal;
	dest[0] = src[3];
	dest[1] = src[2];
	dest[2] = src[1];
	dest[3] = src[0];
#endif  //BT_USE_DOUBLE_PRECISION
}
///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
SIMD_FORCE_INLINE void btSwapVector3Endian(const btVector3& sourceVec, btVector3& destVec)
{
	for (int i = 0; i < 4; i++)
	{
		btSwapScalarEndian(sourceVec[i], destVec[i]);
	}
}

///btUnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
SIMD_FORCE_INLINE void btUnSwapVector3Endian(btVector3& vector)
{
	btVector3 swappedVec;
	for (int i = 0; i < 4; i++)
	{
		btSwapScalarEndian(vector[i], swappedVec[i]);
	}
	vector = swappedVec;
}

template <class T>
SIMD_FORCE_INLINE void btPlaneSpace1(const T& n, T& p, T& q)
{
	if (btFabs(n[2]) > SIMDSQRT12)
	{
		// choose p in y-z plane
		btScalar a = n[1] * n[1] + n[2] * n[2];
		btScalar k = btRecipSqrt(a);
		p[0] = 0;
		p[1] = -n[2] * k;
		p[2] = n[1] * k;
		// set q = n x p
		q[0] = a * k;
		q[1] = -n[0] * p[2];
		q[2] = n[0] * p[1];
	}
	else
	{
		// choose p in x-y plane
		btScalar a = n[0] * n[0] + n[1] * n[1];
		btScalar k = btRecipSqrt(a);
		p[0] = -n[1] * k;
		p[1] = n[0] * k;
		p[2] = 0;
		// set q = n x p
		q[0] = -n[2] * p[1];
		q[1] = n[2] * p[0];
		q[2] = a * k;
	}
}

struct btVector3FloatData
{
	float m_floats[4];
};

struct btVector3DoubleData
{
	double m_floats[4];
};

SIMD_FORCE_INLINE void btVector3::serializeFloat(struct btVector3FloatData& dataOut) const
{
	///could also do a memcpy, check if it is worth it
	for (int i = 0; i < 4; i++)
		dataOut.m_floats[i] = float(m_floats[i]);
}

SIMD_FORCE_INLINE void btVector3::deSerializeFloat(const struct btVector3FloatData& dataIn)
{
	for (int i = 0; i < 4; i++)
		m_floats[i] = btScalar(dataIn.m_floats[i]);
}

SIMD_FORCE_INLINE void btVector3::serializeDouble(struct btVector3DoubleData& dataOut) const
{
	///could also do a memcpy, check if it is worth it
	for (int i = 0; i < 4; i++)
		dataOut.m_floats[i] = double(m_floats[i]);
}

SIMD_FORCE_INLINE void btVector3::deSerializeDouble(const struct btVector3DoubleData& dataIn)
{
	for (int i = 0; i < 4; i++)
		m_floats[i] = btScalar(dataIn.m_floats[i]);
}

SIMD_FORCE_INLINE void btVector3::serialize(struct btVector3Data& dataOut) const
{
	///could also do a memcpy, check if it is worth it
	for (int i = 0; i < 4; i++)
		dataOut.m_floats[i] = m_floats[i];
}

SIMD_FORCE_INLINE void btVector3::deSerialize(const struct btVector3Data& dataIn)
{
	for (int i = 0; i < 4; i++)
		m_floats[i] = dataIn.m_floats[i];
}

#endif  //BT_VECTOR3_H