Home | History | Annotate | Download | only in jni
      1 #include <arm_neon.h>
      2 
      3 namespace math {
      4     namespace internal {
      5 #define _IOS_SHUFFLE_1032(vec) vrev64q_f32(vec)
      6 #define _IOS_SHUFFLE_2301(vec) vcombine_f32(vget_high_f32(vec), vget_low_f32(vec))
      7         inline float32x4_t dot4VecResult(const float32x4_t& vec1, const float32x4_t& vec2) {
      8             float32x4_t result = vmulq_f32(vec1, vec2);
      9             result = vaddq_f32(result, _IOS_SHUFFLE_1032(result));
     10             result = vaddq_f32(result, _IOS_SHUFFLE_2301(result));
     11             return result;
     12         }
     13 
     14         inline float32x4_t fastRSqrt(const float32x4_t& vec) {
     15             float32x4_t result;
     16             result = vrsqrteq_f32(vec);
     17             result = vmulq_f32(vrsqrtsq_f32(vmulq_f32(result, result), vec), result);
     18             return result;
     19         }
     20 
     21     }
     22     typedef float32x4_t Vector3;
     23 
     24     inline Vector3 normalize(const Vector3& v1) {
     25         float32x4_t dot;
     26         dot = vsetq_lane_f32(0.0f, v1, 3);
     27         dot = internal::dot4VecResult(dot, dot);
     28 
     29         if (vgetq_lane_f32(dot, 0) == 0.0f) {
     30             return v1;
     31         } else {
     32             Vector3 result;
     33             result = vmulq_f32(v1, internal::fastRSqrt(dot));
     34             return result;
     35         }
     36     }
     37 
     38     inline Vector3 cross(const Vector3& v1, const Vector3& v2) {
     39         float32x4x2_t v_1203 = vzipq_f32(vcombine_f32(vrev64_f32(vget_low_f32(v1)), vrev64_f32(vget_low_f32(v2))), vcombine_f32(vget_high_f32(v1), vget_high_f32(v2)));
     40         float32x4x2_t v_2013 = vzipq_f32(vcombine_f32(vrev64_f32(vget_low_f32(v_1203.val[0])), vrev64_f32(vget_low_f32(v_1203.val[1]))), vcombine_f32(vget_high_f32(v_1203.val[0]), vget_high_f32(v_1203.val[1])));
     41 
     42         Vector3 result;
     43         result = vmlsq_f32(vmulq_f32(v_1203.val[0], v_2013.val[1]), v_1203.val[1], v_2013.val[0]);
     44         return result;
     45     }
     46 }
     47 
     48 void _f_with_internal_compiler_error_in_reload_cse_simplify_operands(const math::Vector3& v1, const math::Vector3& v2) {
     49     math::normalize(math::cross(v1, v2));
     50 }
     51