Home | History | Annotate | Download | only in detail
      1 ///////////////////////////////////////////////////////////////////////////////////
      2 /// OpenGL Mathematics (glm.g-truc.net)
      3 ///
      4 /// Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
      5 /// Permission is hereby granted, free of charge, to any person obtaining a copy
      6 /// of this software and associated documentation files (the "Software"), to deal
      7 /// in the Software without restriction, including without limitation the rights
      8 /// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      9 /// copies of the Software, and to permit persons to whom the Software is
     10 /// furnished to do so, subject to the following conditions:
     11 /// 
     12 /// The above copyright notice and this permission notice shall be included in
     13 /// all copies or substantial portions of the Software.
     14 /// 
     15 /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 /// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     18 /// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 /// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20 /// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     21 /// THE SOFTWARE.
     22 ///
     23 /// @ref core
     24 /// @file glm/core/intrinsic_geometric.inl
     25 /// @date 2009-05-08 / 2011-06-15
     26 /// @author Christophe Riccio
     27 ///////////////////////////////////////////////////////////////////////////////////
     28 
     29 namespace glm{
     30 namespace detail{
     31 
     32 //length
     33 GLM_FUNC_QUALIFIER __m128 sse_len_ps(__m128 x)
     34 {
     35     __m128 dot0 = sse_dot_ps(x, x);
     36 	__m128 sqt0 = _mm_sqrt_ps(dot0);
     37     return sqt0;
     38 }
     39 
     40 //distance
     41 GLM_FUNC_QUALIFIER __m128 sse_dst_ps(__m128 p0, __m128 p1)
     42 {
     43 	__m128 sub0 = _mm_sub_ps(p0, p1);
     44     __m128 len0 = sse_len_ps(sub0);
     45     return len0;
     46 }
     47 
     48 //dot
     49 GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2)
     50 {
     51 #   if((GLM_ARCH & GLM_ARCH_AVX) == GLM_ARCH_AVX)
     52         return _mm_dp_ps(v1, v2, 0xff);
     53 #   else
     54         __m128 mul0 = _mm_mul_ps(v1, v2);
     55         __m128 swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
     56         __m128 add0 = _mm_add_ps(mul0, swp0);
     57         __m128 swp1 = _mm_shuffle_ps(add0, add0, _MM_SHUFFLE(0, 1, 2, 3));
     58         __m128 add1 = _mm_add_ps(add0, swp1);
     59         return add1;
     60 #   endif
     61 }
     62 
     63 // SSE1
     64 GLM_FUNC_QUALIFIER __m128 sse_dot_ss(__m128 v1, __m128 v2)
     65 {
     66 	__m128 mul0 = _mm_mul_ps(v1, v2);
     67 	__m128 mov0 = _mm_movehl_ps(mul0, mul0);
     68 	__m128 add0 = _mm_add_ps(mov0, mul0);
     69 	__m128 swp1 = _mm_shuffle_ps(add0, add0, 1);
     70 	__m128 add1 = _mm_add_ss(add0, swp1);
     71 	return add1;
     72 }
     73 
     74 //cross
     75 GLM_FUNC_QUALIFIER __m128 sse_xpd_ps(__m128 v1, __m128 v2)
     76 {
     77 	__m128 swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
     78 	__m128 swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
     79 	__m128 swp2 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
     80 	__m128 swp3 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
     81 	__m128 mul0 = _mm_mul_ps(swp0, swp3);
     82 	__m128 mul1 = _mm_mul_ps(swp1, swp2);
     83 	__m128 sub0 = _mm_sub_ps(mul0, mul1);
     84 	return sub0;
     85 }
     86 
     87 //normalize
     88 GLM_FUNC_QUALIFIER __m128 sse_nrm_ps(__m128 v)
     89 {
     90 	__m128 dot0 = sse_dot_ps(v, v);
     91 	__m128 isr0 = _mm_rsqrt_ps(dot0);
     92 	__m128 mul0 = _mm_mul_ps(v, isr0);
     93 	return mul0;
     94 }
     95 
     96 //faceforward
     97 GLM_FUNC_QUALIFIER __m128 sse_ffd_ps(__m128 N, __m128 I, __m128 Nref)
     98 {
     99 	//__m128 dot0 = _mm_dot_ps(v, v);
    100 	//__m128 neg0 = _mm_neg_ps(N);
    101 	//__m128 sgn0 = _mm_sgn_ps(dot0);
    102 	//__m128 mix0 = _mm_mix_ps(N, neg0, sgn0);
    103 	//return mix0;
    104 
    105 	__m128 dot0 = sse_dot_ps(Nref, I);
    106 	__m128 sgn0 = sse_sgn_ps(dot0);
    107 	__m128 mul0 = _mm_mul_ps(sgn0, glm::detail::minus_one);
    108 	__m128 mul1 = _mm_mul_ps(N, mul0);
    109 	return mul1;
    110 }
    111 
    112 //reflect
    113 GLM_FUNC_QUALIFIER __m128 sse_rfe_ps(__m128 I, __m128 N)
    114 {
    115 	__m128 dot0 = sse_dot_ps(N, I);
    116 	__m128 mul0 = _mm_mul_ps(N, dot0);
    117 	__m128 mul1 = _mm_mul_ps(mul0, glm::detail::two);
    118 	__m128 sub0 = _mm_sub_ps(I, mul1);
    119 	return sub0;
    120 }
    121 
    122 //refract
    123 GLM_FUNC_QUALIFIER __m128 sse_rfa_ps(__m128 I, __m128 N, __m128 eta)
    124 {
    125 	__m128 dot0 = sse_dot_ps(N, I);
    126 	__m128 mul0 = _mm_mul_ps(eta, eta);
    127 	__m128 mul1 = _mm_mul_ps(dot0, dot0);
    128 	__m128 sub0 = _mm_sub_ps(glm::detail::one, mul0);
    129 	__m128 sub1 = _mm_sub_ps(glm::detail::one, mul1);
    130 	__m128 mul2 = _mm_mul_ps(sub0, sub1);
    131 	
    132 	if(_mm_movemask_ps(_mm_cmplt_ss(mul2, glm::detail::zero)) == 0)
    133 		return glm::detail::zero;
    134 
    135 	__m128 sqt0 = _mm_sqrt_ps(mul2);
    136 	__m128 mul3 = _mm_mul_ps(eta, dot0);
    137 	__m128 add0 = _mm_add_ps(mul3, sqt0);
    138 	__m128 mul4 = _mm_mul_ps(add0, N);
    139 	__m128 mul5 = _mm_mul_ps(eta, I);
    140 	__m128 sub2 = _mm_sub_ps(mul5, mul4);
    141 
    142 	return sub2;
    143 }
    144 
    145 }//namespace detail
    146 }//namespace glm
    147