Home | History | Annotate | Download | only in MoreVectorization
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2009 Rohit Garg <rpg.314 (at) gmail.com>
      5 // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1 (at) gmail.com>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
     12 #define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
     13 
     14 namespace Eigen {
     15 
     16 namespace internal {
     17 
     18 /** \internal \returns the arcsin of \a a (coeff-wise) */
     19 template<typename Packet> inline static Packet pasin(Packet a) { return std::asin(a); }
     20 
     21 #ifdef EIGEN_VECTORIZE_SSE
     22 
     23 template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x)
     24 {
     25   _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
     26   _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);
     27   _EIGEN_DECLARE_CONST_Packet4f(3half, 1.5);
     28 
     29   _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
     30 
     31   _EIGEN_DECLARE_CONST_Packet4f(pi, 3.141592654);
     32   _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);
     33 
     34   _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
     35   _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
     36   _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
     37   _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
     38   _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);
     39 
     40   Packet4f a = pabs(x);//got the absolute value
     41 
     42   Packet4f sign_bit= _mm_and_ps(x, p4f_sign_mask);//extracted the sign bit
     43 
     44   Packet4f z1,z2;//will need them during computation
     45 
     46 
     47 //will compute the two branches for asin
     48 //so first compare with half
     49 
     50   Packet4f branch_mask= _mm_cmpgt_ps(a, p4f_half);//this is to select which branch to take
     51 //both will be taken, and finally results will be merged
     52 //the branch for values >0.5
     53 
     54     {
     55 //the core series expansion
     56     z1=pmadd(p4f_minus_half,a,p4f_half);
     57     Packet4f x1=psqrt(z1);
     58     Packet4f s1=pmadd(p4f_asin1, z1, p4f_asin2);
     59     Packet4f s2=pmadd(s1, z1, p4f_asin3);
     60     Packet4f s3=pmadd(s2,z1, p4f_asin4);
     61     Packet4f s4=pmadd(s3,z1, p4f_asin5);
     62     Packet4f temp=pmul(s4,z1);//not really a madd but a mul by z so that the next term can be a madd
     63     z1=pmadd(temp,x1,x1);
     64     z1=padd(z1,z1);
     65     z1=psub(p4f_pi_over_2,z1);
     66     }
     67 
     68     {
     69 //the core series expansion
     70     Packet4f x2=a;
     71     z2=pmul(x2,x2);
     72     Packet4f s1=pmadd(p4f_asin1, z2, p4f_asin2);
     73     Packet4f s2=pmadd(s1, z2, p4f_asin3);
     74     Packet4f s3=pmadd(s2,z2, p4f_asin4);
     75     Packet4f s4=pmadd(s3,z2, p4f_asin5);
     76     Packet4f temp=pmul(s4,z2);//not really a madd but a mul by z so that the next term can be a madd
     77     z2=pmadd(temp,x2,x2);
     78     }
     79 
     80 /* select the correct result from the two branch evaluations */
     81   z1  = _mm_and_ps(branch_mask, z1);
     82   z2  = _mm_andnot_ps(branch_mask, z2);
     83   Packet4f z  = _mm_or_ps(z1,z2);
     84 
     85 /* update the sign */
     86   return _mm_xor_ps(z, sign_bit);
     87 }
     88 
     89 #endif // EIGEN_VECTORIZE_SSE
     90 
     91 } // end namespace internal
     92 
     93 } // end namespace Eigen
     94 
     95 #endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
     96