Home | History | Annotate | Download | only in Core
      1 /*
      2  Copyright (c) 2011, Intel Corporation. All rights reserved.
      3 
      4  Redistribution and use in source and binary forms, with or without modification,
      5  are permitted provided that the following conditions are met:
      6 
      7  * Redistributions of source code must retain the above copyright notice, this
      8    list of conditions and the following disclaimer.
      9  * Redistributions in binary form must reproduce the above copyright notice,
     10    this list of conditions and the following disclaimer in the documentation
     11    and/or other materials provided with the distribution.
     12  * Neither the name of Intel Corporation nor the names of its contributors may
     13    be used to endorse or promote products derived from this software without
     14    specific prior written permission.
     15 
     16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     17  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     20  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     21  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     23  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     25  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27  ********************************************************************************
     28  *   Content : Eigen bindings to Intel(R) MKL
     29  *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
     30  ********************************************************************************
     31 */
     32 
     33 #ifndef EIGEN_ASSIGN_VML_H
     34 #define EIGEN_ASSIGN_VML_H
     35 
     36 namespace Eigen {
     37 
     38 namespace internal {
     39 
     40 template<typename Op> struct vml_call
     41 { enum { IsSupported = 0 }; };
     42 
// Compile-time analysis deciding whether dst = unary_op(src) may be routed
// through MKL VML, and if so with which traversal.  VML is usable only when
// the functor has a kernel (vml_call<UnaryOp>::IsSupported), both sides give
// direct pointer access with unit inner stride, storage orders agree, and the
// size reaches EIGEN_MKL_VML_THRESHOLD (Dynamic sizes are assumed big enough).
template<typename Dst, typename Src, typename UnaryOp>
class vml_assign_traits
{
  private:
    enum {
      // Can we take the address of the coefficients on each side?
      DstHasDirectAccess = Dst::Flags & DirectAccessBit,
      SrcHasDirectAccess = Src::Flags & DirectAccessBit,

      StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
      // Compile-time inner dimension (full size for vectors); not referenced
      // below, InnerMaxSize is what drives the size check.
      InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
                : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
                : int(Dst::RowsAtCompileTime),
      InnerMaxSize  = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
                    : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
                    : int(Dst::MaxRowsAtCompileTime),
      MaxSizeAtCompileTime = Dst::SizeAtCompileTime,

      // Structural requirements for calling VML at all.
      MightEnableVml =  vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
                     && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
      // A single VML call can cover the whole expression only with linear access.
      MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
      // Number of coefficients one VML call would process.
      VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
      MayEnableVml = MightEnableVml && LargeEnough,
      MayLinearize = MayEnableVml && MightLinearize
    };
  public:
    enum {
      // LinearVectorizedTraversal -> one VML call for the whole expression,
      // InnerVectorizedTraversal  -> one VML call per inner vector,
      // DefaultTraversal          -> no VML, plain Eigen evaluation.
      Traversal = MayLinearize ? LinearVectorizedTraversal
                : MayEnableVml ? InnerVectorizedTraversal
                : DefaultTraversal
    };
};
     75 
// Generic fallback: for any VmlTraversal other than the two VML-enabled
// values specialized below, inherit Eigen's built-in assignment (note the
// BuiltIn tag, which avoids re-entering the Specialized dispatch).
template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
         int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
struct vml_assign_impl
  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
{
};
     82 
     83 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
     84 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
     85 {
     86   typedef typename Derived1::Scalar Scalar;
     87   typedef typename Derived1::Index Index;
     88   static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
     89   {
     90     // in case we want to (or have to) skip VML at runtime we can call:
     91     // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
     92     const Index innerSize = dst.innerSize();
     93     const Index outerSize = dst.outerSize();
     94     for(Index outer = 0; outer < outerSize; ++outer) {
     95       const Scalar *src_ptr = src.IsRowMajor ?  &(src.nestedExpression().coeffRef(outer,0)) :
     96                                                 &(src.nestedExpression().coeffRef(0, outer));
     97       Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
     98       vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
     99     }
    100   }
    101 };
    102 
    103 template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
    104 struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
    105 {
    106   static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
    107   {
    108     // in case we want to (or have to) skip VML at runtime we can call:
    109     // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
    110     vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
    111   }
    112 };
    113 
// Macros

// Specialize Eigen's assign_impl for dst = unary_op(src) so that the given
// TRAVERSAL/UNROLLING combination is re-dispatched through vml_assign_impl,
// which chooses between the VML and the built-in evaluation paths.
#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
  template<typename Derived1, typename Derived2, typename UnaryOp> \
  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized>  {  \
    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
    } \
  };

// Intercept every traversal/unrolling combination Eigen may select.
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)
    135 
    136 
    137 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
    138 #define  EIGEN_MKL_VML_MODE VML_HA
    139 #else
    140 #define  EIGEN_MKL_VML_MODE VML_LA
    141 #endif
    142 
// Declare a vml_call specialization mapping the Eigen unary functor
// scalar_<EIGENOP>_op<EIGENTYPE> onto the classic VML entry point VMLOP
// (e.g. vsSin), which takes no accuracy-mode argument.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)     \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,        \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst);                           \
    }                                                                            \
  };
    151 
// Same as EIGEN_MKL_VML_DECLARE_UNARY_CALL, but for the 'vm*' flavor of the
// kernel (e.g. vmsSin) which takes an explicit accuracy-mode argument,
// supplied here from EIGEN_MKL_VML_MODE.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)  \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/,        \
                            int size, const EIGENTYPE* src, EIGENTYPE* dst) {    \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode);                  \
    }                                                                            \
  };
    161 
// Declare a vml_call specialization for the pow functor, forwarding to a
// v?powx_ kernel.  The exponent is read from the functor's m_exponent member,
// and size/exponent/mode are passed by address (Fortran-style underscored
// entry point) rather than by value.
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE)       \
  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > {               \
    enum { IsSupported = 1 };                                                    \
    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func,        \
                          int size, const EIGENTYPE* src, EIGENTYPE* dst) {      \
      EIGENTYPE exponent = func.m_exponent;                                      \
      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE;                                    \
      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent,               \
                        (VMLTYPE*)dst, &vmlMode);                                \
    }                                                                            \
  };
    173 
// Convenience wrappers instantiating one kernel binding per scalar type.
// MKL prefixes: vs/vd = float/double, vc/vz = scomplex/dcomplex (classic
// API); vms/vmd/vmc/vmz = same types through the mode-taking 'vm' API.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8)   \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)

// All four scalar types through the classic (no accuracy mode) API.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP)                        \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP)                         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)


#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float)         \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)             \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8)  \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)

// All four scalar types through the mode-taking 'vm' API.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP)                     \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP)                      \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
    198 
    199 
// Bind the supported coefficient-wise functors to their VML kernels
// (float/double/scomplex/dcomplex each, with the configured accuracy mode).
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin,  Sin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos,  Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan,  Tan)
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,  Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp,  Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log,  Ln)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt)

// square: only the real (float/double) Sqr kernels are bound here.
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
    211 
    212 // The vm*powx functions are not avaibale in the windows version of MKL.
    213 #ifdef _WIN32
    214 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
    215 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
    216 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
    217 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
    218 #endif
    219 
    220 } // end namespace internal
    221 
    222 } // end namespace Eigen
    223 
    224 #endif // EIGEN_ASSIGN_VML_H
    225