Home | History | Annotate | Download | only in Core
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud (at) inria.fr>
      5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1 (at) gmail.com>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_GENERIC_PACKET_MATH_H
     12 #define EIGEN_GENERIC_PACKET_MATH_H
     13 
     14 namespace Eigen {
     15 
     16 namespace internal {
     17 
     18 /** \internal
     19   * \file GenericPacketMath.h
     20   *
     21   * Default implementation for types not supported by the vectorization.
     22   * In practice these functions are provided to make it easier to write
     23   * generic vectorized code.
     24   */
     25 
     26 #ifndef EIGEN_DEBUG_ALIGNED_LOAD
     27 #define EIGEN_DEBUG_ALIGNED_LOAD
     28 #endif
     29 
     30 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
     31 #define EIGEN_DEBUG_UNALIGNED_LOAD
     32 #endif
     33 
     34 #ifndef EIGEN_DEBUG_ALIGNED_STORE
     35 #define EIGEN_DEBUG_ALIGNED_STORE
     36 #endif
     37 
     38 #ifndef EIGEN_DEBUG_UNALIGNED_STORE
     39 #define EIGEN_DEBUG_UNALIGNED_STORE
     40 #endif
     41 
     42 struct default_packet_traits
     43 {
     44   enum {
     45     HasAdd    = 1,
     46     HasSub    = 1,
     47     HasMul    = 1,
     48     HasNegate = 1,
     49     HasAbs    = 1,
     50     HasAbs2   = 1,
     51     HasMin    = 1,
     52     HasMax    = 1,
     53     HasConj   = 1,
     54     HasSetLinear = 1,
     55 
     56     HasDiv    = 0,
     57     HasSqrt   = 0,
     58     HasExp    = 0,
     59     HasLog    = 0,
     60     HasPow    = 0,
     61 
     62     HasSin    = 0,
     63     HasCos    = 0,
     64     HasTan    = 0,
     65     HasASin   = 0,
     66     HasACos   = 0,
     67     HasATan   = 0
     68   };
     69 };
     70 
     71 template<typename T> struct packet_traits : default_packet_traits
     72 {
     73   typedef T type;
     74   enum {
     75     Vectorizable = 0,
     76     size = 1,
     77     AlignedOnScalar = 0
     78   };
     79   enum {
     80     HasAdd    = 0,
     81     HasSub    = 0,
     82     HasMul    = 0,
     83     HasNegate = 0,
     84     HasAbs    = 0,
     85     HasAbs2   = 0,
     86     HasMin    = 0,
     87     HasMax    = 0,
     88     HasConj   = 0,
     89     HasSetLinear = 0
     90   };
     91 };
     92 
     93 /** \internal \returns a + b (coeff-wise) */
     94 template<typename Packet> inline Packet
     95 padd(const Packet& a,
     96         const Packet& b) { return a+b; }
     97 
     98 /** \internal \returns a - b (coeff-wise) */
     99 template<typename Packet> inline Packet
    100 psub(const Packet& a,
    101         const Packet& b) { return a-b; }
    102 
    103 /** \internal \returns -a (coeff-wise) */
    104 template<typename Packet> inline Packet
    105 pnegate(const Packet& a) { return -a; }
    106 
    107 /** \internal \returns conj(a) (coeff-wise) */
    108 template<typename Packet> inline Packet
    109 pconj(const Packet& a) { return conj(a); }
    110 
    111 /** \internal \returns a * b (coeff-wise) */
    112 template<typename Packet> inline Packet
    113 pmul(const Packet& a,
    114         const Packet& b) { return a*b; }
    115 
    116 /** \internal \returns a / b (coeff-wise) */
    117 template<typename Packet> inline Packet
    118 pdiv(const Packet& a,
    119         const Packet& b) { return a/b; }
    120 
    121 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
    122 template<typename Packet> inline Packet
    123 pmin(const Packet& a,
    124         const Packet& b) { using std::min; return (min)(a, b); }
    125 
    126 /** \internal \returns the max of \a a and \a b  (coeff-wise) */
    127 template<typename Packet> inline Packet
    128 pmax(const Packet& a,
    129         const Packet& b) { using std::max; return (max)(a, b); }
    130 
    131 /** \internal \returns the absolute value of \a a */
    132 template<typename Packet> inline Packet
    133 pabs(const Packet& a) { return abs(a); }
    134 
    135 /** \internal \returns the bitwise and of \a a and \a b */
    136 template<typename Packet> inline Packet
    137 pand(const Packet& a, const Packet& b) { return a & b; }
    138 
    139 /** \internal \returns the bitwise or of \a a and \a b */
    140 template<typename Packet> inline Packet
    141 por(const Packet& a, const Packet& b) { return a | b; }
    142 
    143 /** \internal \returns the bitwise xor of \a a and \a b */
    144 template<typename Packet> inline Packet
    145 pxor(const Packet& a, const Packet& b) { return a ^ b; }
    146 
    147 /** \internal \returns the bitwise andnot of \a a and \a b */
    148 template<typename Packet> inline Packet
    149 pandnot(const Packet& a, const Packet& b) { return a & (!b); }
    150 
    151 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
    152 template<typename Packet> inline Packet
    153 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
    154 
    155 /** \internal \returns a packet version of \a *from, (un-aligned load) */
    156 template<typename Packet> inline Packet
    157 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
    158 
    159 /** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
    160 template<typename Packet> inline Packet
    161 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
    162 
    163 /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
    164 template<typename Packet> inline Packet
    165 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
    166 
    167 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
    168 template<typename Scalar> inline typename packet_traits<Scalar>::type
    169 plset(const Scalar& a) { return a; }
    170 
    171 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
    172 template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
    173 { (*to) = from; }
    174 
    175 /** \internal copy the packet \a from to \a *to, (un-aligned store) */
    176 template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
    177 { (*to) = from; }
    178 
    179 /** \internal tries to do cache prefetching of \a addr */
    180 template<typename Scalar> inline void prefetch(const Scalar* addr)
    181 {
    182 #if !defined(_MSC_VER)
    183 __builtin_prefetch(addr);
    184 #endif
    185 }
    186 
    187 /** \internal \returns the first element of a packet */
    188 template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
    189 { return a; }
    190 
    191 /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
    192 template<typename Packet> inline Packet
    193 preduxp(const Packet* vecs) { return vecs[0]; }
    194 
    195 /** \internal \returns the sum of the elements of \a a*/
    196 template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
    197 { return a; }
    198 
    199 /** \internal \returns the product of the elements of \a a*/
    200 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
    201 { return a; }
    202 
    203 /** \internal \returns the min of the elements of \a a*/
    204 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
    205 { return a; }
    206 
    207 /** \internal \returns the max of the elements of \a a*/
    208 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
    209 { return a; }
    210 
    211 /** \internal \returns the reversed elements of \a a*/
    212 template<typename Packet> inline Packet preverse(const Packet& a)
    213 { return a; }
    214 
    215 
    216 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
    217 template<typename Packet> inline Packet pcplxflip(const Packet& a)
    218 { return Packet(imag(a),real(a)); }
    219 
    220 /**************************
    221 * Special math functions
    222 ***************************/
    223 
    224 /** \internal \returns the sine of \a a (coeff-wise) */
    225 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    226 Packet psin(const Packet& a) { return sin(a); }
    227 
    228 /** \internal \returns the cosine of \a a (coeff-wise) */
    229 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    230 Packet pcos(const Packet& a) { return cos(a); }
    231 
    232 /** \internal \returns the tan of \a a (coeff-wise) */
    233 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    234 Packet ptan(const Packet& a) { return tan(a); }
    235 
    236 /** \internal \returns the arc sine of \a a (coeff-wise) */
    237 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    238 Packet pasin(const Packet& a) { return asin(a); }
    239 
    240 /** \internal \returns the arc cosine of \a a (coeff-wise) */
    241 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    242 Packet pacos(const Packet& a) { return acos(a); }
    243 
    244 /** \internal \returns the exp of \a a (coeff-wise) */
    245 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    246 Packet pexp(const Packet& a) { return exp(a); }
    247 
    248 /** \internal \returns the log of \a a (coeff-wise) */
    249 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    250 Packet plog(const Packet& a) { return log(a); }
    251 
    252 /** \internal \returns the square-root of \a a (coeff-wise) */
    253 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    254 Packet psqrt(const Packet& a) { return sqrt(a); }
    255 
    256 /***************************************************************************
    257 * The following functions might not have to be overwritten for vectorized types
    258 ***************************************************************************/
    259 
    260 /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
    261 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
    262 template<typename Packet>
    263 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
    264 {
    265   pstore(to, pset1<Packet>(a));
    266 }
    267 
    268 /** \internal \returns a * b + c (coeff-wise) */
    269 template<typename Packet> inline Packet
    270 pmadd(const Packet&  a,
    271          const Packet&  b,
    272          const Packet&  c)
    273 { return padd(pmul(a, b),c); }
    274 
    275 /** \internal \returns a packet version of \a *from.
    276   * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
    277 template<typename Packet, int LoadMode>
    278 inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
    279 {
    280   if(LoadMode == Aligned)
    281     return pload<Packet>(from);
    282   else
    283     return ploadu<Packet>(from);
    284 }
    285 
    286 /** \internal copy the packet \a from to \a *to.
    287   * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
    288 template<typename Scalar, typename Packet, int LoadMode>
    289 inline void pstoret(Scalar* to, const Packet& from)
    290 {
    291   if(LoadMode == Aligned)
    292     pstore(to, from);
    293   else
    294     pstoreu(to, from);
    295 }
    296 
    297 /** \internal default implementation of palign() allowing partial specialization */
    298 template<int Offset,typename PacketType>
    299 struct palign_impl
    300 {
    301   // by default data are aligned, so there is nothing to be done :)
    302   static inline void run(PacketType&, const PacketType&) {}
    303 };
    304 
    305 /** \internal update \a first using the concatenation of the \a Offset last elements
    306   * of \a first and packet_size minus \a Offset first elements of \a second */
    307 template<int Offset,typename PacketType>
    308 inline void palign(PacketType& first, const PacketType& second)
    309 {
    310   palign_impl<Offset,PacketType>::run(first,second);
    311 }
    312 
    313 /***************************************************************************
    314 * Fast complex products (GCC generates a function call which is very slow)
    315 ***************************************************************************/
    316 
    317 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
    318 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
    319 
    320 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
    321 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
    322 
    323 } // end namespace internal
    324 
    325 } // end namespace Eigen
    326 
    327 #endif // EIGEN_GENERIC_PACKET_MATH_H
    328 
    329