Home | History | Annotate | Download | only in Core
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud (at) inria.fr>
      5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1 (at) gmail.com>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_GENERIC_PACKET_MATH_H
     12 #define EIGEN_GENERIC_PACKET_MATH_H
     13 
     14 namespace Eigen {
     15 
     16 namespace internal {
     17 
     18 /** \internal
     19   * \file GenericPacketMath.h
     20   *
     21   * Default implementation for types not supported by the vectorization.
     22   * In practice these functions are provided to make easier the writing
     23   * of generic vectorized code.
     24   */
     25 
     26 #ifndef EIGEN_DEBUG_ALIGNED_LOAD
     27 #define EIGEN_DEBUG_ALIGNED_LOAD
     28 #endif
     29 
     30 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD
     31 #define EIGEN_DEBUG_UNALIGNED_LOAD
     32 #endif
     33 
     34 #ifndef EIGEN_DEBUG_ALIGNED_STORE
     35 #define EIGEN_DEBUG_ALIGNED_STORE
     36 #endif
     37 
     38 #ifndef EIGEN_DEBUG_UNALIGNED_STORE
     39 #define EIGEN_DEBUG_UNALIGNED_STORE
     40 #endif
     41 
     42 struct default_packet_traits
     43 {
     44   enum {
     45     HasAdd    = 1,
     46     HasSub    = 1,
     47     HasMul    = 1,
     48     HasNegate = 1,
     49     HasAbs    = 1,
     50     HasAbs2   = 1,
     51     HasMin    = 1,
     52     HasMax    = 1,
     53     HasConj   = 1,
     54     HasSetLinear = 1,
     55 
     56     HasDiv    = 0,
     57     HasSqrt   = 0,
     58     HasExp    = 0,
     59     HasLog    = 0,
     60     HasPow    = 0,
     61 
     62     HasSin    = 0,
     63     HasCos    = 0,
     64     HasTan    = 0,
     65     HasASin   = 0,
     66     HasACos   = 0,
     67     HasATan   = 0
     68   };
     69 };
     70 
     71 template<typename T> struct packet_traits : default_packet_traits
     72 {
     73   typedef T type;
     74   enum {
     75     Vectorizable = 0,
     76     size = 1,
     77     AlignedOnScalar = 0
     78   };
     79   enum {
     80     HasAdd    = 0,
     81     HasSub    = 0,
     82     HasMul    = 0,
     83     HasNegate = 0,
     84     HasAbs    = 0,
     85     HasAbs2   = 0,
     86     HasMin    = 0,
     87     HasMax    = 0,
     88     HasConj   = 0,
     89     HasSetLinear = 0
     90   };
     91 };
     92 
     93 /** \internal \returns a + b (coeff-wise) */
     94 template<typename Packet> inline Packet
     95 padd(const Packet& a,
     96         const Packet& b) { return a+b; }
     97 
     98 /** \internal \returns a - b (coeff-wise) */
     99 template<typename Packet> inline Packet
    100 psub(const Packet& a,
    101         const Packet& b) { return a-b; }
    102 
    103 /** \internal \returns -a (coeff-wise) */
    104 template<typename Packet> inline Packet
    105 pnegate(const Packet& a) { return -a; }
    106 
    107 /** \internal \returns conj(a) (coeff-wise) */
    108 template<typename Packet> inline Packet
    109 pconj(const Packet& a) { return numext::conj(a); }
    110 
    111 /** \internal \returns a * b (coeff-wise) */
    112 template<typename Packet> inline Packet
    113 pmul(const Packet& a,
    114         const Packet& b) { return a*b; }
    115 
    116 /** \internal \returns a / b (coeff-wise) */
    117 template<typename Packet> inline Packet
    118 pdiv(const Packet& a,
    119         const Packet& b) { return a/b; }
    120 
    121 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
    122 template<typename Packet> inline Packet
    123 pmin(const Packet& a,
    124         const Packet& b) { using std::min; return (min)(a, b); }
    125 
    126 /** \internal \returns the max of \a a and \a b  (coeff-wise) */
    127 template<typename Packet> inline Packet
    128 pmax(const Packet& a,
    129         const Packet& b) { using std::max; return (max)(a, b); }
    130 
    131 /** \internal \returns the absolute value of \a a */
    132 template<typename Packet> inline Packet
    133 pabs(const Packet& a) { using std::abs; return abs(a); }
    134 
    135 /** \internal \returns the bitwise and of \a a and \a b */
    136 template<typename Packet> inline Packet
    137 pand(const Packet& a, const Packet& b) { return a & b; }
    138 
    139 /** \internal \returns the bitwise or of \a a and \a b */
    140 template<typename Packet> inline Packet
    141 por(const Packet& a, const Packet& b) { return a | b; }
    142 
    143 /** \internal \returns the bitwise xor of \a a and \a b */
    144 template<typename Packet> inline Packet
    145 pxor(const Packet& a, const Packet& b) { return a ^ b; }
    146 
    147 /** \internal \returns the bitwise andnot of \a a and \a b */
    148 template<typename Packet> inline Packet
    149 pandnot(const Packet& a, const Packet& b) { return a & (!b); }
    150 
    151 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
    152 template<typename Packet> inline Packet
    153 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
    154 
    155 /** \internal \returns a packet version of \a *from, (un-aligned load) */
    156 template<typename Packet> inline Packet
    157 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
    158 
    159 /** \internal \returns a packet with elements of \a *from duplicated.
    160   * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
    161   * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
    162   * Currently, this function is only used for scalar * complex products.
    163  */
    164 template<typename Packet> inline Packet
    165 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
    166 
    167 /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
    168 template<typename Packet> inline Packet
    169 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
    170 
    171 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
    172 template<typename Scalar> inline typename packet_traits<Scalar>::type
    173 plset(const Scalar& a) { return a; }
    174 
    175 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
    176 template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
    177 { (*to) = from; }
    178 
    179 /** \internal copy the packet \a from to \a *to, (un-aligned store) */
    180 template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
    181 { (*to) = from; }
    182 
    183 /** \internal tries to do cache prefetching of \a addr */
    184 template<typename Scalar> inline void prefetch(const Scalar* addr)
    185 {
    186 #if !defined(_MSC_VER)
    187 __builtin_prefetch(addr);
    188 #endif
    189 }
    190 
    191 /** \internal \returns the first element of a packet */
    192 template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
    193 { return a; }
    194 
    195 /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
    196 template<typename Packet> inline Packet
    197 preduxp(const Packet* vecs) { return vecs[0]; }
    198 
    199 /** \internal \returns the sum of the elements of \a a*/
    200 template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
    201 { return a; }
    202 
    203 /** \internal \returns the product of the elements of \a a*/
    204 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
    205 { return a; }
    206 
    207 /** \internal \returns the min of the elements of \a a*/
    208 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
    209 { return a; }
    210 
    211 /** \internal \returns the max of the elements of \a a*/
    212 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
    213 { return a; }
    214 
    215 /** \internal \returns the reversed elements of \a a*/
    216 template<typename Packet> inline Packet preverse(const Packet& a)
    217 { return a; }
    218 
    219 
    220 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
    221 template<typename Packet> inline Packet pcplxflip(const Packet& a)
    222 {
    223   // FIXME: uncomment the following in case we drop the internal imag and real functions.
    224 //   using std::imag;
    225 //   using std::real;
    226   return Packet(imag(a),real(a));
    227 }
    228 
    229 /**************************
    230 * Special math functions
    231 ***************************/
    232 
    233 /** \internal \returns the sine of \a a (coeff-wise) */
    234 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    235 Packet psin(const Packet& a) { using std::sin; return sin(a); }
    236 
    237 /** \internal \returns the cosine of \a a (coeff-wise) */
    238 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    239 Packet pcos(const Packet& a) { using std::cos; return cos(a); }
    240 
    241 /** \internal \returns the tan of \a a (coeff-wise) */
    242 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    243 Packet ptan(const Packet& a) { using std::tan; return tan(a); }
    244 
    245 /** \internal \returns the arc sine of \a a (coeff-wise) */
    246 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    247 Packet pasin(const Packet& a) { using std::asin; return asin(a); }
    248 
    249 /** \internal \returns the arc cosine of \a a (coeff-wise) */
    250 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    251 Packet pacos(const Packet& a) { using std::acos; return acos(a); }
    252 
    253 /** \internal \returns the exp of \a a (coeff-wise) */
    254 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    255 Packet pexp(const Packet& a) { using std::exp; return exp(a); }
    256 
    257 /** \internal \returns the log of \a a (coeff-wise) */
    258 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    259 Packet plog(const Packet& a) { using std::log; return log(a); }
    260 
    261 /** \internal \returns the square-root of \a a (coeff-wise) */
    262 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
    263 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
    264 
    265 /***************************************************************************
    266 * The following functions might not have to be overwritten for vectorized types
    267 ***************************************************************************/
    268 
    269 /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
    270 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
    271 template<typename Packet>
    272 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
    273 {
    274   pstore(to, pset1<Packet>(a));
    275 }
    276 
    277 /** \internal \returns a * b + c (coeff-wise) */
    278 template<typename Packet> inline Packet
    279 pmadd(const Packet&  a,
    280          const Packet&  b,
    281          const Packet&  c)
    282 { return padd(pmul(a, b),c); }
    283 
    284 /** \internal \returns a packet version of \a *from.
    285   * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
    286 template<typename Packet, int LoadMode>
    287 inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
    288 {
    289   if(LoadMode == Aligned)
    290     return pload<Packet>(from);
    291   else
    292     return ploadu<Packet>(from);
    293 }
    294 
    295 /** \internal copy the packet \a from to \a *to.
    296   * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
    297 template<typename Scalar, typename Packet, int LoadMode>
    298 inline void pstoret(Scalar* to, const Packet& from)
    299 {
    300   if(LoadMode == Aligned)
    301     pstore(to, from);
    302   else
    303     pstoreu(to, from);
    304 }
    305 
    306 /** \internal default implementation of palign() allowing partial specialization */
    307 template<int Offset,typename PacketType>
    308 struct palign_impl
    309 {
    310   // by default data are aligned, so there is nothing to be done :)
    311   static inline void run(PacketType&, const PacketType&) {}
    312 };
    313 
    314 /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
    315   * of \a first and \a Offset first elements of \a second.
    316   *
    317   * This function is currently only used to optimize matrix-vector products on unligned matrices.
    318   * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
    319   * at the position \a Offset. For instance, for packets of 4 elements, we have:
    320   *  Input:
    321   *  - first = {f0,f1,f2,f3}
    322   *  - second = {s0,s1,s2,s3}
    323   * Output:
    324   *   - if Offset==0 then {f0,f1,f2,f3}
    325   *   - if Offset==1 then {f1,f2,f3,s0}
    326   *   - if Offset==2 then {f2,f3,s0,s1}
    327   *   - if Offset==3 then {f3,s0,s1,s3}
    328   */
    329 template<int Offset,typename PacketType>
    330 inline void palign(PacketType& first, const PacketType& second)
    331 {
    332   palign_impl<Offset,PacketType>::run(first,second);
    333 }
    334 
    335 /***************************************************************************
    336 * Fast complex products (GCC generates a function call which is very slow)
    337 ***************************************************************************/
    338 
    339 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
    340 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
    341 
    342 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
    343 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
    344 
    345 } // end namespace internal
    346 
    347 } // end namespace Eigen
    348 
    349 #endif // EIGEN_GENERIC_PACKET_MATH_H
    350 
    351