Home | History | Annotate | Download | only in Core
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud (at) inria.fr>
      5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1 (at) gmail.com>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_REDUX_H
     12 #define EIGEN_REDUX_H
     13 
     14 namespace Eigen {
     15 
     16 namespace internal {
     17 
     18 // TODO
     19 //  * implement other kind of vectorization
     20 //  * factorize code
     21 
     22 /***************************************************************************
     23 * Part 1 : the logic deciding a strategy for vectorization and unrolling
     24 ***************************************************************************/
     25 
     26 template<typename Func, typename Derived>
     27 struct redux_traits
     28 {
     29 public:
     30     typedef typename find_best_packet<typename Derived::Scalar,Derived::SizeAtCompileTime>::type PacketType;
     31   enum {
     32     PacketSize = unpacket_traits<PacketType>::size,
     33     InnerMaxSize = int(Derived::IsRowMajor)
     34                  ? Derived::MaxColsAtCompileTime
     35                  : Derived::MaxRowsAtCompileTime
     36   };
     37 
     38   enum {
     39     MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
     40                   && (functor_traits<Func>::PacketAccess),
     41     MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit),
     42     MaySliceVectorize  = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize
     43   };
     44 
     45 public:
     46   enum {
     47     Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
     48               : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
     49                                         : int(DefaultTraversal)
     50   };
     51 
     52 public:
     53   enum {
     54     Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost
     55          : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
     56     UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
     57   };
     58 
     59 public:
     60   enum {
     61     Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling
     62   };
     63 
     64 #ifdef EIGEN_DEBUG_ASSIGN
     65   static void debug()
     66   {
     67     std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
     68     std::cerr.setf(std::ios::hex, std::ios::basefield);
     69     EIGEN_DEBUG_VAR(Derived::Flags)
     70     std::cerr.unsetf(std::ios::hex);
     71     EIGEN_DEBUG_VAR(InnerMaxSize)
     72     EIGEN_DEBUG_VAR(PacketSize)
     73     EIGEN_DEBUG_VAR(MightVectorize)
     74     EIGEN_DEBUG_VAR(MayLinearVectorize)
     75     EIGEN_DEBUG_VAR(MaySliceVectorize)
     76     EIGEN_DEBUG_VAR(Traversal)
     77     EIGEN_DEBUG_VAR(UnrollingLimit)
     78     EIGEN_DEBUG_VAR(Unrolling)
     79     std::cerr << std::endl;
     80   }
     81 #endif
     82 };
     83 
     84 /***************************************************************************
     85 * Part 2 : unrollers
     86 ***************************************************************************/
     87 
     88 /*** no vectorization ***/
     89 
     90 template<typename Func, typename Derived, int Start, int Length>
     91 struct redux_novec_unroller
     92 {
     93   enum {
     94     HalfLength = Length/2
     95   };
     96 
     97   typedef typename Derived::Scalar Scalar;
     98 
     99   EIGEN_DEVICE_FUNC
    100   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
    101   {
    102     return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
    103                 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
    104   }
    105 };
    106 
    107 template<typename Func, typename Derived, int Start>
    108 struct redux_novec_unroller<Func, Derived, Start, 1>
    109 {
    110   enum {
    111     outer = Start / Derived::InnerSizeAtCompileTime,
    112     inner = Start % Derived::InnerSizeAtCompileTime
    113   };
    114 
    115   typedef typename Derived::Scalar Scalar;
    116 
    117   EIGEN_DEVICE_FUNC
    118   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
    119   {
    120     return mat.coeffByOuterInner(outer, inner);
    121   }
    122 };
    123 
    124 // This is actually dead code and will never be called. It is required
    125 // to prevent false warnings regarding failed inlining though
    126 // for 0 length run() will never be called at all.
    127 template<typename Func, typename Derived, int Start>
    128 struct redux_novec_unroller<Func, Derived, Start, 0>
    129 {
    130   typedef typename Derived::Scalar Scalar;
    131   EIGEN_DEVICE_FUNC
    132   static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
    133 };
    134 
    135 /*** vectorization ***/
    136 
    137 template<typename Func, typename Derived, int Start, int Length>
    138 struct redux_vec_unroller
    139 {
    140   enum {
    141     PacketSize = redux_traits<Func, Derived>::PacketSize,
    142     HalfLength = Length/2
    143   };
    144 
    145   typedef typename Derived::Scalar Scalar;
    146   typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
    147 
    148   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
    149   {
    150     return func.packetOp(
    151             redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
    152             redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
    153   }
    154 };
    155 
    156 template<typename Func, typename Derived, int Start>
    157 struct redux_vec_unroller<Func, Derived, Start, 1>
    158 {
    159   enum {
    160     index = Start * redux_traits<Func, Derived>::PacketSize,
    161     outer = index / int(Derived::InnerSizeAtCompileTime),
    162     inner = index % int(Derived::InnerSizeAtCompileTime),
    163     alignment = Derived::Alignment
    164   };
    165 
    166   typedef typename Derived::Scalar Scalar;
    167   typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
    168 
    169   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
    170   {
    171     return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
    172   }
    173 };
    174 
    175 /***************************************************************************
    176 * Part 3 : implementation of all cases
    177 ***************************************************************************/
    178 
    179 template<typename Func, typename Derived,
    180          int Traversal = redux_traits<Func, Derived>::Traversal,
    181          int Unrolling = redux_traits<Func, Derived>::Unrolling
    182 >
    183 struct redux_impl;
    184 
    185 template<typename Func, typename Derived>
    186 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
    187 {
    188   typedef typename Derived::Scalar Scalar;
    189   EIGEN_DEVICE_FUNC
    190   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
    191   {
    192     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    193     Scalar res;
    194     res = mat.coeffByOuterInner(0, 0);
    195     for(Index i = 1; i < mat.innerSize(); ++i)
    196       res = func(res, mat.coeffByOuterInner(0, i));
    197     for(Index i = 1; i < mat.outerSize(); ++i)
    198       for(Index j = 0; j < mat.innerSize(); ++j)
    199         res = func(res, mat.coeffByOuterInner(i, j));
    200     return res;
    201   }
    202 };
    203 
    204 template<typename Func, typename Derived>
    205 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
    206   : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
    207 {};
    208 
    209 template<typename Func, typename Derived>
    210 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
    211 {
    212   typedef typename Derived::Scalar Scalar;
    213   typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
    214 
    215   static Scalar run(const Derived &mat, const Func& func)
    216   {
    217     const Index size = mat.size();
    218 
    219     const Index packetSize = redux_traits<Func, Derived>::PacketSize;
    220     const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    221     enum {
    222       alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
    223       alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    224     };
    225     const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    226     const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    227     const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
    228     const Index alignedEnd2 = alignedStart + alignedSize2;
    229     const Index alignedEnd  = alignedStart + alignedSize;
    230     Scalar res;
    231     if(alignedSize)
    232     {
    233       PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
    234       if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
    235       {
    236         PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
    237         for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
    238         {
    239           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
    240           packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
    241         }
    242 
    243         packet_res0 = func.packetOp(packet_res0,packet_res1);
    244         if(alignedEnd>alignedEnd2)
    245           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
    246       }
    247       res = func.predux(packet_res0);
    248 
    249       for(Index index = 0; index < alignedStart; ++index)
    250         res = func(res,mat.coeff(index));
    251 
    252       for(Index index = alignedEnd; index < size; ++index)
    253         res = func(res,mat.coeff(index));
    254     }
    255     else // too small to vectorize anything.
    256          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    257     {
    258       res = mat.coeff(0);
    259       for(Index index = 1; index < size; ++index)
    260         res = func(res,mat.coeff(index));
    261     }
    262 
    263     return res;
    264   }
    265 };
    266 
    267 // NOTE: for SliceVectorizedTraversal we simply bypass unrolling
    268 template<typename Func, typename Derived, int Unrolling>
    269 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
    270 {
    271   typedef typename Derived::Scalar Scalar;
    272   typedef typename redux_traits<Func, Derived>::PacketType PacketType;
    273 
    274   EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
    275   {
    276     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    277     const Index innerSize = mat.innerSize();
    278     const Index outerSize = mat.outerSize();
    279     enum {
    280       packetSize = redux_traits<Func, Derived>::PacketSize
    281     };
    282     const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    283     Scalar res;
    284     if(packetedInnerSize)
    285     {
    286       PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
    287       for(Index j=0; j<outerSize; ++j)
    288         for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
    289           packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));
    290 
    291       res = func.predux(packet_res);
    292       for(Index j=0; j<outerSize; ++j)
    293         for(Index i=packetedInnerSize; i<innerSize; ++i)
    294           res = func(res, mat.coeffByOuterInner(j,i));
    295     }
    296     else // too small to vectorize anything.
    297          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
    298     {
    299       res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
    300     }
    301 
    302     return res;
    303   }
    304 };
    305 
    306 template<typename Func, typename Derived>
    307 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
    308 {
    309   typedef typename Derived::Scalar Scalar;
    310 
    311   typedef typename redux_traits<Func, Derived>::PacketType PacketScalar;
    312   enum {
    313     PacketSize = redux_traits<Func, Derived>::PacketSize,
    314     Size = Derived::SizeAtCompileTime,
    315     VectorizedSize = (Size / PacketSize) * PacketSize
    316   };
    317   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
    318   {
    319     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    320     if (VectorizedSize > 0) {
    321       Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
    322       if (VectorizedSize != Size)
    323         res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
    324       return res;
    325     }
    326     else {
    327       return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
    328     }
    329   }
    330 };
    331 
    332 // evaluator adaptor
    333 template<typename _XprType>
    334 class redux_evaluator
    335 {
    336 public:
    337   typedef _XprType XprType;
    338   EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}
    339 
    340   typedef typename XprType::Scalar Scalar;
    341   typedef typename XprType::CoeffReturnType CoeffReturnType;
    342   typedef typename XprType::PacketScalar PacketScalar;
    343   typedef typename XprType::PacketReturnType PacketReturnType;
    344 
    345   enum {
    346     MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    347     MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    348     // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    349     Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    350     IsRowMajor = XprType::IsRowMajor,
    351     SizeAtCompileTime = XprType::SizeAtCompileTime,
    352     InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    353     CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    354     Alignment = evaluator<XprType>::Alignment
    355   };
    356 
    357   EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
    358   EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
    359   EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
    360   EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
    361   EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }
    362 
    363   EIGEN_DEVICE_FUNC
    364   CoeffReturnType coeff(Index row, Index col) const
    365   { return m_evaluator.coeff(row, col); }
    366 
    367   EIGEN_DEVICE_FUNC
    368   CoeffReturnType coeff(Index index) const
    369   { return m_evaluator.coeff(index); }
    370 
    371   template<int LoadMode, typename PacketType>
    372   PacketType packet(Index row, Index col) const
    373   { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }
    374 
    375   template<int LoadMode, typename PacketType>
    376   PacketType packet(Index index) const
    377   { return m_evaluator.template packet<LoadMode,PacketType>(index); }
    378 
    379   EIGEN_DEVICE_FUNC
    380   CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
    381   { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
    382 
    383   template<int LoadMode, typename PacketType>
    384   PacketType packetByOuterInner(Index outer, Index inner) const
    385   { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }
    386 
    387   const XprType & nestedExpression() const { return m_xpr; }
    388 
    389 protected:
    390   internal::evaluator<XprType> m_evaluator;
    391   const XprType &m_xpr;
    392 };
    393 
    394 } // end namespace internal
    395 
    396 /***************************************************************************
    397 * Part 4 : public API
    398 ***************************************************************************/
    399 
    400 
    401 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
    402   *
    403   * The template parameter \a BinaryOp is the type of the functor \a func which must be
    404   * an associative operator. Both current C++98 and C++11 functor styles are handled.
    405   *
    406   * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
    407   */
    408 template<typename Derived>
    409 template<typename Func>
    410 typename internal::traits<Derived>::Scalar
    411 DenseBase<Derived>::redux(const Func& func) const
    412 {
    413   eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
    414 
    415   typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
    416   ThisEvaluator thisEval(derived());
    417 
    418   return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
    419 }
    420 
    421 /** \returns the minimum of all coefficients of \c *this.
    422   * \warning the result is undefined if \c *this contains NaN.
    423   */
    424 template<typename Derived>
    425 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
    426 DenseBase<Derived>::minCoeff() const
    427 {
    428   return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
    429 }
    430 
    431 /** \returns the maximum of all coefficients of \c *this.
    432   * \warning the result is undefined if \c *this contains NaN.
    433   */
    434 template<typename Derived>
    435 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
    436 DenseBase<Derived>::maxCoeff() const
    437 {
    438   return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
    439 }
    440 
    441 /** \returns the sum of all coefficients of \c *this
    442   *
    443   * If \c *this is empty, then the value 0 is returned.
    444   *
    445   * \sa trace(), prod(), mean()
    446   */
    447 template<typename Derived>
    448 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
    449 DenseBase<Derived>::sum() const
    450 {
    451   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    452     return Scalar(0);
    453   return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
    454 }
    455 
    456 /** \returns the mean of all coefficients of *this
    457 *
    458 * \sa trace(), prod(), sum()
    459 */
    460 template<typename Derived>
    461 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
    462 DenseBase<Derived>::mean() const
    463 {
    464 #ifdef __INTEL_COMPILER
    465   #pragma warning push
    466   #pragma warning ( disable : 2259 )
    467 #endif
    468   return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
    469 #ifdef __INTEL_COMPILER
    470   #pragma warning pop
    471 #endif
    472 }
    473 
    474 /** \returns the product of all coefficients of *this
    475   *
    476   * Example: \include MatrixBase_prod.cpp
    477   * Output: \verbinclude MatrixBase_prod.out
    478   *
    479   * \sa sum(), mean(), trace()
    480   */
    481 template<typename Derived>
    482 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
    483 DenseBase<Derived>::prod() const
    484 {
    485   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    486     return Scalar(1);
    487   return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
    488 }
    489 
    490 /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
    491   *
    492   * \c *this can be any matrix, not necessarily square.
    493   *
    494   * \sa diagonal(), sum()
    495   */
    496 template<typename Derived>
    497 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
    498 MatrixBase<Derived>::trace() const
    499 {
    500   return derived().diagonal().sum();
    501 }
    502 
    503 } // end namespace Eigen
    504 
    505 #endif // EIGEN_REDUX_H
    506