Home | History | Annotate | Download | only in products
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1 (at) gmail.com>
      5 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud (at) inria.fr>
      6 //
      7 // This Source Code Form is subject to the terms of the Mozilla
      8 // Public License v. 2.0. If a copy of the MPL was not distributed
      9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
     10 
     11 #ifndef EIGEN_COEFFBASED_PRODUCT_H
     12 #define EIGEN_COEFFBASED_PRODUCT_H
     13 
     14 namespace Eigen {
     15 
     16 namespace internal {
     17 
     18 /*********************************************************************************
     19 *  Coefficient based product implementation.
     20 *  It is designed for the following use cases:
     21 *  - small fixed sizes
     22 *  - lazy products
     23 *********************************************************************************/
     24 
     25 /* Since the all the dimensions of the product are small, here we can rely
     26  * on the generic Assign mechanism to evaluate the product per coeff (or packet).
     27  *
     28  * Note that here the inner-loops should always be unrolled.
     29  */
     30 
     31 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
     32 struct product_coeff_impl;
     33 
     34 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
     35 struct product_packet_impl;
     36 
     37 template<typename LhsNested, typename RhsNested, int NestingFlags>
     38 struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
     39 {
     40   typedef MatrixXpr XprKind;
     41   typedef typename remove_all<LhsNested>::type _LhsNested;
     42   typedef typename remove_all<RhsNested>::type _RhsNested;
     43   typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
     44   typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
     45                                            typename traits<_RhsNested>::StorageKind>::ret StorageKind;
     46   typedef typename promote_index_type<typename traits<_LhsNested>::Index,
     47                                          typename traits<_RhsNested>::Index>::type Index;
     48 
     49   enum {
     50       LhsCoeffReadCost = _LhsNested::CoeffReadCost,
     51       RhsCoeffReadCost = _RhsNested::CoeffReadCost,
     52       LhsFlags = _LhsNested::Flags,
     53       RhsFlags = _RhsNested::Flags,
     54 
     55       RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
     56       ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
     57       InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
     58 
     59       MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
     60       MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
     61 
     62       LhsRowMajor = LhsFlags & RowMajorBit,
     63       RhsRowMajor = RhsFlags & RowMajorBit,
     64 
     65       SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
     66 
     67       CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
     68                       && (ColsAtCompileTime == Dynamic
     69                           || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
     70                               && (RhsFlags&AlignedBit)
     71                              )
     72                          ),
     73 
     74       CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
     75                       && (RowsAtCompileTime == Dynamic
     76                           || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
     77                               && (LhsFlags&AlignedBit)
     78                              )
     79                          ),
     80 
     81       EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
     82                      : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
     83                      : (RhsRowMajor && !CanVectorizeLhs),
     84 
     85       Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
     86             | (EvalToRowMajor ? RowMajorBit : 0)
     87             | NestingFlags
     88             | (LhsFlags & RhsFlags & AlignedBit)
     89             // TODO enable vectorization for mixed types
     90             | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
     91 
     92       CoeffReadCost = InnerSize == Dynamic ? Dynamic
     93                     : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
     94                       + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
     95 
     96       /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
     97       * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
     98       * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
     99       * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
    100       */
    101       CanVectorizeInner =    SameType
    102                           && LhsRowMajor
    103                           && (!RhsRowMajor)
    104                           && (LhsFlags & RhsFlags & ActualPacketAccessBit)
    105                           && (LhsFlags & RhsFlags & AlignedBit)
    106                           && (InnerSize % packet_traits<Scalar>::size == 0)
    107     };
    108 };
    109 
    110 } // end namespace internal
    111 
    112 template<typename LhsNested, typename RhsNested, int NestingFlags>
    113 class CoeffBasedProduct
    114   : internal::no_assignment_operator,
    115     public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
    116 {
    117   public:
    118 
    119     typedef MatrixBase<CoeffBasedProduct> Base;
    120     EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
    121     typedef typename Base::PlainObject PlainObject;
    122 
    123   private:
    124 
    125     typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
    126     typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
    127 
    128     enum {
    129       PacketSize = internal::packet_traits<Scalar>::size,
    130       InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
    131       Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
    132       CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
    133     };
    134 
    135     typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
    136                                    Unroll ? InnerSize-1 : Dynamic,
    137                                    _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
    138 
    139     typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
    140 
    141   public:
    142 
    143     inline CoeffBasedProduct(const CoeffBasedProduct& other)
    144       : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
    145     {}
    146 
    147     template<typename Lhs, typename Rhs>
    148     inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
    149       : m_lhs(lhs), m_rhs(rhs)
    150     {
    151       // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
    152       // We still allow to mix T and complex<T>.
    153       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
    154         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
    155       eigen_assert(lhs.cols() == rhs.rows()
    156         && "invalid matrix product"
    157         && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    158     }
    159 
    160     EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
    161     EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
    162 
    163     EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
    164     {
    165       Scalar res;
    166       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
    167       return res;
    168     }
    169 
    170     /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
    171      * which is why we don't set the LinearAccessBit.
    172      */
    173     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    174     {
    175       Scalar res;
    176       const Index row = RowsAtCompileTime == 1 ? 0 : index;
    177       const Index col = RowsAtCompileTime == 1 ? index : 0;
    178       ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
    179       return res;
    180     }
    181 
    182     template<int LoadMode>
    183     EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
    184     {
    185       PacketScalar res;
    186       internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
    187                               Unroll ? InnerSize-1 : Dynamic,
    188                               _LhsNested, _RhsNested, PacketScalar, LoadMode>
    189         ::run(row, col, m_lhs, m_rhs, res);
    190       return res;
    191     }
    192 
    193     // Implicit conversion to the nested type (trigger the evaluation of the product)
    194     EIGEN_STRONG_INLINE operator const PlainObject& () const
    195     {
    196       m_result.lazyAssign(*this);
    197       return m_result;
    198     }
    199 
    200     const _LhsNested& lhs() const { return m_lhs; }
    201     const _RhsNested& rhs() const { return m_rhs; }
    202 
    203     const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
    204     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
    205 
    206     template<int DiagonalIndex>
    207     const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
    208     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
    209 
    210     const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
    211     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
    212 
    213   protected:
    214     typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
    215     typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
    216 
    217     mutable PlainObject m_result;
    218 };
    219 
    220 namespace internal {
    221 
    222 // here we need to overload the nested rule for products
    223 // such that the nested type is a const reference to a plain matrix
    224 template<typename Lhs, typename Rhs, int N, typename PlainObject>
    225 struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
    226 {
    227   typedef PlainObject const& type;
    228 };
    229 
    230 /***************************************************************************
    231 * Normal product .coeff() implementation (with meta-unrolling)
    232 ***************************************************************************/
    233 
    234 /**************************************
    235 *** Scalar path  - no vectorization ***
    236 **************************************/
    237 
    238 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
    239 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
    240 {
    241   typedef typename Lhs::Index Index;
    242   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
    243   {
    244     product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
    245     res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
    246   }
    247 };
    248 
    249 template<typename Lhs, typename Rhs, typename RetScalar>
    250 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
    251 {
    252   typedef typename Lhs::Index Index;
    253   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
    254   {
    255     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
    256   }
    257 };
    258 
    259 template<typename Lhs, typename Rhs, typename RetScalar>
    260 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
    261 {
    262   typedef typename Lhs::Index Index;
    263   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
    264   {
    265     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    266     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
    267       for(Index i = 1; i < lhs.cols(); ++i)
    268         res += lhs.coeff(row, i) * rhs.coeff(i, col);
    269   }
    270 };
    271 
    272 /*******************************************
    273 *** Scalar path with inner vectorization ***
    274 *******************************************/
    275 
    276 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
    277 struct product_coeff_vectorized_unroller
    278 {
    279   typedef typename Lhs::Index Index;
    280   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
    281   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
    282   {
    283     product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    284     pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
    285   }
    286 };
    287 
    288 template<typename Lhs, typename Rhs, typename Packet>
    289 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
    290 {
    291   typedef typename Lhs::Index Index;
    292   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
    293   {
    294     pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
    295   }
    296 };
    297 
    298 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
    299 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
    300 {
    301   typedef typename Lhs::PacketScalar Packet;
    302   typedef typename Lhs::Index Index;
    303   enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
    304   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
    305   {
    306     Packet pres;
    307     product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    308     product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
    309     res = predux(pres);
    310   }
    311 };
    312 
    313 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
    314 struct product_coeff_vectorized_dyn_selector
    315 {
    316   typedef typename Lhs::Index Index;
    317   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
    318   {
    319     res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
    320   }
    321 };
    322 
    323 // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
    324 // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
    325 template<typename Lhs, typename Rhs, int RhsCols>
    326 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
    327 {
    328   typedef typename Lhs::Index Index;
    329   static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
    330   {
    331     res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
    332   }
    333 };
    334 
    335 template<typename Lhs, typename Rhs, int LhsRows>
    336 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
    337 {
    338   typedef typename Lhs::Index Index;
    339   static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
    340   {
    341     res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
    342   }
    343 };
    344 
    345 template<typename Lhs, typename Rhs>
    346 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
    347 {
    348   typedef typename Lhs::Index Index;
    349   static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
    350   {
    351     res = lhs.transpose().cwiseProduct(rhs).sum();
    352   }
    353 };
    354 
    355 template<typename Lhs, typename Rhs, typename RetScalar>
    356 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
    357 {
    358   typedef typename Lhs::Index Index;
    359   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
    360   {
    361     product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
    362   }
    363 };
    364 
    365 /*******************
    366 *** Packet path  ***
    367 *******************/
    368 
    369 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
    370 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
    371 {
    372   typedef typename Lhs::Index Index;
    373   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
    374   {
    375     product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    376     res =  pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
    377   }
    378 };
    379 
    380 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
    381 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
    382 {
    383   typedef typename Lhs::Index Index;
    384   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
    385   {
    386     product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    387     res =  pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
    388   }
    389 };
    390 
    391 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
    392 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
    393 {
    394   typedef typename Lhs::Index Index;
    395   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
    396   {
    397     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
    398   }
    399 };
    400 
    401 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
    402 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
    403 {
    404   typedef typename Lhs::Index Index;
    405   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
    406   {
    407     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
    408   }
    409 };
    410 
    411 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
    412 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
    413 {
    414   typedef typename Lhs::Index Index;
    415   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
    416   {
    417     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    418     res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
    419       for(Index i = 1; i < lhs.cols(); ++i)
    420         res =  pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
    421   }
    422 };
    423 
    424 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
    425 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
    426 {
    427   typedef typename Lhs::Index Index;
    428   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
    429   {
    430     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
    431     res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
    432       for(Index i = 1; i < lhs.cols(); ++i)
    433         res =  pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
    434   }
    435 };
    436 
    437 } // end namespace internal
    438 
    439 } // end namespace Eigen
    440 
    441 #endif // EIGEN_COEFFBASED_PRODUCT_H
    442