// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1 (at) gmail.com>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud (at) inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_COEFFBASED_PRODUCT_H
#define EIGEN_COEFFBASED_PRODUCT_H

namespace Eigen {

namespace internal {

/*********************************************************************************
* Coefficient based product implementation.
* It is designed for the following use cases:
* - small fixed sizes
* - lazy products
*********************************************************************************/

/* Since all the dimensions of the product are small, we can rely here
 * on the generic Assign mechanism to evaluate the product per coeff (or packet).
 *
 * Note that here the inner loops should always be unrolled.
 */

template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl;

template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl;

template<typename LhsNested, typename RhsNested, int NestingFlags>
struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
{
  typedef MatrixXpr XprKind;
  typedef typename remove_all<LhsNested>::type _LhsNested;
  typedef typename remove_all<RhsNested>::type _RhsNested;
  typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
  typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
                                        typename traits<_RhsNested>::StorageKind>::ret StorageKind;
  typedef typename promote_index_type<typename traits<_LhsNested>::Index,
                                      typename traits<_RhsNested>::Index>::type Index;

  enum {
    LhsCoeffReadCost = _LhsNested::CoeffReadCost,
    RhsCoeffReadCost = _RhsNested::CoeffReadCost,
    LhsFlags = _LhsNested::Flags,
    RhsFlags = _RhsNested::Flags,

    RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
    ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
    InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),

    MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,

    LhsRowMajor = LhsFlags & RowMajorBit,
    RhsRowMajor = RhsFlags & RowMajorBit,

    SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,

    CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
                    && (ColsAtCompileTime == Dynamic
                        || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
                             && (RhsFlags&AlignedBit)
                           )
                       ),

    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
                    && (RowsAtCompileTime == Dynamic
                        || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
                             && (LhsFlags&AlignedBit)
                           )
                       ),

    EvalToRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
                   : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
                   : (RhsRowMajor && !CanVectorizeLhs),

    Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
          | (EvalToRowMajor ? RowMajorBit : 0)
          | NestingFlags
          | (LhsFlags & RhsFlags & AlignedBit)
          // TODO enable vectorization for mixed types
          | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),

    CoeffReadCost = InnerSize == Dynamic ? Dynamic
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,

    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, which doesn't affect the ABI.
     */
    CanVectorizeInner =    SameType
                        && LhsRowMajor
                        && (!RhsRowMajor)
                        && (LhsFlags & RhsFlags & ActualPacketAccessBit)
                        && (LhsFlags & RhsFlags & AlignedBit)
                        && (InnerSize % packet_traits<Scalar>::size == 0)
  };
};

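/* Worked example of the CoeffReadCost formula above (illustration only, assuming both
 * operands are plain float matrices nested by reference, so that LhsCoeffReadCost ==
 * RhsCoeffReadCost == 1, and NumTraits<float>::MulCost == NumTraits<float>::AddCost == 1):
 * for a 3x3 product, InnerSize == 3, and thus
 *   CoeffReadCost = 3 * (1 + 1 + 1) + 2 * 1 = 11,
 * which is typically well below EIGEN_UNROLLING_LIMIT, so the product class below
 * unrolls its inner loop (see the Unroll enum of CoeffBasedProduct).
 */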

} // end namespace internal

template<typename LhsNested, typename RhsNested, int NestingFlags>
class CoeffBasedProduct
  : internal::no_assignment_operator,
    public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
{
  public:

    typedef MatrixBase<CoeffBasedProduct> Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
    typedef typename Base::PlainObject PlainObject;

  private:

    typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
    typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;

    enum {
      PacketSize = internal::packet_traits<Scalar>::size,
      InnerSize  = internal::traits<CoeffBasedProduct>::InnerSize,
      Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
      CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
    };

    typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
                                         Unroll ? InnerSize-1 : Dynamic,
                                         _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;

    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;

  public:

    inline CoeffBasedProduct(const CoeffBasedProduct& other)
      : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
    {}

    template<typename Lhs, typename Rhs>
    inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
      : m_lhs(lhs), m_rhs(rhs)
    {
      // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
      // We still allow mixing T and complex<T>.
      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
      eigen_assert(lhs.cols() == rhs.rows()
        && "invalid matrix product"
        && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
    }
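    // Example of what the static assertion above allows and rejects (illustration only):
    // MatrixXf * MatrixXcf is accepted (both have RealScalar == float), whereas
    // MatrixXf * MatrixXd is rejected and requires an explicit conversion,
    // e.g. via .cast<double>() on the float operand.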

    EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
    EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }

    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
    {
      Scalar res;
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    /* Allow index-based non-packet access. It is, however, impossible to allow index-based packet access,
     * which is why we don't set the LinearAccessBit.
     */
    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
    {
      Scalar res;
      const Index row = RowsAtCompileTime == 1 ? 0 : index;
      const Index col = RowsAtCompileTime == 1 ? index : 0;
      ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    template<int LoadMode>
    EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
    {
      PacketScalar res;
      internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
                                    Unroll ? InnerSize-1 : Dynamic,
                                    _LhsNested, _RhsNested, PacketScalar, LoadMode>
        ::run(row, col, m_lhs, m_rhs, res);
      return res;
    }

    // Implicit conversion to the nested type (triggers the evaluation of the product)
    EIGEN_STRONG_INLINE operator const PlainObject& () const
    {
      m_result.lazyAssign(*this);
      return m_result;
    }

    const _LhsNested& lhs() const { return m_lhs; }
    const _RhsNested& rhs() const { return m_rhs; }

    const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }

    template<int DiagonalIndex>
    const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }

    const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }

  protected:
    typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
    typename internal::add_const_on_value_type<RhsNested>::type m_rhs;

    mutable PlainObject m_result;
};

namespace internal {

// here we need to overload the nested rule for products
// such that the nested type is a const reference to a plain matrix
template<typename Lhs, typename Rhs, int N, typename PlainObject>
struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
{
  typedef PlainObject const& type;
};

/***************************************************************************
* Normal product .coeff() implementation (with meta-unrolling)
***************************************************************************/

/**************************************
*** Scalar path - no vectorization ***
**************************************/

template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
    res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
  }
};

template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
  }
};
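// Concrete view of the meta-unrolling above (illustration only): for an unrolled product
// with InnerSize == 3, ScalarCoeffImpl is instantiated with UnrollingIndex == 2 and the
// recursion expands to the equivalent of
//   res  = lhs.coeff(row, 0) * rhs.coeff(0, col);
//   res += lhs.coeff(row, 1) * rhs.coeff(1, col);
//   res += lhs.coeff(row, 2) * rhs.coeff(2, col);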

template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
  {
    eigen_assert(lhs.cols()>0 && "you are using an uninitialized matrix");
    res = lhs.coeff(row, 0) * rhs.coeff(0, col);
    for(Index i = 1; i < lhs.cols(); ++i)
      res += lhs.coeff(row, i) * rhs.coeff(i, col);
  }
};

/*******************************************
*** Scalar path with inner vectorization ***
*******************************************/

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
struct product_coeff_vectorized_unroller
{
  typedef typename Lhs::Index Index;
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
  }
};

template<typename Lhs, typename Rhs, typename Packet>
struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
  {
    pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
  }
};

template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::PacketScalar Packet;
  typedef typename Lhs::Index Index;
  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
  {
    Packet pres;
    product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
    product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
    res = predux(pres);
  }
};
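// Illustration of the vectorized path above (assuming float with PacketSize == 4 and an
// unrolled product with InnerSize == 8, hence UnrollingIndex == 7): the unroller is entered
// at index 7+1-4 == 4, so it performs two aligned packet multiplies (at inner indices 0 and 4)
// and one packet add, and the final predux(pres) reduces the packet lanes to the scalar result.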

template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
struct product_coeff_vectorized_dyn_selector
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
  }
};

// NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
// NOTE maybe they are now useless since we have a specialization for Block<Matrix>
template<typename Lhs, typename Rhs, int RhsCols>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
  }
};

template<typename Lhs, typename Rhs, int LhsRows>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
  }
};

template<typename Lhs, typename Rhs>
struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    res = lhs.transpose().cwiseProduct(rhs).sum();
  }
};

template<typename Lhs, typename Rhs, typename RetScalar>
struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
  {
    product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
  }
};

/*******************
*** Packet path ***
*******************/

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
  }
};

template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
    res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)), rhs.template packet<LoadMode>(0, col));
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
  {
    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
  }
};
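// Note on the packet specializations above and below (illustration only): in the RowMajor
// case `res` holds PacketSize consecutive coefficients of one row of the result, so each
// step broadcasts a single lhs coefficient with pset1 and multiply-adds it against a packet
// of rhs. The ColMajor case is the mirror image and produces a packet of one result column.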

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
    eigen_assert(lhs.cols()>0 && "you are using an uninitialized matrix");
    res = pmul(pset1<Packet>(lhs.coeff(row, 0)), rhs.template packet<LoadMode>(0, col));
    for(Index i = 1; i < lhs.cols(); ++i)
      res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
  }
};

template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
  typedef typename Lhs::Index Index;
  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
  {
    eigen_assert(lhs.cols()>0 && "you are using an uninitialized matrix");
    res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
    for(Index i = 1; i < lhs.cols(); ++i)
      res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_COEFFBASED_PRODUCT_H