Home | History | Annotate | Download | only in Tensor
      1 // This file is part of Eigen, a lightweight C++ template library
      2 // for linear algebra.
      3 //
      4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog (at) gmail.com>
      5 //
      6 // This Source Code Form is subject to the terms of the Mozilla
      7 // Public License v. 2.0. If a copy of the MPL was not distributed
      8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9 
     10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
     11 #define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
     12 
     13 namespace Eigen {
     14 
     15 /** \class TensorStriding
     16   * \ingroup CXX11_Tensor_Module
     17   *
     18   * \brief Tensor striding class.
     19   *
     20   *
     21   */
     22 namespace internal {
// Traits for TensorStridingOp: forward the scalar type, storage kind, index
// type, dimension count and layout of the wrapped expression unchanged.
// Striding only alters the runtime extents along each dimension, never the
// compile-time shape metadata.
template<typename Strides, typename XprType>
struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
};
     35 
// Evaluate striding expressions by const reference: the expression node is
// lightweight and is consumed in place by its TensorEvaluator below.
template<typename Strides, typename XprType>
struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingOp<Strides, XprType>& type;
};
     41 
// Nest striding expressions by value inside larger expression trees, so the
// node outlives the temporaries of the statement that built it.
template<typename Strides, typename XprType>
struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
{
  typedef TensorStridingOp<Strides, XprType> type;
};
     47 
     48 }  // end namespace internal
     49 
     50 
     51 
// Expression node representing a strided view of `XprType`: along dimension i
// only every strides()[i]-th coefficient of the input is exposed. The node
// stores the wrapped expression and the per-dimension strides; all evaluation
// logic lives in the TensorEvaluator specializations below.
template<typename Strides, typename XprType>
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
  public:
  typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;

  // \param expr expression to view with a stride
  // \param dims per-dimension strides (one entry per tensor dimension)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
      : m_xpr(expr), m_dims(dims) {}

    // Per-dimension strides this view applies.
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_dims; }

    // The wrapped input expression.
    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    // Assigning to a strided view writes through to the underlying
    // expression: the assignment is materialized immediately by running a
    // TensorAssignOp on the default (single-threaded CPU) device.
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingOp& operator = (const TensorStridingOp& other)
    {
      typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    // Same as above for an arbitrary right-hand-side tensor expression.
    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;   // wrapped expression (nested by value)
    const Strides m_dims;             // per-dimension strides
};
     96 
     97 
     98 // Eval as rvalue
     99 template<typename Strides, typename ArgType, typename Device>
    100 struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
    101 {
    102   typedef TensorStridingOp<Strides, ArgType> XprType;
    103   typedef typename XprType::Index Index;
    104   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
    105   typedef DSizes<Index, NumDims> Dimensions;
    106   typedef typename XprType::Scalar Scalar;
    107   typedef typename XprType::CoeffReturnType CoeffReturnType;
    108   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
    109   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
    110 
    111   enum {
    112     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    113     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    114     Layout = TensorEvaluator<ArgType, Device>::Layout,
    115     CoordAccess = false,  // to be implemented
    116     RawAccess = false
    117   };
    118 
    119   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    120       : m_impl(op.expression(), device)
    121   {
    122     m_dimensions = m_impl.dimensions();
    123     for (int i = 0; i < NumDims; ++i) {
    124       m_dimensions[i] = ceilf(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
    125     }
    126 
    127     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    128     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    129       m_outputStrides[0] = 1;
    130       m_inputStrides[0] = 1;
    131       for (int i = 1; i < NumDims; ++i) {
    132         m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
    133         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
    134         m_inputStrides[i-1] *= op.strides()[i-1];
    135       }
    136       m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
    137     } else {  // RowMajor
    138       m_outputStrides[NumDims-1] = 1;
    139       m_inputStrides[NumDims-1] = 1;
    140       for (int i = NumDims - 2; i >= 0; --i) {
    141         m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
    142         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
    143         m_inputStrides[i+1] *= op.strides()[i+1];
    144       }
    145       m_inputStrides[0] *= op.strides()[0];
    146     }
    147   }
    148 
    149   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
    150 
    151   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
    152     m_impl.evalSubExprsIfNeeded(NULL);
    153     return true;
    154   }
    155   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    156     m_impl.cleanup();
    157   }
    158 
    159   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
    160   {
    161     return m_impl.coeff(srcCoeff(index));
    162   }
    163 
    164   template<int LoadMode>
    165   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    166   {
    167     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    168     eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
    169 
    170     Index inputIndices[] = {0, 0};
    171     Index indices[] = {index, index + PacketSize - 1};
    172     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    173       for (int i = NumDims - 1; i > 0; --i) {
    174         const Index idx0 = indices[0] / m_outputStrides[i];
    175         const Index idx1 = indices[1] / m_outputStrides[i];
    176         inputIndices[0] += idx0 * m_inputStrides[i];
    177         inputIndices[1] += idx1 * m_inputStrides[i];
    178         indices[0] -= idx0 * m_outputStrides[i];
    179         indices[1] -= idx1 * m_outputStrides[i];
    180       }
    181       inputIndices[0] += indices[0] * m_inputStrides[0];
    182       inputIndices[1] += indices[1] * m_inputStrides[0];
    183     } else {  // RowMajor
    184       for (int i = 0; i < NumDims - 1; ++i) {
    185         const Index idx0 = indices[0] / m_outputStrides[i];
    186         const Index idx1 = indices[1] / m_outputStrides[i];
    187         inputIndices[0] += idx0 * m_inputStrides[i];
    188         inputIndices[1] += idx1 * m_inputStrides[i];
    189         indices[0] -= idx0 * m_outputStrides[i];
    190         indices[1] -= idx1 * m_outputStrides[i];
    191       }
    192       inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
    193       inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
    194     }
    195     if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
    196       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
    197       return rslt;
    198     }
    199     else {
    200       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    201       values[0] = m_impl.coeff(inputIndices[0]);
    202       values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
    203       for (int i = 1; i < PacketSize-1; ++i) {
    204         values[i] = coeff(index+i);
    205       }
    206       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    207       return rslt;
    208     }
    209   }
    210 
    211   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    212     double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
    213                                            TensorOpCost::MulCost<Index>() +
    214                                            TensorOpCost::DivCost<Index>()) +
    215         TensorOpCost::MulCost<Index>();
    216     if (vectorized) {
    217       compute_cost *= 2;  // packet() computes two indices
    218     }
    219     const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
    220     return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
    221         // Computation is not vectorized per se, but it is done once per packet.
    222         TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
    223   }
    224 
    225   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
    226 
    227  protected:
    228   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
    229   {
    230     Index inputIndex = 0;
    231     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
    232       for (int i = NumDims - 1; i > 0; --i) {
    233         const Index idx = index / m_outputStrides[i];
    234         inputIndex += idx * m_inputStrides[i];
    235         index -= idx * m_outputStrides[i];
    236       }
    237       inputIndex += index * m_inputStrides[0];
    238     } else {  // RowMajor
    239       for (int i = 0; i < NumDims - 1; ++i) {
    240         const Index idx = index / m_outputStrides[i];
    241         inputIndex += idx * m_inputStrides[i];
    242         index -= idx * m_outputStrides[i];
    243       }
    244       inputIndex += index * m_inputStrides[NumDims-1];
    245     }
    246     return inputIndex;
    247   }
    248 
    249   Dimensions m_dimensions;
    250   array<Index, NumDims> m_outputStrides;
    251   array<Index, NumDims> m_inputStrides;
    252   TensorEvaluator<ArgType, Device> m_impl;
    253 };
    254 
    255 
    256 // Eval as lvalue
// Eval as lvalue: extends the rvalue evaluator with coeffRef/writePacket so a
// strided view can appear on the left-hand side of an assignment. Index
// remapping (srcCoeff, m_inputStrides, m_outputStrides) is inherited from the
// const evaluator.
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef TensorEvaluator<const XprType, Device> Base;
  //  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  //  typedef DSizes<Index, NumDims> Dimensions;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  // Writable scalar access: remap the output index to the strided input
  // index and write through to the underlying evaluator.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  // Writable packet access: mirrors the read-side packet() logic. The input
  // indices of the first and last packet elements are computed; if they are
  // contiguous the packet is stored in one shot, otherwise the packet is
  // spilled to a buffer and scattered coefficient by coefficient.
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      // Peel dimensions outermost-first, accumulating the input offsets of
      // both packet endpoints simultaneously.
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[0];
      inputIndices[1] += indices[1] * this->m_inputStrides[0];
    } else {  // RowMajor
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      // Contiguous destination: store the whole packet at once.
      this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
    }
    else {
      // Scatter: spill the packet to an aligned buffer, write the two
      // endpoints directly, and route the interior elements through
      // coeffRef() which recomputes srcCoeff per element.
      EIGEN_ALIGN_MAX Scalar values[PacketSize];
      internal::pstore<Scalar, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
      for (int i = 1; i < PacketSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};
    334 
    335 
    336 } // end namespace Eigen
    337 
    338 #endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
    339