// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

namespace Eigen {

/** \class TensorReshaping
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor reshaping class.
  *
  * Reinterprets the coefficients of the input expression according to a new
  * set of dimensions; the total number of coefficients must stay unchanged.
  *
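  * A minimal usage sketch (illustrative only; it assumes the reshape()
  * method exposed by TensorBase and an input whose total size matches the
  * requested shape):
  *
  * \code
  * Eigen::Tensor<float, 2> input(7, 11);
  * input.setRandom();
  * Eigen::array<Eigen::DenseIndex, 3> three_dims{{7, 11, 1}};
  * Eigen::Tensor<float, 3> result = input.reshape(three_dims);
  * \endcode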
  */
namespace internal {
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

}  // end namespace internal


template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator=(const TensorReshapingOp& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator=(const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};


// Eval as rvalue
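// Reshaping never changes the flat (linear) order of the coefficients, so
// this evaluator simply forwards every coefficient and packet access to the
// evaluator of the input expression.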
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};


// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }
};


/** \class TensorSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor slicing class.
  *
  * Extracts a sub-tensor (patch) of the given sizes, starting at the given
  * indices.
  *
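  * A minimal usage sketch (illustrative only; it assumes the slice() method
  * exposed by TensorBase):
  *
  * \code
  * Eigen::Tensor<int, 2> input(4, 3);
  * input.setRandom();
  * Eigen::array<Eigen::DenseIndex, 2> offsets = {{1, 0}};
  * Eigen::array<Eigen::DenseIndex, 2> extents = {{2, 2}};
  * Eigen::Tensor<int, 2> patch = input.slice(offsets, extents);
  * \endcode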
  */
namespace internal {
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

}  // end namespace internal


template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
        : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator=(const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator=(const TensorSlicingOp& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};


// FIXME: figure out the exact threshold
namespace {
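// Predicate deciding, from the length of the longest contiguous run of
// coefficients in the slice, whether a device memcpy is likely to beat
// coefficient-by-coefficient evaluation.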
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator()(Index val) const { return val > threshold_; }

 private:
  Index threshold_;
};

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice>  {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator()(Index val) const { return val > 4*1024*1024; }
};
#endif
}  // namespace

// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    }
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

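  // Fast path: when the scalar type needs no per-element initialization and
  // both the destination buffer and the input's raw data are addressable,
  // the slice is copied with one memcpy per contiguous run of coefficients.
  // Returning false tells the caller that no further evaluation is needed.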
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
      if (trigger(contiguous_values)) {
        Scalar* src = (Scalar*)m_impl.data();
        for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

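  // Vectorized read: compute the input indices of the first and last
  // coefficients of the packet. If they turn out to be contiguous in the
  // input, a single unaligned packet load suffices; otherwise the packet is
  // gathered coefficient by coefficient.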
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }

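  // Raw buffer access is only possible when the slice is a contiguous chunk
  // of the input: past the first dimension that is actually sliced, every
  // remaining (slower-varying) dimension must have size 1. Otherwise NULL is
  // returned.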
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    Scalar* result = m_impl.data();
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }

 protected:
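  // Map a linear index in the output (sliced) tensor to the linear index of
  // the same coefficient in the input tensor, peeling off one dimension at a
  // time and adding the slice offset along each dimension.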
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  Dimensions m_dimensions;
  const StartIndices m_offsets;
};


// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

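  // Vectorized write: the mirror image of packet() above. If the packet maps
  // to a contiguous run in the input, it is stored with a single packet
  // write; otherwise it is scattered coefficient by coefficient.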
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};


namespace internal {
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

}  // end namespace internal


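/** \class TensorStridingSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor strided slicing class.
  *
  * Extracts a strided slice delimited by start indices, stop indices
  * (exclusive) and per-dimension strides, which may be negative.
  *
  * A minimal usage sketch (illustrative only; it assumes the stridedSlice()
  * method exposed by TensorBase):
  *
  * \code
  * Eigen::Tensor<float, 2> input(8, 8);
  * input.setRandom();
  * Eigen::array<Eigen::DenseIndex, 2> starts = {{0, 0}};
  * Eigen::array<Eigen::DenseIndex, 2> stops = {{8, 8}};
  * Eigen::array<Eigen::DenseIndex, 2> strides = {{2, 2}};
  * Eigen::Tensor<float, 2> sub = input.stridedSlice(starts, stops, strides);
  * \endcode
  */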
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
    typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
    typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
      const XprType& expr, const StartIndices& startIndices,
      const StopIndices& stopIndices, const Strides& strides)
        : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
          m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator=(const TensorStridingSlicingOp& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator=(const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};

// Eval as rvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero.
    DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        /* implies m_strides[i] < 0 by the assert above */
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();

    // Check for degenerate intervals and compute the output tensor shape.
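    // For example, start=5, stop=1, stride=-2 gives interval=-4 and an
    // output dimension of 2 (the coefficients at indices 5 and 3; the stop
    // index is exclusive).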
    bool degenerate = false;
    for (int i = 0; i < NumDims; i++) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
        degenerate = true;
      } else {
        m_dimensions[i] = interval / m_strides[i]
                          + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
    }
    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        // NOTE: if the tensor is degenerate, we send 1 to prevent a crash in the TensorIntDivisor constructor.
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        // NOTE: if the tensor is degenerate, we send 1 to prevent a crash in the TensorIntDivisor constructor.
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    }
    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
                                          device.lastLevelCacheSize() /
                                          sizeof(Scalar));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
    return numext::maxi(min, numext::mini(max, value));
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
  const Strides m_strides;
  std::size_t m_block_total_size_max;
};

// Eval as lvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H