// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H

namespace Eigen {

/** \class TensorConversionOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor conversion class. This class makes it possible to vectorize
  * type casting operations when the number of scalars per packet in the source
  * and the destination types differ.
  */
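
// A TensorConversionOp is normally obtained through TensorBase::cast() rather
// than constructed directly. A minimal illustrative sketch (tensor ranks and
// sizes are arbitrary):
//
//   Eigen::Tensor<int, 2> a(4, 4);
//   a.setRandom();
//   Eigen::Tensor<float, 2> b = a.cast<float>();  // element-wise int -> float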
namespace internal {
template<typename TargetType, typename XprType>
struct traits<TensorConversionOp<TargetType, XprType> >
{
  // The scalar type of the expression is the target type of the conversion.
  typedef TargetType Scalar;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = traits<XprType>::NumDimensions;
  static const int Layout = traits<XprType>::Layout;
  enum { Flags = 0 };
};

template<typename TargetType, typename XprType>
struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense>
{
  typedef const TensorConversionOp<TargetType, XprType>& type;
};

template<typename TargetType, typename XprType>
struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
{
  typedef TensorConversionOp<TargetType, XprType> type;
};

}  // end namespace internal


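// PacketConverter wraps a tensor evaluator and converts packets of the source
// scalar type into packets of the target type. The generic definition below
// covers the case where one source packet maps to exactly one target packet
// (SrcCoeffRatio and TgtCoeffRatio both 1), e.g. int <-> float on platforms
// where both packets hold the same number of scalars.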
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
struct PacketConverter {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
  }

 private:
  const TensorEvaluator& m_impl;
};

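// Specialization for conversions that consume two source packets per target
// packet (SrcCoeffRatio 2). For instance, on SSE a double -> float cast reads
// two Packet2d (2 doubles each) to fill one Packet4f (4 floats).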
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};

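// Specialization for conversions that consume four source packets per target
// packet (SrcCoeffRatio 4), i.e. the target packet holds four times as many
// scalars as the source packet.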
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;

    SrcPacket src1 = m_impl.template packet<LoadMode>(index);
    SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
    SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
    SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
    TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
    return result;
  }

 private:
  const TensorEvaluator& m_impl;
};

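// Specialization for conversions where the source packet holds more scalars
// than the target packet (TgtCoeffRatio 2). For instance, on SSE a
// float -> double cast reads a full Packet4f but converts only its first two
// floats into a Packet2d, so consecutive calls reload overlapping source data.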
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketConverter(const TensorEvaluator& impl)
      : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}

  template<int LoadMode, typename Index>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
    const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
    // Only call m_impl.packet() when we have direct access to the underlying data. This
    // ensures that we don't compute the subexpression twice. We may however load some
    // coefficients twice, but in practice this doesn't negatively impact performance.
    if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
      // Force unaligned memory loads since we can't ensure alignment anymore
      return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
    } else {
      const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
      typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
      typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
      internal::scalar_cast_op<SrcType, TgtType> converter;
      EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
      for (int i = 0; i < TgtPacketSize; ++i) {
        values[i] = converter(m_impl.coeff(index+i));
      }
      TgtPacket rslt = internal::pload<TgtPacket>(values);
      return rslt;
    }
  }

 private:
  const TensorEvaluator& m_impl;
  const typename TensorEvaluator::Index m_maxIndex;
};

template<typename TargetType, typename XprType>
class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
    typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorConversionOp>::Index Index;
    typedef typename internal::nested<TensorConversionOp>::type Nested;
    typedef Scalar CoeffReturnType;
    typedef typename NumTraits<Scalar>::Real RealScalar;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr)
        : m_xpr(xpr) {}

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
};

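// Helper that decides how the sub-expression is evaluated. When the source and
// target scalar types are identical the cast is a no-op, so the destination
// buffer can be forwarded to the sub-expression, which may then evaluate
// directly into it. Otherwise the sub-expression is evaluated separately and
// the cast is applied as the coefficients are accessed.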
template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
    impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
};

template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
    return impl.evalSubExprsIfNeeded(data);
  }
};


// Eval as rvalue
template<typename TargetType, typename ArgType, typename Device>
struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
{
  typedef TensorConversionOp<TargetType, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef TargetType Scalar;
  typedef TargetType CoeffReturnType;
  typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename PacketType<SrcType, Device>::type PacketSourceType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : m_impl(op.expression(), device)
  {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
  {
    return ConversionSubExprEval<internal::is_same<TargetType, SrcType>::value, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
  {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    internal::scalar_cast_op<SrcType, TargetType> converter;
    return converter(m_impl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &&
        internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
    return PacketConv<LoadMode, Vectorizable>::run(m_impl, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
    if (vectorized) {
      const double SrcCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const double TgtCoeffRatio =
          internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
          TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
    } else {
      return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
    }
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

  protected:
  // Scalar fallback: convert one coefficient at a time into an aligned buffer,
  // then load the buffer as a packet.
  template <int LoadMode, bool ActuallyVectorize>
  struct PacketConv {
    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
      internal::scalar_cast_op<SrcType, TargetType> converter;
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = converter(impl.coeff(index+i));
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  };

  // Vectorized path: delegate to a PacketConverter parameterized with the
  // packet ratios reported by type_casting_traits.
  template <int LoadMode>
  struct PacketConv<LoadMode, true> {
    static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
      const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
      const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
      PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType,
                      SrcCoeffRatio, TgtCoeffRatio> converter(impl);
      return converter.template packet<LoadMode>(index);
    }
  };

  TensorEvaluator<ArgType, Device> m_impl;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H