/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_KERNELS_MIRROR_PAD_OP_H_
#define TENSORFLOW_KERNELS_MIRROR_PAD_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"

namespace Eigen {
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp;

namespace internal {
template <typename PaddingDimensions, typename XprType>
struct traits<TensorMirrorPadOp<PaddingDimensions, XprType>>
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
};

template <typename PaddingDimensions, typename XprType>
struct eval<TensorMirrorPadOp<PaddingDimensions, XprType>, Eigen::Dense> {
  typedef const TensorMirrorPadOp<PaddingDimensions, XprType>& type;
};

template <typename PaddingDimensions, typename XprType>
struct nested<
    TensorMirrorPadOp<PaddingDimensions, XprType>, 1,
    typename eval<TensorMirrorPadOp<PaddingDimensions, XprType>>::type> {
  typedef TensorMirrorPadOp<PaddingDimensions, XprType> type;
};
}  // namespace internal

template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp
    : public TensorBase<TensorMirrorPadOp<PaddingDimensions, XprType>,
                        ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorMirrorPadOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::StorageKind
      StorageKind;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMirrorPadOp(
      const XprType& expr, const PaddingDimensions& padding_dims, Index offset)
      : xpr_(expr), padding_dims_(padding_dims), offset_(offset) {}

  EIGEN_DEVICE_FUNC
  const PaddingDimensions& padding() const { return padding_dims_; }

  EIGEN_DEVICE_FUNC
  Index offset() const { return offset_; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const {
    return xpr_;
  }

 protected:
  typename XprType::Nested xpr_;
  const PaddingDimensions padding_dims_;
  const Index offset_;
};

// Eval as rvalue
template <typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorMirrorPadOp<PaddingDimensions, ArgType>,
                       Device> {
  typedef TensorMirrorPadOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int Dims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, Dims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  // Copied from the Eigen3 GitHub repository at version 0e806c1.
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
                                                        const Device& device)
      : impl_(op.expression(), device), padding_(op.padding()) {
    EIGEN_STATIC_ASSERT(Dims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE)

    // op.offset() == 0 if the padding mode is SYMMETRIC.
    // op.offset() == 1 if the padding mode is REFLECT.
    eigen_assert(op.offset() == 0 || op.offset() == 1);
    left_offset_ = -1 + op.offset();
    right_offset_ = -1 - op.offset();
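    // Example (illustrative): padding a 1-D input [1, 2, 3] by (2, 2) gives
    //   SYMMETRIC (offset == 0): [2, 1, 1, 2, 3, 3, 2]
    //   REFLECT   (offset == 1): [3, 2, 1, 2, 3, 2, 1]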

    // This should trigger a compilation error if the padding dimensions and
    // the expression dimensions do not match.
    dimensions_ = impl_.dimensions();
    for (int dim = 0; dim < Dims; ++dim) {
      eigen_assert(padding_[dim].first + op.offset() <= dimensions_[dim]);
      eigen_assert(padding_[dim].second + op.offset() <= dimensions_[dim]);
      dimensions_[dim] += padding_[dim].first + padding_[dim].second;
    }

    const auto& input_dims = impl_.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      input_strides_[0] = 1;
      output_strides_[0] = 1;
      for (int i = 0; i < Dims - 1; ++i) {
        input_strides_[i + 1] = input_strides_[i] * input_dims[i];
        output_strides_[i + 1] = output_strides_[i] * dimensions_[i];
      }
    } else {
      input_strides_[numext::maxi(0, Dims - 1)] = 1;
      output_strides_[numext::maxi(0, Dims - 1)] = 1;
      for (int i = Dims - 1; i > 0; --i) {
        input_strides_[i - 1] = input_strides_[i] * input_dims[i];
        output_strides_[i - 1] = output_strides_[i] * dimensions_[i];
      }
    }
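
    // Example (illustrative): with ColMajor layout, a 3x4 input padded by
    // {(1, 1), (2, 2)} produces a 5x8 output, so input_strides_ == {1, 3} and
    // output_strides_ == {1, 5}.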
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return dimensions_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    impl_.evalSubExprsIfNeeded(nullptr);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { impl_.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    const Index input_index = ToInputIndex(index);
    return impl_.coeff(input_index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(array<Index, Dims> coords) const {
    for (int dim = 0; dim < Dims; ++dim) {
      coords[dim] = ToInputCoord(coords[dim], dim);
    }
    ReadInputHelper<TensorEvaluator<ArgType, Device>::CoordAccess> helper;
    return helper(coords, input_strides_, impl_);
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
  packet(Index index) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    EIGEN_STATIC_ASSERT(kPacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + kPacketSize <= dimensions().TotalSize());

    // Find the effective inner-most dimension where padding actually happens.
    // NOTE: This is independent of the index argument and could be computed
    // in the constructor to save work. However, if packet access never
    // happens, moving it there would incur needless overhead.
    int dim = -1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int k = 0; k < Dims; ++k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    } else {
      for (int k = Dims - 1; k >= 0; --k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    }
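    // Example (illustrative): with ColMajor layout, Dims == 3, and
    // padding_ == {(0, 0), (1, 1), (0, 0)}, the first loop above selects
    // dim == 1, the inner-most dimension that is actually padded.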

    const Index input_index = ToInputIndex(index);

    // If dim < 0, there is no padding at all.
    if (dim < 0) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Check whether the entire packet, from its first index to its last,
    // lies in a contiguous non-padded region; that is, every index must fall
    // strictly between the left and right padded regions of the effective
    // inner-most dimension.
    const Index left = padding_[dim].first * output_strides_[dim];
    const Index right =
        (dimensions_[dim] - padding_[dim].second) * output_strides_[dim];
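
    // Example (illustrative): if dimensions_[dim] == 7 and
    // padding_[dim] == (2, 2) with output_strides_[dim] == 1, then left == 2
    // and right == 5; only packets lying entirely within [2, 5) take the fast
    // path below.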

    if (left <= index && (index + kPacketSize - 1) < right) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Otherwise the packet straddles a padded region; fall back to coeff().
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
        values[kPacketSize];
    values[0] = impl_.coeff(input_index);
    for (int i = 1; i < kPacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType result = internal::pload<PacketReturnType>(values);
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    const double compute_cost = Dims * (7 * TensorOpCost::AddCost<Index>() +
                                        2 * TensorOpCost::MulCost<Index>() +
                                        TensorOpCost::DivCost<Index>());
    return impl_.costPerCoeff(vectorized) +
           TensorOpCost(1, 0, compute_cost, vectorized, kPacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return nullptr; }

 protected:
  using Coords = array<Index, Dims>;

  // Full template specialization is not allowed within a class template that
  // is not itself fully specialized. Add a dummy parameter so the
  // specializations remain partial.
  template <bool CoordAccess, bool dummy = true>
  struct ReadInputHelper;

  template <bool dummy>
  struct ReadInputHelper<false, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      Index index = 0;
      for (int k = 0; k < Dims; ++k) {
        index += coord[k] * strides[k];
      }
      return eval.coeff(index);
    }
  };

  template <bool dummy>
  struct ReadInputHelper<true, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      return eval.coeff(coord);
    }
  };

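  // Maps the output coordinate k in dimension dim to the corresponding input
  // coordinate, mirroring coordinates that fall into the padded regions.
  // Example (illustrative): for an input of size 5 in this dimension, padding
  // (2, 2), and REFLECT mode (offset == 1), output coordinates 0..8 map to
  // input coordinates 2, 1, 0, 1, 2, 3, 4, 3, 2.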
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index ToInputCoord(Index k,
                                                           int dim) const {
    const Index m = impl_.dimensions()[dim];
    k -= padding_[dim].first;
    if (k < 0) {
      return -k + left_offset_;
    }
    if (k < m) {
      return k;
    }
    return m - (k - m) + right_offset_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index
  ToInputIndex(const Coords& coords) const {
    Index input_index = 0;
    for (int dim = 0; dim < Dims; ++dim) {
      input_index += ToInputCoord(coords[dim], dim) * input_strides_[dim];
    }
    return input_index;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index ToInputIndex(Index index) const {
    Index input_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int dim = Dims - 1; dim > 0; --dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, 0);
    } else {
      for (int dim = 0; dim < Dims - 1; ++dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, Dims - 1);
    }

    return input_index;
  }

  TensorEvaluator<ArgType, Device> impl_;
  PaddingDimensions padding_;
  Dimensions dimensions_;
  array<Index, Dims> input_strides_;
  array<Index, Dims> output_strides_;

  Index left_offset_;
  Index right_offset_;
};
}  // namespace Eigen

namespace tensorflow {
namespace functor {

// The offset argument must be either 0 or 1. It controls whether the
// boundary values are replicated (offset == 0, SYMMETRIC mode) or not
// replicated (offset == 1, REFLECT mode).
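// For example (illustrative): with Dims == 2, a 3x4 input, and
// padding == [[1, 1], [2, 2]], the padding_dims array built below becomes
// {(1, 1), (2, 2)} and the padded output has shape 5x8.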
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix padding, int offset) {
    Eigen::array<Eigen::IndexPair<int32>, Dims> padding_dims;

    for (int i = 0; i < Dims; ++i) {
      padding_dims[i] = Eigen::IndexPair<int32>(padding(i, 0), padding(i, 1));
    }

    output.device(device) = MirrorPadOp(input, padding_dims, offset);
  }

  template <typename PaddingDimensions, typename Derived>
  static const Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>
  MirrorPadOp(
      const Eigen::TensorBase<Derived, Eigen::ReadOnlyAccessors>& tensor,
      const PaddingDimensions& padding, int offset) {
    return Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>(
        static_cast<const Derived&>(tensor), padding, offset);
  }
};

// The offset argument must be either 0 or 1. It controls whether the
// boundary values are replicated (offset == 0, SYMMETRIC mode) or not
// replicated (offset == 1, REFLECT mode).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPadGrad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix paddings, int offset,
                  typename TTypes<T, Dims, int32>::Tensor scratch) {
    // Copy the gradient input into the scratch buffer.
    scratch.device(device) = input;

    Eigen::array<int32, Dims> lhs_offsets;
    Eigen::array<int32, Dims> rhs_offsets;
    Eigen::array<int32, Dims> extents;
    Eigen::array<bool, Dims> reverses;

    for (int i = 0; i < Dims; ++i) {
      lhs_offsets[i] = 0;
      rhs_offsets[i] = 0;
      extents[i] = scratch.dimension(i);
      reverses[i] = false;
    }

    // At this point, the central (non-padded) part does not yet include the
    // gradients back-propagated through the padded areas. Those gradient
    // components need to be added to the central part.
    //
    // Note that a gradient input element falls into a padded area iff, in at
    // least one dimension i, its coordinate x(i) lies in the (Python-style)
    // range [:paddings(i, 0)] or [-paddings(i, 1):].
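    //
    // Example (illustrative, 1-D): for an input of size 3 padded SYMMETRIC by
    // (2, 2) (offset == 0), a gradient input [a, b, c, d, e, f, g] folds to
    // the gradient output [b + c, a + d + g, e + f].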

    for (int i = 0; i < Dims; ++i) {
      reverses[i] = true;

      // This handles the case when the coordinate in dimension i lies in the
      // range [:paddings(i, 0)]. This portion is added to the range
      // [paddings(i, 0) + offset : 2 * paddings(i, 0) + offset].
      if (paddings(i, 0) > 0) {
        rhs_offsets[i] = 0;
        lhs_offsets[i] = paddings(i, 0) + offset;
        extents[i] = paddings(i, 0);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      // This handles the case when the coordinate in dimension i lies in the
      // range [-paddings(i, 1):]. This portion is added to the range
      // [-2 * paddings(i, 1) - offset : -paddings(i, 1) - offset].
      if (paddings(i, 1) > 0) {
        rhs_offsets[i] = scratch.dimension(i) - paddings(i, 1);
        lhs_offsets[i] = rhs_offsets[i] - paddings(i, 1) - offset;
        extents[i] = paddings(i, 1);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      reverses[i] = false;
      lhs_offsets[i] = paddings(i, 0);
      rhs_offsets[i] = paddings(i, 0);
      extents[i] = output.dimension(i);

      // At this point, the scratch buffer contains the gradient input as if
      // the paddings for dimensions k = 0, ..., i were zero. Therefore, after
      // the loop terminates, the central part of the scratch buffer contains
      // the folded gradients.
    }

    // Copy the central part of the scratch buffer to the output.
    output.device(device) = scratch.slice(rhs_offsets, extents);
  }
};
}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_KERNELS_MIRROR_PAD_OP_H_