/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_KERNELS_MIRROR_PAD_OP_H_
#define TENSORFLOW_KERNELS_MIRROR_PAD_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"

namespace Eigen {
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp;

namespace internal {
template <typename PaddingDimensions, typename XprType>
struct traits<TensorMirrorPadOp<PaddingDimensions, XprType>>
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
};

template <typename PaddingDimensions, typename XprType>
struct eval<TensorMirrorPadOp<PaddingDimensions, XprType>, Eigen::Dense> {
  typedef const TensorMirrorPadOp<PaddingDimensions, XprType>& type;
};

template <typename PaddingDimensions, typename XprType>
struct nested<
    TensorMirrorPadOp<PaddingDimensions, XprType>, 1,
    typename eval<TensorMirrorPadOp<PaddingDimensions, XprType>>::type> {
  typedef TensorMirrorPadOp<PaddingDimensions, XprType> type;
};
}  // namespace internal

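// TensorMirrorPadOp mirror-pads its input expression. As an illustration of
// the two padding modes: with offset == 1 (REFLECT mode) the boundary value
// is not repeated, so padding a 1-D input (1, 2, 3) by two on each side
// yields (3, 2, 1, 2, 3, 2, 1); with offset == 0 (SYMMETRIC mode) the
// boundary value is repeated, yielding (2, 1, 1, 2, 3, 3, 2).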
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp
    : public TensorBase<TensorMirrorPadOp<PaddingDimensions, XprType>,
                        ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorMirrorPadOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::StorageKind
      StorageKind;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMirrorPadOp(
      const XprType& expr, const PaddingDimensions& padding_dims, Index offset)
      : xpr_(expr), padding_dims_(padding_dims), offset_(offset) {}

  EIGEN_DEVICE_FUNC
  const PaddingDimensions& padding() const { return padding_dims_; }

  EIGEN_DEVICE_FUNC
  Index offset() const { return offset_; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const {
    return xpr_;
  }

 protected:
  typename XprType::Nested xpr_;
  const PaddingDimensions padding_dims_;
  const Index offset_;
};

// Eval as rvalue
template <typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorMirrorPadOp<PaddingDimensions, ArgType>,
                       Device> {
  typedef TensorMirrorPadOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int Dims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, Dims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  // Copied from Eigen3 Github version 0e806c1.
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
                                                        const Device& device)
      : impl_(op.expression(), device), padding_(op.padding()) {
    EIGEN_STATIC_ASSERT(Dims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE)

    // op.offset() == 0 if padding mode is symmetric.
    // op.offset() == 1 if padding mode is reflect.
    eigen_assert(op.offset() == 0 || op.offset() == 1);
    left_offset_ = -1 + op.offset();
    right_offset_ = -1 - op.offset();

    // This should trigger a compilation error if the padding dimensions and
    // the expression dimensions do not match.
    dimensions_ = impl_.dimensions();
    for (int dim = 0; dim < Dims; ++dim) {
      eigen_assert(padding_[dim].first + op.offset() <= dimensions_[dim]);
      eigen_assert(padding_[dim].second + op.offset() <= dimensions_[dim]);
      dimensions_[dim] += padding_[dim].first + padding_[dim].second;
    }

    const auto& input_dims = impl_.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      input_strides_[0] = 1;
      output_strides_[0] = 1;
      for (int i = 0; i < Dims - 1; ++i) {
        input_strides_[i + 1] = input_strides_[i] * input_dims[i];
        output_strides_[i + 1] = output_strides_[i] * dimensions_[i];
      }
    } else {
      input_strides_[numext::maxi(0, Dims - 1)] = 1;
      output_strides_[numext::maxi(0, Dims - 1)] = 1;
      for (int i = Dims - 1; i > 0; --i) {
        input_strides_[i - 1] = input_strides_[i] * input_dims[i];
        output_strides_[i - 1] = output_strides_[i] * dimensions_[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return dimensions_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    impl_.evalSubExprsIfNeeded(nullptr);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { impl_.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    const Index input_index = ToInputIndex(index);
    return impl_.coeff(input_index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(array<Index, Dims> coords) const {
    for (int dim = 0; dim < Dims; ++dim) {
      coords[dim] = ToInputCoord(coords[dim], dim);
    }
    ReadInputHelper<TensorEvaluator<ArgType, Device>::CoordAccess> helper;
    return helper(coords, input_strides_, impl_);
  }

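  // packet() below tries to serve the whole packet with a single vectorized
  // load from the input: this is valid whenever every index in the packet
  // lies in the non-padded interior of the effective inner-most padded
  // dimension. Otherwise the packet is assembled one coefficient at a time.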
  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
  packet(Index index) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    EIGEN_STATIC_ASSERT(kPacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + kPacketSize <= dimensions().TotalSize());

    // Find the effective inner-most dimension where padding actually happens.
    // NOTE: This is independent of the index argument and could be done in the
    // constructor to save computation. However, if packet access never
    // happens, moving it to the constructor would incur needless overhead.
    int dim = -1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int k = 0; k < Dims; ++k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    } else {
      for (int k = Dims - 1; k >= 0; --k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    }

    const Index input_index = ToInputIndex(index);

    // If dim < 0, this means there is no padding at all.
    if (dim < 0) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Check whether the entire packet lies in the contiguous, non-padded
    // region of the effective inner-most dimension, i.e. every index in the
    // packet falls between the left and right padded regions.
    const Index left = padding_[dim].first * output_strides_[dim];
    const Index right =
        (dimensions_[dim] - padding_[dim].second) * output_strides_[dim];

    if (left <= index && (index + kPacketSize - 1) < right) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Otherwise the packet straddles a padded region, so fall back to reading
    // one coefficient at a time.
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
        values[kPacketSize];
    values[0] = impl_.coeff(input_index);
    for (int i = 1; i < kPacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType result = internal::pload<PacketReturnType>(values);
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    const double compute_cost = Dims * (7 * TensorOpCost::AddCost<Index>() +
                                        2 * TensorOpCost::MulCost<Index>() +
                                        TensorOpCost::DivCost<Index>());
    return impl_.costPerCoeff(vectorized) +
           TensorOpCost(1, 0, compute_cost, vectorized, kPacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return nullptr; }

 protected:
  using Coords = array<Index, Dims>;

  // Full template specialization is not allowed within a non-fully-specialized
  // template class. Add a dummy parameter to make the specializations partial.
  template <bool CoordAccess, bool dummy = true>
  struct ReadInputHelper;

  template <bool dummy>
  struct ReadInputHelper<false, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      // The underlying evaluator does not support coordinate access, so
      // flatten the coordinates into a linear index first.
      Index index = 0;
      for (int k = 0; k < Dims; ++k) {
        index += coord[k] * strides[k];
      }
      return eval.coeff(index);
    }
  };

  template <bool dummy>
  struct ReadInputHelper<true, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      // The underlying evaluator supports coordinate access directly.
      return eval.coeff(coord);
    }
  };

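  // ToInputCoord maps an output coordinate back into the input. For example,
  // with an input dimension of size 3, paddings (2, 2), and REFLECT mode
  // (left_offset_ == 0, right_offset_ == -2), output coordinates 0..6 map to
  // input coordinates 2, 1, 0, 1, 2, 1, 0. In SYMMETRIC mode
  // (left_offset_ == -1, right_offset_ == -1) they map to 1, 0, 0, 1, 2, 2, 1.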
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index ToInputCoord(Index k,
                                                           int dim) const {
    const Index m = impl_.dimensions()[dim];
    k -= padding_[dim].first;
    if (k < 0) {
      return -k + left_offset_;
    }
    if (k < m) {
      return k;
    }
    return m - (k - m) + right_offset_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index
  ToInputIndex(const Coords& coords) const {
    Index input_index = 0;
    for (int dim = 0; dim < Dims; ++dim) {
      input_index += ToInputCoord(coords[dim], dim) * input_strides_[dim];
    }
    return input_index;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index ToInputIndex(Index index) const {
    Index input_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int dim = Dims - 1; dim > 0; --dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, 0);
    } else {
      for (int dim = 0; dim < Dims - 1; ++dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, Dims - 1);
    }

    return input_index;
  }

  TensorEvaluator<ArgType, Device> impl_;
  PaddingDimensions padding_;
  Dimensions dimensions_;
  array<Index, Dims> input_strides_;
  array<Index, Dims> output_strides_;

  Index left_offset_;
  Index right_offset_;
};
}  // namespace Eigen

namespace tensorflow {
namespace functor {

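// A minimal usage sketch for the functors below (all names other than the
// functors themselves are illustrative and not defined in this header): given
// input, output, and paddings buffers already mapped as Eigen tensors, a
// REFLECT-mode pad of a 2-D float tensor could be dispatched as
//
//   functor::MirrorPad<CPUDevice, float, int32, 2>()(
//       context->eigen_device<CPUDevice>(), output_tensor, input_tensor,
//       paddings_matrix, /*offset=*/1);
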
// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0, SYMMETRIC) or not (offset == 1, REFLECT).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix padding, int offset) {
    Eigen::array<Eigen::IndexPair<int32>, Dims> padding_dims;

    for (int i = 0; i < Dims; ++i) {
      padding_dims[i] = Eigen::IndexPair<int32>(padding(i, 0), padding(i, 1));
    }

    output.device(device) = MirrorPadOp(input, padding_dims, offset);
  }

  template <typename PaddingDimensions, typename Derived>
  static const Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>
  MirrorPadOp(
      const Eigen::TensorBase<Derived, Eigen::ReadOnlyAccessors>& tensor,
      const PaddingDimensions& padding, int offset) {
    return Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>(
        static_cast<const Derived&>(tensor), padding, offset);
  }
};

// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0, SYMMETRIC) or not (offset == 1, REFLECT).
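// A small worked example of the folding performed below (numbers are
// illustrative): for a 1-D gradient input g of size 7 produced by REFLECT
// padding (offset == 1) with paddings (2, 2) around an input of size 3, the
// left fold adds the reversed slice g[0:2] into positions [3:5] and the right
// fold adds the reversed slice g[5:7] into positions [2:4], so the central
// slice [2:5] ends up holding (g[2] + g[6], g[1] + g[3] + g[5], g[0] + g[4]),
// the gradient of the mirror-padded output with respect to the input.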
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPadGrad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix paddings, int offset,
                  typename TTypes<T, Dims, int32>::Tensor scratch) {
    // Copy the gradient input into the scratch buffer.
    scratch.device(device) = input;

    Eigen::array<int32, Dims> lhs_offsets;
    Eigen::array<int32, Dims> rhs_offsets;
    Eigen::array<int32, Dims> extents;
    Eigen::array<bool, Dims> reverses;

    for (int i = 0; i < Dims; ++i) {
      lhs_offsets[i] = 0;
      rhs_offsets[i] = 0;
      extents[i] = scratch.dimension(i);
      reverses[i] = false;
    }

    // At this point, the central (non-padded) part does not yet include the
    // gradients back-propagated through the padded areas. Those gradient
    // components need to be added to the central part.
    //
    // Note that a gradient input element falls into a padded area iff, in at
    // least one dimension i, its coordinate x(i) is in the range (Python-style)
    // [:paddings(i, 0)] or [-paddings(i, 1):].

    for (int i = 0; i < Dims; ++i) {
      reverses[i] = true;

      // This handles the case where the coordinate in dimension i is in the
      // range [:paddings(i, 0)]. This portion is added to the range
      // [paddings(i, 0) + offset : 2 * paddings(i, 0) + offset].
      if (paddings(i, 0) > 0) {
        rhs_offsets[i] = 0;
        lhs_offsets[i] = paddings(i, 0) + offset;
        extents[i] = paddings(i, 0);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      // This handles the case where the coordinate in dimension i is in the
      // range [-paddings(i, 1):]. This portion is added to the range
      // [-2 * paddings(i, 1) - offset : -paddings(i, 1) - offset].
      if (paddings(i, 1) > 0) {
        rhs_offsets[i] = scratch.dimension(i) - paddings(i, 1);
        lhs_offsets[i] = rhs_offsets[i] - paddings(i, 1) - offset;
        extents[i] = paddings(i, 1);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      reverses[i] = false;
      lhs_offsets[i] = paddings(i, 0);
      rhs_offsets[i] = paddings(i, 0);
      extents[i] = output.dimension(i);

      // At this point, the scratch buffer contains the gradient input as if
      // the paddings for dimensions k = 0, ..., i were zero. Therefore, after
      // the loop terminates, the central part of the scratch buffer contains
      // the folded gradients.
    }

    // Copy the central part of the scratch buffer to the output.
    output.device(device) = scratch.slice(rhs_offsets, extents);
  }
};
}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_KERNELS_MIRROR_PAD_OP_H_