/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Specialization of SpaceToBatchFunctor for a CPUDevice.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/spacetobatch_functor.h"

#include "tensorflow/core/framework/register_types.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

namespace functor {

namespace {

// Implementation of nested loops for SpaceToBatchFunctor.
//
// To simplify the template implementation, given the lack of constexpr if,
// both the input and output pointers are non-const. (A comment-only sketch of
// the loop nest this recursion expands to follows the base-case
// specialization below.)
template <int N, bool B2S>
struct SpaceToBatchHelper {
  template <typename T>
  static void run(T* space_tensor_ptr, const int64* space_tensor_shape,
                  const int64* space_tensor_strides, const int64* block_shape,
                  const int64* pad_start, const int64* block_offsets,
                  const int64* batch_tensor_shape,
                  const int64* batch_tensor_strides, T* batch_tensor_ptr) {
    for (int64 batch_tensor_pos = 0; batch_tensor_pos < batch_tensor_shape[0];
         ++batch_tensor_pos) {
      const int64 space_tensor_pos =
          batch_tensor_pos * block_shape[0] + block_offsets[0] - pad_start[0];
      if (space_tensor_pos >= 0 && space_tensor_pos < space_tensor_shape[0]) {
        SpaceToBatchHelper<N - 1, B2S>::run(
            space_tensor_ptr + space_tensor_pos * space_tensor_strides[0],
            space_tensor_shape + 1, space_tensor_strides + 1, block_shape + 1,
            pad_start + 1, block_offsets + 1, batch_tensor_shape + 1,
            batch_tensor_strides + 1, batch_tensor_ptr);
      } else {
        if (B2S == false) {
          // Copy in padding.
          for (int64 i = 0; i < batch_tensor_strides[0]; ++i) {
            batch_tensor_ptr[i] = static_cast<T>(0);
          }
        }
      }
      batch_tensor_ptr += batch_tensor_strides[0];
    }
  }
};

template <bool B2S>
struct SpaceToBatchHelper<0, B2S> {
  template <typename T>
  static void run(T* space_tensor_ptr, const int64* space_tensor_shape,
                  const int64* space_tensor_strides, const int64* block_shape,
                  const int64* pad_start, const int64* block_offsets,
                  const int64* batch_tensor_shape,
                  const int64* batch_tensor_strides, T* batch_tensor_ptr) {
    for (int64 i = 0; i < batch_tensor_strides[-1]; ++i) {
      if (B2S == false) {
        batch_tensor_ptr[i] = space_tensor_ptr[i];
      } else {
        space_tensor_ptr[i] = batch_tensor_ptr[i];
      }
    }
  }
};
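
// For reference, a comment-only sketch (not compiled) of the loop nest the
// recursion above is intended to expand to, assuming NUM_BLOCK_DIMS == 2 and
// B2S == false (space -> batch). The names b0, b1, s0, s1, space, batch,
// sstr0, sstr1, bstr0, and bstr1 are shorthand introduced here; at the call
// site in SpaceToBatchFunctor below, sstr0/bstr0 correspond to
// space/batch_tensor_strides[1] and sstr1/bstr1 to strides[2], with bstr1
// equal to the depth of the tensors:
//
//   for (int64 b0 = 0; b0 < batch_tensor_shape[0]; ++b0, batch += bstr0) {
//     const int64 s0 = b0 * block_shape[0] + block_offsets[0] - pad_start[0];
//     if (s0 >= 0 && s0 < space_tensor_shape[0]) {
//       T* space1 = space + s0 * sstr0;
//       T* batch1 = batch;
//       for (int64 b1 = 0; b1 < batch_tensor_shape[1];
//            ++b1, batch1 += bstr1) {
//         const int64 s1 =
//             b1 * block_shape[1] + block_offsets[1] - pad_start[1];
//         if (s1 >= 0 && s1 < space_tensor_shape[1]) {
//           // Base case: copy one contiguous depth run of bstr1 elements.
//           for (int64 i = 0; i < bstr1; ++i)
//             batch1[i] = space1[s1 * sstr1 + i];
//         } else {
//           for (int64 i = 0; i < bstr1; ++i) batch1[i] = 0;  // padding
//         }
//       }
//     } else {
//       for (int64 i = 0; i < bstr0; ++i) batch[i] = 0;  // padding
//     }
//   }
//
// For B2S == true the copy direction in the base case is reversed and no
// zero-filling is done. In the SpaceToBatchHelper<0, B2S> specialization
// above, batch_tensor_strides[-1] is valid because the strides pointer has
// already been advanced past every block dimension; it names the stride of
// the innermost block dimension, i.e. the length of one contiguous depth run.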

}  // namespace

template <typename T, int NUM_BLOCK_DIMS, bool B2S>
struct SpaceToBatchFunctor<CPUDevice, T, NUM_BLOCK_DIMS, B2S> {
  using SpaceT = typename std::conditional<B2S, T, const T>::type;
  using BatchT = typename std::conditional<B2S, const T, T>::type;
  Status operator()(
      const CPUDevice& d,
      typename TTypes<SpaceT, NUM_BLOCK_DIMS + 2>::Tensor space_tensor,
      const int64 block_shape_tensor[NUM_BLOCK_DIMS],
      const int64 paddings_tensor[NUM_BLOCK_DIMS * 2],
      typename TTypes<BatchT, NUM_BLOCK_DIMS + 2>::Tensor batch_tensor) {
    const int64 batch_tensor_batch = batch_tensor.dimension(0);

    const int64 space_tensor_batch = space_tensor.dimension(0);

    // Copy into local array so that the compiler is free to place in a
    // register.
    int64 pad_start[NUM_BLOCK_DIMS];
    int64 block_shape[NUM_BLOCK_DIMS];
    int64 space_tensor_shape[NUM_BLOCK_DIMS],
        batch_tensor_shape[NUM_BLOCK_DIMS];
    for (int block_dim = 0; block_dim < NUM_BLOCK_DIMS; ++block_dim) {
      pad_start[block_dim] = paddings_tensor[block_dim * 2];
      block_shape[block_dim] = block_shape_tensor[block_dim];
      space_tensor_shape[block_dim] = space_tensor.dimension(block_dim + 1);
      batch_tensor_shape[block_dim] = batch_tensor.dimension(block_dim + 1);
    }

    int64 space_tensor_strides[NUM_BLOCK_DIMS + 2],
        batch_tensor_strides[NUM_BLOCK_DIMS + 2];
    space_tensor_strides[NUM_BLOCK_DIMS + 1] =
        batch_tensor_strides[NUM_BLOCK_DIMS + 1] = 1;
    for (int dim = NUM_BLOCK_DIMS; dim >= 0; --dim) {
      space_tensor_strides[dim] =
          space_tensor_strides[dim + 1] * space_tensor.dimension(dim + 1);
      batch_tensor_strides[dim] =
          batch_tensor_strides[dim + 1] * batch_tensor.dimension(dim + 1);
    }
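
    // For example (illustrative shape only), with NUM_BLOCK_DIMS == 2 and a
    // batch tensor of shape [8, 3, 4, 5] (batch, two block dims, depth), the
    // loop above yields batch_tensor_strides == {60, 20, 5, 1}: each stride is
    // the number of elements spanned by one step along that dimension in
    // row-major order.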

    // Use non-const pointers for both input and output to simplify template
    // implementation given lack of constexpr if.
    T* space_tensor_ptr = const_cast<T*>(space_tensor.data());
    T* batch_tensor_ptr = const_cast<T*>(batch_tensor.data());

    for (int64 batch_tensor_b = 0; batch_tensor_b < batch_tensor_batch;
         ++batch_tensor_b) {
      const int64 space_tensor_b = batch_tensor_b % space_tensor_batch;
      int64 block_index = batch_tensor_b / space_tensor_batch;
      int64 block_offsets[NUM_BLOCK_DIMS];
      for (int block_dim = NUM_BLOCK_DIMS - 1; block_dim >= 0; --block_dim) {
        // Skip unnecessary remainder operation for block_dim == 0.
        block_offsets[block_dim] =
            block_dim > 0 ? block_index % block_shape[block_dim] : block_index;
        block_index /= block_shape[block_dim];
      }
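
      // For example (illustrative values only), with block_shape == {2, 3}
      // the block indices 0..5 decompose into block_offsets of {0, 0}, {0, 1},
      // {0, 2}, {1, 0}, {1, 1}, {1, 2}: block_index is interpreted as a
      // mixed-radix number with block_shape as the radices, most significant
      // digit first.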

      // The compiler should inline the nested loops generated by this
      // template.
      SpaceToBatchHelper<NUM_BLOCK_DIMS, B2S>::run(
          space_tensor_ptr + space_tensor_b * space_tensor_strides[0],
          space_tensor_shape, &space_tensor_strides[1], block_shape, pad_start,
          block_offsets, batch_tensor_shape, &batch_tensor_strides[1],
          batch_tensor_ptr + batch_tensor_b * batch_tensor_strides[0]);
    }
    return Status::OK();
  }
};
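
// A hedged usage sketch (comment only): an op kernel would invoke this
// functor roughly as follows, where the element type, rank, and variable
// names are purely illustrative; see the SpaceToBatchND/BatchToSpaceND op
// kernels for the actual call sites.
//
//   functor::SpaceToBatchFunctor<CPUDevice, float, 2, false> to_batch;
//   const Status s =
//       to_batch(context->eigen_device<CPUDevice>(),
//                space_tensor.tensor<float, 4>(), block_shape, paddings,
//                batch_tensor->tensor<float, 4>());
//
// Here block_shape is an int64[2] and paddings an int64[4] of (start, end)
// pairs per block dimension, matching the operator() signature above.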

// Instantiate.
#define INSTANTIATE(NUM_BLOCK_DIMS, T)                                      \
  template struct SpaceToBatchFunctor<CPUDevice, T, NUM_BLOCK_DIMS, false>; \
  template struct SpaceToBatchFunctor<CPUDevice, T, NUM_BLOCK_DIMS, true>;  \
  /**/

#define INSTANTIATE_FOR_T(T) \
  TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(INSTANTIATE, T)

TF_CALL_REAL_NUMBER_TYPES(INSTANTIATE_FOR_T)

#undef INSTANTIATE_FOR_T
#undef INSTANTIATE

}  // namespace functor
}  // end namespace tensorflow