/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Specialization of SpaceToBatchFunctor for a CPUDevice.

#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/spacetobatch_functor.h"

#include "tensorflow/core/framework/register_types.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;

namespace functor {

namespace {

// Implementation of nested loops for SpaceToBatchOpFunctor.
//
// To simplify the template implementation given the lack of constexpr if,
// both the input and output pointers are non-const.
template <int N, bool B2S>
struct SpaceToBatchHelper {
  template <typename T>
  static void run(T* space_tensor_ptr, const int64* space_tensor_shape,
                  const int64* space_tensor_strides, const int64* block_shape,
                  const int64* pad_start, const int64* block_offsets,
                  const int64* batch_tensor_shape,
                  const int64* batch_tensor_strides, T* batch_tensor_ptr) {
    for (int64 batch_tensor_pos = 0; batch_tensor_pos < batch_tensor_shape[0];
         ++batch_tensor_pos) {
      const int64 space_tensor_pos =
          batch_tensor_pos * block_shape[0] + block_offsets[0] - pad_start[0];
      if (space_tensor_pos >= 0 && space_tensor_pos < space_tensor_shape[0]) {
        SpaceToBatchHelper<N - 1, B2S>::run(
            space_tensor_ptr + space_tensor_pos * space_tensor_strides[0],
            space_tensor_shape + 1, space_tensor_strides + 1, block_shape + 1,
            pad_start + 1, block_offsets + 1, batch_tensor_shape + 1,
            batch_tensor_strides + 1, batch_tensor_ptr);
      } else {
        if (B2S == false) {
          // Copy in padding.
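          // (Space-to-batch only.) This output position maps outside the
          // input, i.e. into the padded region. batch_tensor_strides[0] is
          // the size of the entire remaining sub-block, so the loop below
          // zero-fills it in one pass instead of recursing.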
          for (int64 i = 0; i < batch_tensor_strides[0]; ++i) {
            batch_tensor_ptr[i] = static_cast<T>(0);
          }
        }
      }
      batch_tensor_ptr += batch_tensor_strides[0];
    }
  }
};

template <bool B2S>
struct SpaceToBatchHelper<0, B2S> {
  template <typename T>
  static void run(T* space_tensor_ptr, const int64* space_tensor_shape,
                  const int64* space_tensor_strides, const int64* block_shape,
                  const int64* pad_start, const int64* block_offsets,
                  const int64* batch_tensor_shape,
                  const int64* batch_tensor_strides, T* batch_tensor_ptr) {
    // Note: strides[-1] is valid here because the caller passes a pointer
    // into the interior of the strides array; it is the stride of the
    // innermost block dimension, i.e. the number of contiguous elements
    // remaining to copy.
    for (int64 i = 0; i < batch_tensor_strides[-1]; ++i) {
      if (B2S == false) {
        batch_tensor_ptr[i] = space_tensor_ptr[i];
      } else {
        space_tensor_ptr[i] = batch_tensor_ptr[i];
      }
    }
  }
};

}  // namespace

template <typename T, int NUM_BLOCK_DIMS, bool B2S>
struct SpaceToBatchFunctor<CPUDevice, T, NUM_BLOCK_DIMS, B2S> {
  using SpaceT = typename std::conditional<B2S, T, const T>::type;
  using BatchT = typename std::conditional<B2S, const T, T>::type;
  Status operator()(
      const CPUDevice& d,
      typename TTypes<SpaceT, NUM_BLOCK_DIMS + 2>::Tensor space_tensor,
      const int64 block_shape_tensor[NUM_BLOCK_DIMS],
      const int64 paddings_tensor[NUM_BLOCK_DIMS * 2],
      typename TTypes<BatchT, NUM_BLOCK_DIMS + 2>::Tensor batch_tensor) {
    const int64 batch_tensor_batch = batch_tensor.dimension(0);

    const int64 space_tensor_batch = space_tensor.dimension(0);

    // Copy into local arrays so that the compiler is free to place them in
    // registers.
    int64 pad_start[NUM_BLOCK_DIMS];
    int64 block_shape[NUM_BLOCK_DIMS];
    int64 space_tensor_shape[NUM_BLOCK_DIMS],
        batch_tensor_shape[NUM_BLOCK_DIMS];
    for (int block_dim = 0; block_dim < NUM_BLOCK_DIMS; ++block_dim) {
      pad_start[block_dim] = paddings_tensor[block_dim * 2];
      block_shape[block_dim] = block_shape_tensor[block_dim];
      space_tensor_shape[block_dim] = space_tensor.dimension(block_dim + 1);
      batch_tensor_shape[block_dim] = batch_tensor.dimension(block_dim + 1);
    }

    // Compute row-major strides for dims 0..NUM_BLOCK_DIMS + 1 of both
    // tensors; strides[dim] is the number of elements spanned by one index
    // step in that dimension.
    int64 space_tensor_strides[NUM_BLOCK_DIMS + 2],
        batch_tensor_strides[NUM_BLOCK_DIMS + 2];
    space_tensor_strides[NUM_BLOCK_DIMS + 1] =
        batch_tensor_strides[NUM_BLOCK_DIMS + 1] = 1;
    for (int dim = NUM_BLOCK_DIMS; dim >= 0; --dim) {
      space_tensor_strides[dim] =
          space_tensor_strides[dim + 1] * space_tensor.dimension(dim + 1);
      batch_tensor_strides[dim] =
          batch_tensor_strides[dim + 1] * batch_tensor.dimension(dim + 1);
    }

    // Use non-const pointers for both input and output to simplify the
    // template implementation given the lack of constexpr if.
    T* space_tensor_ptr = const_cast<T*>(space_tensor.data());
    T* batch_tensor_ptr = const_cast<T*>(batch_tensor.data());

    for (int64 batch_tensor_b = 0; batch_tensor_b < batch_tensor_batch;
         ++batch_tensor_b) {
      const int64 space_tensor_b = batch_tensor_b % space_tensor_batch;
      int64 block_index = batch_tensor_b / space_tensor_batch;
      int64 block_offsets[NUM_BLOCK_DIMS];
      for (int block_dim = NUM_BLOCK_DIMS - 1; block_dim >= 0; --block_dim) {
        // Skip the unnecessary remainder operation for block_dim == 0.
        block_offsets[block_dim] =
            block_dim > 0 ? block_index % block_shape[block_dim] : block_index;
        block_index /= block_shape[block_dim];
      }

      // The compiler should inline the nested loops generated by this
      // template.
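      // For example, with NUM_BLOCK_DIMS == 2 and an NHWC space tensor of
      // shape [batch, H, W, C], the generated loops walk the batch tensor's
      // blocked H and W dimensions, mapping each output position pos in
      // block dimension d back to space position
      //   pos * block_shape[d] + block_offsets[d] - pad_start[d],
      // and the innermost specialization copies a contiguous run of C
      // elements.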
      SpaceToBatchHelper<NUM_BLOCK_DIMS, B2S>::run(
          space_tensor_ptr + space_tensor_b * space_tensor_strides[0],
          space_tensor_shape, &space_tensor_strides[1], block_shape, pad_start,
          block_offsets, batch_tensor_shape, &batch_tensor_strides[1],
          batch_tensor_ptr + batch_tensor_b * batch_tensor_strides[0]);
    }
    return Status::OK();
  }
};

// Instantiate for both directions (space-to-batch and batch-to-space), for
// each supported number of block dimensions, and for each real numeric type.
#define INSTANTIATE(NUM_BLOCK_DIMS, T)                                      \
  template struct SpaceToBatchFunctor<CPUDevice, T, NUM_BLOCK_DIMS, false>; \
  template struct SpaceToBatchFunctor<CPUDevice, T, NUM_BLOCK_DIMS, true>;  \
  /**/

#define INSTANTIATE_FOR_T(T) \
  TF_SPACETOBATCH_FOR_EACH_NUM_BLOCK_DIMS(INSTANTIATE, T)

TF_CALL_REAL_NUMBER_TYPES(INSTANTIATE_FOR_T)

#undef INSTANTIATE_FOR_T
#undef INSTANTIATE

}  // namespace functor
}  // namespace tensorflow