1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" 17 #include "tensorflow/core/framework/fake_input.h" 18 #include "tensorflow/core/framework/node_def_builder.h" 19 #include "tensorflow/core/framework/tensor.h" 20 #include "tensorflow/core/kernels/ops_testutil.h" 21 #include "tensorflow/core/lib/strings/str_util.h" 22 #include "tensorflow/core/platform/test.h" 23 #include "tensorflow/core/platform/test_benchmark.h" 24 25 namespace tensorflow { 26 27 class ResizeBicubicOpTest : public OpsTestBase { 28 protected: 29 ResizeBicubicOpTest() { 30 TF_EXPECT_OK(NodeDefBuilder("resize_bicubic_op", "ResizeBicubic") 31 .Input(FakeInput(DT_FLOAT)) 32 .Input(FakeInput(DT_INT32)) 33 .Attr("align_corners", false) 34 .Finalize(node_def())); 35 TF_EXPECT_OK(InitOp()); 36 } 37 38 const Tensor* SetRandomImageInput(const TensorShape& shape) { 39 inputs_.clear(); 40 41 CHECK_EQ(shape.dims(), 4) << "All images must have 4 dimensions."; 42 bool is_ref = IsRefType(input_types_[inputs_.size()]); 43 Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()), 44 DataTypeToEnum<float>::v(), shape); 45 input->flat<float>().setRandom(); 46 tensors_.push_back(input); 47 if (is_ref) { 48 CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]), 49 DataTypeToEnum<float>::v()); 50 inputs_.push_back({&lock_for_refs_, input}); 51 } else { 52 CHECK_EQ(input_types_[inputs_.size()], DataTypeToEnum<float>::v()); 53 inputs_.push_back({nullptr, input}); 54 } 55 return input; 56 } 57 58 private: 59 static const int64 kTableSize = (1 << 10); 60 61 const float* InitCoeffsTable() { 62 // Allocate and initialize coefficients table using Bicubic 63 // convolution algorithm. 64 // https://en.wikipedia.org/wiki/Bicubic_interpolation 65 float* coeffs_tab = new float[(kTableSize + 1) * 2]; 66 static const double A = -0.75; 67 for (int i = 0; i <= kTableSize; ++i) { 68 float x = i * 1.0 / kTableSize; 69 coeffs_tab[i * 2] = ((A + 2) * x - (A + 3)) * x * x + 1; 70 x += 1.0; 71 coeffs_tab[i * 2 + 1] = ((A * x - 5 * A) * x + 8 * A) * x - 4 * A; 72 } 73 return coeffs_tab; 74 } 75 76 const float* GetCoeffsTable() { 77 // Static so that we initialize it on first use 78 static const float* coeffs_tab = InitCoeffsTable(); 79 return coeffs_tab; 80 } 81 82 // Used in the baseline implementation 83 inline int64 Bound(int64 val, int64 limit) { 84 return std::min(limit - 1ll, std::max(int64{0}, val)); 85 } 86 87 // Used in the baseline implementation 88 inline void GetWeightsAndIndices(float scale, int64 out_loc, int64 limit, 89 std::array<float, 4>* weights, 90 std::array<int64, 4>* indices) { 91 const int64 in_loc = scale * out_loc; 92 const float delta = scale * out_loc - in_loc; 93 const int64 offset = lrintf(delta * kTableSize); 94 const float* coeffs_tab = GetCoeffsTable(); 95 *weights = {{coeffs_tab[offset * 2 + 1], coeffs_tab[offset * 2], 96 coeffs_tab[(kTableSize - offset) * 2], 97 coeffs_tab[(kTableSize - offset) * 2 + 1]}}; 98 *indices = {{Bound(in_loc - 1, limit), Bound(in_loc, limit), 99 Bound(in_loc + 1, limit), Bound(in_loc + 2, limit)}}; 100 } 101 102 // Used in the baseline implementation 103 inline float Interpolate1D(const std::array<float, 4>& weights, 104 const std::array<float, 4>& values) { 105 return values[0] * weights[0] + values[1] * weights[1] + 106 values[2] * weights[2] + values[3] * weights[3]; 107 } 108 109 // This is the straight forward unoptimized implementation of resize bicubic 110 // We use this to confirm that the optimized version is exactly identical. 111 void ResizeBicubicBaseline(TTypes<float, 4>::ConstTensor images, 112 TTypes<float, 4>::Tensor output) { 113 const int batch_size = images.dimension(0); 114 const int64 in_height = images.dimension(1); 115 const int64 in_width = images.dimension(2); 116 const int channels = images.dimension(3); 117 118 ASSERT_EQ(batch_size, output.dimension(0)); 119 ASSERT_EQ(channels, output.dimension(3)); 120 121 const int64 out_height = output.dimension(1); 122 const int64 out_width = output.dimension(2); 123 124 const float height_scale = in_height / static_cast<float>(out_height); 125 const float width_scale = in_width / static_cast<float>(out_width); 126 127 std::array<float, 4> coeff = {{0.0, 0.0, 0.0, 0.0}}; 128 for (int64 b = 0; b < batch_size; ++b) { 129 for (int64 y = 0; y < out_height; ++y) { 130 std::array<float, 4> y_weights; 131 std::array<int64, 4> y_indices; 132 GetWeightsAndIndices(height_scale, y, in_height, &y_weights, 133 &y_indices); 134 for (int64 x = 0; x < out_width; ++x) { 135 std::array<float, 4> x_weights; 136 std::array<int64, 4> x_indices; 137 GetWeightsAndIndices(width_scale, x, in_width, &x_weights, 138 &x_indices); 139 for (int64 c = 0; c < channels; ++c) { 140 // Use a 4x4 patch to compute the interpolated output value at 141 // (b, y, x, c). 142 for (int64 i = 0; i < 4; ++i) { 143 const std::array<float, 4> values = { 144 {static_cast<float>(images(b, y_indices[i], x_indices[0], c)), 145 static_cast<float>(images(b, y_indices[i], x_indices[1], c)), 146 static_cast<float>(images(b, y_indices[i], x_indices[2], c)), 147 static_cast<float>( 148 images(b, y_indices[i], x_indices[3], c))}}; 149 coeff[i] = Interpolate1D(x_weights, values); 150 } 151 output(b, y, x, c) = Interpolate1D(y_weights, coeff); 152 } 153 } 154 } 155 } 156 } 157 158 protected: 159 void RunRandomTest(const int batch_size, const int64 in_height, 160 const int64 in_width, const int target_height, 161 const int target_width, int channels) { 162 LOG(INFO) << "Running random test " << in_height << "x" << in_width << "x" 163 << channels << " to " << target_height << "x" << target_width 164 << "x" << channels; 165 const Tensor* input = SetRandomImageInput( 166 TensorShape({batch_size, in_height, in_width, channels})); 167 AddInputFromArray<int32>(TensorShape({2}), {target_height, target_width}); 168 169 TF_ASSERT_OK(RunOpKernel()); 170 171 std::unique_ptr<Tensor> expected(new Tensor( 172 device_->GetAllocator(AllocatorAttributes()), 173 DataTypeToEnum<float>::v(), 174 TensorShape({batch_size, target_height, target_width, channels}))); 175 176 ResizeBicubicBaseline(input->tensor<float, 4>(), 177 expected->tensor<float, 4>()); 178 // Note: the baseline implementation reduces first in the x direction, and 179 // then in the y direction. The optimized version reduces first in the y 180 // direction, and then the X direction. As a result, there may be 181 // some slight floating point inaccuracies. We thus ensure we're within 182 // 0.00001 of the previous implementation. 183 test::ExpectTensorNear<float>(*expected, *GetOutput(0), 0.00001); 184 } 185 186 void RunManyRandomTests(int channels) { 187 for (int batch_size : {1, 2, 5}) { 188 for (int in_w : {2, 4, 7, 20, 165}) { 189 for (int in_h : {1, 3, 5, 8, 100, 233}) { 190 for (int target_height : {1, 2, 3, 50, 113}) { 191 for (int target_width : {target_height, target_height / 2 + 1}) { 192 RunRandomTest(batch_size, in_h, in_w, target_height, target_width, 193 channels); 194 } 195 } 196 } 197 } 198 } 199 } 200 }; 201 202 TEST_F(ResizeBicubicOpTest, TestBicubic2x2To1x1) { 203 // Input: 204 // 1, 2 205 // 3, 4 206 AddInputFromArray<float>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4}); 207 AddInputFromArray<int32>(TensorShape({2}), {1, 1}); 208 TF_ASSERT_OK(RunOpKernel()); 209 210 // When scaling down, we have to arbitrarily pick a pixel from the 211 // original input. In this case, we choose the top/left most pixel. 212 Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 1, 1})); 213 test::FillValues<float>(&expected, {1.0}); 214 test::ExpectTensorEqual<float>(expected, *GetOutput(0)); 215 } 216 217 TEST_F(ResizeBicubicOpTest, TestBicubic2x2To0x0) { 218 AddInputFromArray<float>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4}); 219 AddInputFromArray<int32>(TensorShape({2}), {0, 0}); 220 221 Status s = RunOpKernel(); 222 EXPECT_TRUE(str_util::StrContains( 223 s.ToString(), "Invalid argument: output dimensions must be positive")) 224 << s; 225 } 226 227 TEST_F(ResizeBicubicOpTest, TestBicubicRandom141x186) { 228 RunRandomTest(2, 141, 186, 299, 299, 1 /* channels */); 229 RunRandomTest(2, 141, 186, 299, 299, 3 /* channels */); 230 } 231 232 TEST_F(ResizeBicubicOpTest, TestBicubicRandom183x229) { 233 RunRandomTest(2, 183, 229, 299, 299, 1 /* channels */); 234 RunRandomTest(2, 183, 229, 299, 299, 3 /* channels */); 235 } 236 237 TEST_F(ResizeBicubicOpTest, TestBicubicRandom749x603) { 238 RunRandomTest(2, 749, 603, 299, 299, 1 /* channels */); 239 RunRandomTest(2, 749, 603, 299, 299, 3 /* channels */); 240 } 241 242 TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes1Channel) { 243 RunManyRandomTests(1); 244 } 245 246 TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes3Channels) { 247 RunManyRandomTests(3); 248 } 249 250 TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes4Channels) { 251 RunManyRandomTests(4); 252 } 253 254 static Graph* ResizeBicubic(int batch_size, int size, int channels, 255 float scale_y = 0.3, float scale_x = 0.7) { 256 Graph* g = new Graph(OpRegistry::Global()); 257 Tensor input(DT_FLOAT, TensorShape({batch_size, size, size, channels})); 258 input.flat<float>().setRandom(); 259 Tensor shape(DT_INT32, TensorShape({2})); 260 auto shape_t = shape.flat<int32>(); 261 shape_t(0) = scale_y * size; 262 shape_t(1) = scale_x * size; 263 test::graph::Binary(g, "ResizeBicubic", test::graph::Constant(g, input), 264 test::graph::Constant(g, shape)); 265 return g; 266 } 267 268 #define BM_ResizeBicubicDev(BATCH, SIZE, CHANNELS) \ 269 static void BM_ResizeBicubic##_##BATCH##_##SIZE##_##CHANNELS(int iters) { \ 270 testing::ItemsProcessed(static_cast<int64>(iters) * BATCH * SIZE * SIZE * \ 271 CHANNELS); \ 272 test::Benchmark("cpu", ResizeBicubic(BATCH, SIZE, CHANNELS)).Run(iters); \ 273 } \ 274 BENCHMARK(BM_ResizeBicubic##_##BATCH##_##SIZE##_##CHANNELS); 275 276 BM_ResizeBicubicDev(8, 32, 3); 277 BM_ResizeBicubicDev(8, 128, 3); 278 BM_ResizeBicubicDev(8, 512, 3); 279 BM_ResizeBicubicDev(8, 1024, 3); 280 BM_ResizeBicubicDev(16, 32, 3); 281 BM_ResizeBicubicDev(16, 128, 3); 282 BM_ResizeBicubicDev(16, 512, 3); 283 BM_ResizeBicubicDev(16, 1024, 3); 284 BM_ResizeBicubicDev(32, 32, 3); 285 BM_ResizeBicubicDev(32, 128, 3); 286 BM_ResizeBicubicDev(32, 512, 3); 287 BM_ResizeBicubicDev(32, 1024, 3); 288 289 #define BM_ResizeBicubicExpand(BATCH, SIZE, CHANNELS) \ 290 static void BM_ResizeBicubicExpand##_##BATCH##_##SIZE##_##CHANNELS( \ 291 int iters) { \ 292 testing::ItemsProcessed(static_cast<int64>(iters) * BATCH * SIZE * SIZE * \ 293 CHANNELS * 8 * 8); \ 294 test::Benchmark("cpu", ResizeBicubic(BATCH, SIZE, CHANNELS, 8, 8)) \ 295 .Run(iters); \ 296 } \ 297 BENCHMARK(BM_ResizeBicubicExpand##_##BATCH##_##SIZE##_##CHANNELS); 298 299 BM_ResizeBicubicExpand(12, 48, 1); 300 BM_ResizeBicubicExpand(12, 48, 3); 301 BM_ResizeBicubicExpand(12, 48, 40); 302 303 } // end namespace tensorflow 304