Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
     17 #include "tensorflow/core/framework/fake_input.h"
     18 #include "tensorflow/core/framework/node_def_builder.h"
     19 #include "tensorflow/core/framework/tensor.h"
     20 #include "tensorflow/core/kernels/ops_testutil.h"
     21 #include "tensorflow/core/lib/strings/str_util.h"
     22 #include "tensorflow/core/platform/test.h"
     23 #include "tensorflow/core/platform/test_benchmark.h"
     24 
     25 namespace tensorflow {
     26 
     27 class ResizeBicubicOpTest : public OpsTestBase {
     28  protected:
     29   ResizeBicubicOpTest() {
     30     TF_EXPECT_OK(NodeDefBuilder("resize_bicubic_op", "ResizeBicubic")
     31                      .Input(FakeInput(DT_FLOAT))
     32                      .Input(FakeInput(DT_INT32))
     33                      .Attr("align_corners", false)
     34                      .Finalize(node_def()));
     35     TF_EXPECT_OK(InitOp());
     36   }
     37 
     38   const Tensor* SetRandomImageInput(const TensorShape& shape) {
     39     inputs_.clear();
     40 
     41     CHECK_EQ(shape.dims(), 4) << "All images must have 4 dimensions.";
     42     bool is_ref = IsRefType(input_types_[inputs_.size()]);
     43     Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()),
     44                                DataTypeToEnum<float>::v(), shape);
     45     input->flat<float>().setRandom();
     46     tensors_.push_back(input);
     47     if (is_ref) {
     48       CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]),
     49                DataTypeToEnum<float>::v());
     50       inputs_.push_back({&lock_for_refs_, input});
     51     } else {
     52       CHECK_EQ(input_types_[inputs_.size()], DataTypeToEnum<float>::v());
     53       inputs_.push_back({nullptr, input});
     54     }
     55     return input;
     56   }
     57 
     58  private:
     59   static const int64 kTableSize = (1 << 10);
     60 
     61   const float* InitCoeffsTable() {
     62     // Allocate and initialize coefficients table using Bicubic
     63     // convolution algorithm.
     64     // https://en.wikipedia.org/wiki/Bicubic_interpolation
     65     float* coeffs_tab = new float[(kTableSize + 1) * 2];
     66     static const double A = -0.75;
     67     for (int i = 0; i <= kTableSize; ++i) {
     68       float x = i * 1.0 / kTableSize;
     69       coeffs_tab[i * 2] = ((A + 2) * x - (A + 3)) * x * x + 1;
     70       x += 1.0;
     71       coeffs_tab[i * 2 + 1] = ((A * x - 5 * A) * x + 8 * A) * x - 4 * A;
     72     }
     73     return coeffs_tab;
     74   }
     75 
     76   const float* GetCoeffsTable() {
     77     // Static so that we initialize it on first use
     78     static const float* coeffs_tab = InitCoeffsTable();
     79     return coeffs_tab;
     80   }
     81 
     82   // Used in the baseline implementation
     83   inline int64 Bound(int64 val, int64 limit) {
     84     return std::min(limit - 1ll, std::max(int64{0}, val));
     85   }
     86 
     87   // Used in the baseline implementation
     88   inline void GetWeightsAndIndices(float scale, int64 out_loc, int64 limit,
     89                                    std::array<float, 4>* weights,
     90                                    std::array<int64, 4>* indices) {
     91     const int64 in_loc = scale * out_loc;
     92     const float delta = scale * out_loc - in_loc;
     93     const int64 offset = lrintf(delta * kTableSize);
     94     const float* coeffs_tab = GetCoeffsTable();
     95     *weights = {{coeffs_tab[offset * 2 + 1], coeffs_tab[offset * 2],
     96                  coeffs_tab[(kTableSize - offset) * 2],
     97                  coeffs_tab[(kTableSize - offset) * 2 + 1]}};
     98     *indices = {{Bound(in_loc - 1, limit), Bound(in_loc, limit),
     99                  Bound(in_loc + 1, limit), Bound(in_loc + 2, limit)}};
    100   }
    101 
    102   // Used in the baseline implementation
    103   inline float Interpolate1D(const std::array<float, 4>& weights,
    104                              const std::array<float, 4>& values) {
    105     return values[0] * weights[0] + values[1] * weights[1] +
    106            values[2] * weights[2] + values[3] * weights[3];
    107   }
    108 
    109   // This is the straight forward unoptimized implementation of resize bicubic
    110   // We use this to confirm that the optimized version is exactly identical.
    111   void ResizeBicubicBaseline(TTypes<float, 4>::ConstTensor images,
    112                              TTypes<float, 4>::Tensor output) {
    113     const int batch_size = images.dimension(0);
    114     const int64 in_height = images.dimension(1);
    115     const int64 in_width = images.dimension(2);
    116     const int channels = images.dimension(3);
    117 
    118     ASSERT_EQ(batch_size, output.dimension(0));
    119     ASSERT_EQ(channels, output.dimension(3));
    120 
    121     const int64 out_height = output.dimension(1);
    122     const int64 out_width = output.dimension(2);
    123 
    124     const float height_scale = in_height / static_cast<float>(out_height);
    125     const float width_scale = in_width / static_cast<float>(out_width);
    126 
    127     std::array<float, 4> coeff = {{0.0, 0.0, 0.0, 0.0}};
    128     for (int64 b = 0; b < batch_size; ++b) {
    129       for (int64 y = 0; y < out_height; ++y) {
    130         std::array<float, 4> y_weights;
    131         std::array<int64, 4> y_indices;
    132         GetWeightsAndIndices(height_scale, y, in_height, &y_weights,
    133                              &y_indices);
    134         for (int64 x = 0; x < out_width; ++x) {
    135           std::array<float, 4> x_weights;
    136           std::array<int64, 4> x_indices;
    137           GetWeightsAndIndices(width_scale, x, in_width, &x_weights,
    138                                &x_indices);
    139           for (int64 c = 0; c < channels; ++c) {
    140             // Use a 4x4 patch to compute the interpolated output value at
    141             // (b, y, x, c).
    142             for (int64 i = 0; i < 4; ++i) {
    143               const std::array<float, 4> values = {
    144                   {static_cast<float>(images(b, y_indices[i], x_indices[0], c)),
    145                    static_cast<float>(images(b, y_indices[i], x_indices[1], c)),
    146                    static_cast<float>(images(b, y_indices[i], x_indices[2], c)),
    147                    static_cast<float>(
    148                        images(b, y_indices[i], x_indices[3], c))}};
    149               coeff[i] = Interpolate1D(x_weights, values);
    150             }
    151             output(b, y, x, c) = Interpolate1D(y_weights, coeff);
    152           }
    153         }
    154       }
    155     }
    156   }
    157 
    158  protected:
    159   void RunRandomTest(const int batch_size, const int64 in_height,
    160                      const int64 in_width, const int target_height,
    161                      const int target_width, int channels) {
    162     LOG(INFO) << "Running random test " << in_height << "x" << in_width << "x"
    163               << channels << " to " << target_height << "x" << target_width
    164               << "x" << channels;
    165     const Tensor* input = SetRandomImageInput(
    166         TensorShape({batch_size, in_height, in_width, channels}));
    167     AddInputFromArray<int32>(TensorShape({2}), {target_height, target_width});
    168 
    169     TF_ASSERT_OK(RunOpKernel());
    170 
    171     std::unique_ptr<Tensor> expected(new Tensor(
    172         device_->GetAllocator(AllocatorAttributes()),
    173         DataTypeToEnum<float>::v(),
    174         TensorShape({batch_size, target_height, target_width, channels})));
    175 
    176     ResizeBicubicBaseline(input->tensor<float, 4>(),
    177                           expected->tensor<float, 4>());
    178     // Note: the baseline implementation reduces first in the x direction, and
    179     // then in the y direction. The optimized version reduces first in the y
    180     // direction, and then the X direction. As a result, there may be
    181     // some slight floating point inaccuracies. We thus ensure we're within
    182     // 0.00001 of the previous implementation.
    183     test::ExpectTensorNear<float>(*expected, *GetOutput(0), 0.00001);
    184   }
    185 
    186   void RunManyRandomTests(int channels) {
    187     for (int batch_size : {1, 2, 5}) {
    188       for (int in_w : {2, 4, 7, 20, 165}) {
    189         for (int in_h : {1, 3, 5, 8, 100, 233}) {
    190           for (int target_height : {1, 2, 3, 50, 113}) {
    191             for (int target_width : {target_height, target_height / 2 + 1}) {
    192               RunRandomTest(batch_size, in_h, in_w, target_height, target_width,
    193                             channels);
    194             }
    195           }
    196         }
    197       }
    198     }
    199   }
    200 };
    201 
    202 TEST_F(ResizeBicubicOpTest, TestBicubic2x2To1x1) {
    203   // Input:
    204   // 1, 2
    205   // 3, 4
    206   AddInputFromArray<float>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4});
    207   AddInputFromArray<int32>(TensorShape({2}), {1, 1});
    208   TF_ASSERT_OK(RunOpKernel());
    209 
    210   // When scaling down, we have to arbitrarily pick a pixel from the
    211   // original input. In this case, we choose the top/left most pixel.
    212   Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 1, 1}));
    213   test::FillValues<float>(&expected, {1.0});
    214   test::ExpectTensorEqual<float>(expected, *GetOutput(0));
    215 }
    216 
    217 TEST_F(ResizeBicubicOpTest, TestBicubic2x2To0x0) {
    218   AddInputFromArray<float>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4});
    219   AddInputFromArray<int32>(TensorShape({2}), {0, 0});
    220 
    221   Status s = RunOpKernel();
    222   EXPECT_TRUE(str_util::StrContains(
    223       s.ToString(), "Invalid argument: output dimensions must be positive"))
    224       << s;
    225 }
    226 
    227 TEST_F(ResizeBicubicOpTest, TestBicubicRandom141x186) {
    228   RunRandomTest(2, 141, 186, 299, 299, 1 /* channels */);
    229   RunRandomTest(2, 141, 186, 299, 299, 3 /* channels */);
    230 }
    231 
    232 TEST_F(ResizeBicubicOpTest, TestBicubicRandom183x229) {
    233   RunRandomTest(2, 183, 229, 299, 299, 1 /* channels */);
    234   RunRandomTest(2, 183, 229, 299, 299, 3 /* channels */);
    235 }
    236 
    237 TEST_F(ResizeBicubicOpTest, TestBicubicRandom749x603) {
    238   RunRandomTest(2, 749, 603, 299, 299, 1 /* channels */);
    239   RunRandomTest(2, 749, 603, 299, 299, 3 /* channels */);
    240 }
    241 
    242 TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes1Channel) {
    243   RunManyRandomTests(1);
    244 }
    245 
    246 TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes3Channels) {
    247   RunManyRandomTests(3);
    248 }
    249 
    250 TEST_F(ResizeBicubicOpTest, TestAreaRandomDataSeveralInputsSizes4Channels) {
    251   RunManyRandomTests(4);
    252 }
    253 
    254 static Graph* ResizeBicubic(int batch_size, int size, int channels,
    255                             float scale_y = 0.3, float scale_x = 0.7) {
    256   Graph* g = new Graph(OpRegistry::Global());
    257   Tensor input(DT_FLOAT, TensorShape({batch_size, size, size, channels}));
    258   input.flat<float>().setRandom();
    259   Tensor shape(DT_INT32, TensorShape({2}));
    260   auto shape_t = shape.flat<int32>();
    261   shape_t(0) = scale_y * size;
    262   shape_t(1) = scale_x * size;
    263   test::graph::Binary(g, "ResizeBicubic", test::graph::Constant(g, input),
    264                       test::graph::Constant(g, shape));
    265   return g;
    266 }
    267 
    268 #define BM_ResizeBicubicDev(BATCH, SIZE, CHANNELS)                            \
    269   static void BM_ResizeBicubic##_##BATCH##_##SIZE##_##CHANNELS(int iters) {   \
    270     testing::ItemsProcessed(static_cast<int64>(iters) * BATCH * SIZE * SIZE * \
    271                             CHANNELS);                                        \
    272     test::Benchmark("cpu", ResizeBicubic(BATCH, SIZE, CHANNELS)).Run(iters);  \
    273   }                                                                           \
    274   BENCHMARK(BM_ResizeBicubic##_##BATCH##_##SIZE##_##CHANNELS);
    275 
    276 BM_ResizeBicubicDev(8, 32, 3);
    277 BM_ResizeBicubicDev(8, 128, 3);
    278 BM_ResizeBicubicDev(8, 512, 3);
    279 BM_ResizeBicubicDev(8, 1024, 3);
    280 BM_ResizeBicubicDev(16, 32, 3);
    281 BM_ResizeBicubicDev(16, 128, 3);
    282 BM_ResizeBicubicDev(16, 512, 3);
    283 BM_ResizeBicubicDev(16, 1024, 3);
    284 BM_ResizeBicubicDev(32, 32, 3);
    285 BM_ResizeBicubicDev(32, 128, 3);
    286 BM_ResizeBicubicDev(32, 512, 3);
    287 BM_ResizeBicubicDev(32, 1024, 3);
    288 
    289 #define BM_ResizeBicubicExpand(BATCH, SIZE, CHANNELS)                         \
    290   static void BM_ResizeBicubicExpand##_##BATCH##_##SIZE##_##CHANNELS(         \
    291       int iters) {                                                            \
    292     testing::ItemsProcessed(static_cast<int64>(iters) * BATCH * SIZE * SIZE * \
    293                             CHANNELS * 8 * 8);                                \
    294     test::Benchmark("cpu", ResizeBicubic(BATCH, SIZE, CHANNELS, 8, 8))        \
    295         .Run(iters);                                                          \
    296   }                                                                           \
    297   BENCHMARK(BM_ResizeBicubicExpand##_##BATCH##_##SIZE##_##CHANNELS);
    298 
    299 BM_ResizeBicubicExpand(12, 48, 1);
    300 BM_ResizeBicubicExpand(12, 48, 3);
    301 BM_ResizeBicubicExpand(12, 48, 40);
    302 
    303 }  // end namespace tensorflow
    304