Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #define EIGEN_USE_THREADS
     17 
#include <cmath>
#include <limits>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/common_runtime/eigen_thread_pool.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/random/simple_philox.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/test.h"
     30 
     31 namespace tensorflow {
     32 namespace {
     33 
     34 void TestRequantizeMany(Eigen::ThreadPoolDevice* eigen_device, float input_min,
     35                         float input_max, float output_min, float output_max,
     36                         const std::vector<qint32>& values_quantized,
     37                         int tolerance = 1) {
     38   const int values_count = values_quantized.size();
     39   std::vector<quint8> expected_values;
     40   expected_values.reserve(values_count);
     41   for (int value_index = 0; value_index < values_count; ++value_index) {
     42     expected_values.push_back(FloatToQuantized<quint8>(
     43         QuantizedToFloat(values_quantized[value_index], input_min, input_max),
     44         output_min, output_max));
     45   }
     46 
     47   Tensor i_tensor =
     48       tensorflow::test::AsTensor(gtl::ArraySlice<qint32>(values_quantized));
     49   Tensor o_tensor(DT_QUINT8, TensorShape{values_count});
     50   auto output_values = o_tensor.flat<quint8>();
     51 
     52   if (eigen_device == nullptr) {
     53     auto input_array = i_tensor.flat<qint32>();
     54     RequantizeManyInNewRange(input_array.data(), input_array.size(), input_min,
     55                              input_max, output_min, output_max,
     56                              output_values.data());
     57   } else {
     58     RequantizeManyInNewRangeUsingEigen<qint32, quint8>(
     59         *eigen_device, i_tensor, input_min, input_max, output_min, output_max,
     60         &o_tensor);
     61   }
     62 
     63   const string tolerance_str = strings::StrCat("+-", tolerance);
     64   for (size_t value_index = 0; value_index < values_count; ++value_index) {
     65     int e = expected_values[value_index];
     66     int v = output_values(value_index);
     67     ASSERT_TRUE(std::abs(e - v) <= tolerance)
     68         << "actual=" << v << ", expected=" << e << tolerance_str
     69         << ", values_quantized[" << value_index
     70         << "]=" << values_quantized[value_index] << ", input_min=" << input_min
     71         << ", input_max=" << input_max << ", output_min=" << output_min
     72         << ", output_max=" << output_max << ", value_index=" << value_index;
     73   }
     74 }
     75 
// Requantizes `values_quantized` (quint8 in [input_min, input_max]) into
// qint32 in [output_min, output_max] using the scalar reference
// implementation, and checks every result against a float round-trip
// reference value to within `tolerance` quantized steps.  The default
// tolerance is larger than in TestRequantizeMany because one 8-bit step
// maps to many 32-bit steps.
void TestRequantizeMany8To32Bit(float input_min, float input_max,
                                float output_min, float output_max,
                                const std::vector<quint8>& values_quantized,
                                int tolerance = 256) {
  const int values_count = values_quantized.size();
  std::vector<qint32> expected_values;
  expected_values.reserve(values_count);
  for (int value_index = 0; value_index < values_count; ++value_index) {
    // Reference result: dequantize to float, then quantize into the new range.
    expected_values.push_back(FloatToQuantized<qint32>(
        QuantizedToFloat(values_quantized[value_index], input_min, input_max),
        output_min, output_max));
  }

  const Tensor i_tensor =
      tensorflow::test::AsTensor(gtl::ArraySlice<quint8>(values_quantized));
  Tensor o_tensor(DT_QINT32, TensorShape{values_count});
  auto output_values = o_tensor.flat<qint32>();

  const auto input_array = i_tensor.flat<quint8>();
  RequantizeManyInNewRange(input_array.data(), input_array.size(), input_min,
                           input_max, output_min, output_max,
                           output_values.data());

  const string tolerance_str = strings::StrCat("+-", tolerance);
  for (int value_index = 0; value_index < values_count; ++value_index) {
    const qint32 e = expected_values[value_index];
    const qint32 v = output_values(value_index);
    ASSERT_TRUE(std::abs(e - v) <= tolerance)
        << "actual=" << v << ", expected=" << e << tolerance_str
        << ", values_quantized[" << value_index
        << "]=" << values_quantized[value_index] << ", input_min=" << input_min
        << ", input_max=" << input_max << ", output_min=" << output_min
        << ", output_max=" << output_max << ", value_index=" << value_index;
  }
}
    111 
    112 // If eigen_device is NULL, then the reference implementation is tested.
    113 void TestRequantizeManyInNewRange32To8Bit(
    114     Eigen::ThreadPoolDevice* eigen_device) {
    115   if (true) {
    116     // These are the float values we're going to test the conversions on.
    117     const size_t values_count = 6;
    118     const float values[values_count] = {0.0f,  0.45f,  1.0f,
    119                                         -1.0f, 127.0f, 255.0f};
    120     // These are the input and output ranges we'll test.
    121     const size_t ranges_count = 6;
    122     const float ranges[ranges_count][4] = {
    123         {0.0f, 255.0f, 0.0f, 255.0f},    //
    124         {0.0f, 1.0f, 0.0f, 1.0f},        //
    125         {-1.0f, 1.0f, -1.0f, 1.0f},      //
    126         {-1.0f, 1.0f, -255.0f, 255.0f},  //
    127         {3.0f, 3.0f, 0.0f, 255.0f},      // input min == max
    128         {0.0f, 255.0f, 5.0f, 5.0f},      // output min == max
    129     };
    130     for (int i = 0; i < ranges_count; ++i) {
    131       const auto& r = ranges[i];
    132       std::vector<qint32> values_quantized;
    133       for (int value_index = 0; value_index < values_count; ++value_index) {
    134         const float v = values[value_index];
    135         values_quantized.push_back(FloatToQuantized<qint32>(v, r[0], r[1]));
    136       }
    137       TestRequantizeMany(eigen_device, r[0], r[1], r[2], r[3],
    138                          values_quantized);
    139     }
    140 
    141     // Test with many different values in the input quantized range.
    142     qint32 low = Eigen::NumTraits<qint32>::lowest();
    143     qint32 high = Eigen::NumTraits<qint32>::highest();
    144     std::vector<qint32> vals{low, high};
    145     int num_steps = 14419;
    146     qint32 step = static_cast<int32>((1LL << 32) / num_steps);
    147     qint32 v = low + static_cast<qint32>(1);
    148     for (int i = 0; i < num_steps; ++i) {
    149       vals.push_back(v);
    150       v += step;
    151     }
    152     TestRequantizeMany(eigen_device, -1.0f, 1.0f, -1.0f, 1.0f, vals);
    153     TestRequantizeMany(eigen_device, -255.0f, 255.0f, -255.0f, 255.0f, vals);
    154     TestRequantizeMany(eigen_device, -1.0f, 1.0f, -12345678.0f, 12345678.0f,
    155                        vals);
    156     TestRequantizeMany(eigen_device, -1.0f, 12345678.0f, -12345678.0f,
    157                        12345678.0f, vals);
    158   }
    159   // Test when the input range is large and output range is small.
    160   // Use all quantized values where the float is in the output range.
    161   const float out_min = -29.1234;
    162   const float out_max = 23.1234;
    163   const float in_min = -1e6;
    164   const float in_max = 1e6;
    165 
    166   qint32 low = FloatToQuantized<qint32>(out_min, in_min, in_max);
    167   qint32 high = FloatToQuantized<qint32>(out_max, in_min, in_max);
    168   std::vector<qint32> vals;
    169   vals.clear();
    170   for (int32 i = low; i <= high; ++i) vals.push_back(i);
    171   TestRequantizeMany(eigen_device, in_min, in_max, out_min, out_max, vals);
    172 }
    173 
    174 void TestRequantizeManyInNewRange8To32Bit() {
    175   // These are the float values we're going to test the conversions on.
    176   const size_t values_count = 6;
    177   const float values[values_count] = {0.0f, 0.45f, 1.0f, -1.0f, 127.0f, 255.0f};
    178   // These are the input and output ranges we'll test.
    179   const size_t ranges_count = 6;
    180   const float ranges[ranges_count][4] = {
    181       {0.0f, 255.0f, 0.0f, 255.0f},    //
    182       {0.0f, 1.0f, 0.0f, 1.0f},        //
    183       {-1.0f, 1.0f, -1.0f, 1.0f},      //
    184       {-1.0f, 1.0f, -255.0f, 255.0f},  //
    185       {3.0f, 3.0f, 0.0f, 255.0f},      // input min == max
    186       {0.0f, 255.0f, 5.0f, 5.0f},      // output min == max
    187   };
    188   for (int i = 0; i < ranges_count; ++i) {
    189     const auto& r = ranges[i];
    190     std::vector<quint8> values_quantized;
    191     for (int value_index = 0; value_index < values_count; ++value_index) {
    192       const float v = values[value_index];
    193       values_quantized.push_back(FloatToQuantized<quint8>(v, r[0], r[1]));
    194     }
    195     TestRequantizeMany8To32Bit(r[0], r[1], r[2], r[3], values_quantized);
    196   }
    197 
    198   // Test with many different values in the input quantized range.
    199   int low = Eigen::NumTraits<quint8>::lowest();
    200   int high = Eigen::NumTraits<quint8>::highest();
    201   std::vector<quint8> vals;
    202   for (int val = low; val <= high; ++val) {
    203     vals.push_back(val);
    204   }
    205   TestRequantizeMany8To32Bit(-1.0f, 1.0f, -1.0f, 1.0f, vals);
    206   TestRequantizeMany8To32Bit(-255.0f, 255.0f, -255.0f, 255.0f, vals);
    207   TestRequantizeMany8To32Bit(-1.0f, 1.0f, -12345678.0f, 12345678.0f, vals);
    208   TestRequantizeMany8To32Bit(-1.0f, 12345678.0f, -12345678.0f, 12345678.0f,
    209                              vals);
    210 }
    211 
    212 template <typename InputType, typename OutputType>
    213 void TestRequantizeManyInNewRangeEigenVsNonEigen() {
    214   thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
    215   EigenThreadPoolWrapper wrapper(&threadpool);
    216   Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */);
    217 
    218   const size_t ranges_count = 6;
    219   const float ranges[ranges_count][4] = {
    220       {0.0f, 255.0f, 0.0f, 255.0f},    //
    221       {0.0f, 1.0f, 0.0f, 1.0f},        //
    222       {-1.0f, 1.0f, -1.0f, 1.0f},      //
    223       {-1.0f, 1.0f, -255.0f, 255.0f},  //
    224       {3.0f, 3.0f, 0.0f, 255.0f},      // input min == max
    225       {0.0f, 255.0f, 5.0f, 5.0f},      // output min == max
    226   };
    227 
    228   // Random values.
    229   for (size_t range_index = 0; range_index < ranges_count; ++range_index) {
    230     const float input_min = ranges[range_index][0];
    231     const float input_max = ranges[range_index][1];
    232     const float output_min = ranges[range_index][2];
    233     const float output_max = ranges[range_index][3];
    234     const int values_count = 10000;
    235     random::PhiloxRandom philox(testing::RandomSeed(), 17);
    236     random::SimplePhilox rnd(&philox);
    237     std::vector<InputType> values_quantized;
    238     for (int i = 0; i < values_count; ++i) {
    239       float v = (rnd.RandFloat() * (input_max - input_min)) + input_min;
    240       values_quantized.push_back(
    241           FloatToQuantized<InputType>(v, input_min, input_max));
    242     }
    243 
    244     Tensor i_tensor = tensorflow::test::AsTensor(
    245         gtl::ArraySlice<InputType>(values_quantized));
    246     const auto i_array = i_tensor.flat<InputType>();
    247     Tensor o_tensor_eigen(DataTypeToEnum<OutputType>::v(),
    248                           TensorShape{values_count});
    249     auto output_values_eigen = o_tensor_eigen.flat<OutputType>();
    250     Tensor o_tensor_ref(DataTypeToEnum<OutputType>::v(),
    251                         TensorShape{values_count});
    252     auto output_values_ref = o_tensor_ref.flat<OutputType>();
    253 
    254     RequantizeManyInNewRange(i_array.data(), i_array.size(), input_min,
    255                              input_max, output_min, output_max,
    256                              output_values_ref.data());
    257     RequantizeManyInNewRangeUsingEigen<InputType, OutputType>(
    258         eigen_device, i_tensor, input_min, input_max, output_min, output_max,
    259         &o_tensor_eigen);
    260 
    261     const int tolerance = 1;
    262     for (int i = 0; i < values_quantized.size(); ++i) {
    263       auto expected = output_values_ref(i);
    264       auto actual = output_values_eigen(i);
    265       // The eigen computation uses float for constants and computation
    266       // instead of doubles, so can be different by 1 or 2 in some cases
    267       // (e.g., input value 144.062744140625, min -1, max 255, type quint8).
    268       ASSERT_TRUE(std::abs(expected - actual) <= tolerance)
    269           << "expected=" << expected << " actual=" << actual
    270           << " tolerance=" << tolerance << " v=" << values_quantized[i]
    271           << " i=" << i << " input_min=" << input_min
    272           << " input_max=" << input_max
    273           << " input_type=" << DataTypeString(DataTypeToEnum<InputType>::v())
    274           << " output_type=" << DataTypeString(DataTypeToEnum<OutputType>::v());
    275     }
    276   }
    277 }
    278 
    279 template <typename InputType, typename OutputType>
    280 void TimeRequantizeManyInNewRange(int64 num_elements, int64 iterations,
    281                                   bool use_eigen) {
    282   const float input_min = -100.0f;
    283   const float input_max = 100.0f;
    284   const float output_min = -1000000.0f;
    285   const float output_max = 1000000.0f;
    286 
    287   random::PhiloxRandom philox(testing::RandomSeed(), 17);
    288   random::SimplePhilox rnd(&philox);
    289   std::vector<InputType> values_quantized;
    290   for (int i = 0; i < num_elements; ++i) {
    291     float v = (rnd.RandFloat() * (input_max - input_min)) + input_min;
    292     values_quantized.push_back(
    293         FloatToQuantized<InputType>(v, input_min, input_max));
    294   }
    295 
    296   thread::ThreadPool threadpool(Env::Default(), "test", 4 /* num_threads */);
    297   EigenThreadPoolWrapper wrapper(&threadpool);
    298   Eigen::ThreadPoolDevice eigen_device(&wrapper, 4 /* num_threads */);
    299 
    300   Tensor i_tensor =
    301       tensorflow::test::AsTensor(gtl::ArraySlice<InputType>(values_quantized));
    302   const auto i_array = i_tensor.flat<InputType>();
    303   Tensor o_tensor_eigen(DataTypeToEnum<OutputType>::v(),
    304                         TensorShape{num_elements});
    305   Tensor o_tensor_ref(DataTypeToEnum<OutputType>::v(),
    306                       TensorShape{num_elements});
    307   auto output_values_ref = o_tensor_ref.flat<OutputType>();
    308 
    309   int64 total_duration = 0;
    310   for (int i = 0; i < iterations; ++i) {
    311     const int64 start_time = Env::Default()->NowMicros();
    312     if (use_eigen) {
    313       RequantizeManyInNewRangeUsingEigen<InputType, OutputType>(
    314           eigen_device, i_tensor, input_min, input_max, output_min, output_max,
    315           &o_tensor_eigen);
    316     } else {
    317       RequantizeManyInNewRange<InputType, OutputType>(
    318           i_array.data(), i_array.size(), input_min, input_max, output_min,
    319           output_max, output_values_ref.data());
    320     }
    321     const int64 end_time = Env::Default()->NowMicros();
    322     total_duration += end_time - start_time;
    323   }
    324   const int64 one_run_duration = total_duration / iterations;
    325 
    326   const int64 num_ops = num_elements;
    327 
    328   const double million_ops_per_second =
    329       (iterations * num_ops) / static_cast<double>(total_duration);
    330 
    331   LOG(INFO) << "TimeRequantizeManyInNewRange: " << num_elements
    332             << (use_eigen ? " eigen" : " ref") << ": iterations=" << iterations
    333             << ", MOps/s=" << million_ops_per_second
    334             << ", one_run_duration=" << one_run_duration
    335             << ", total_duration=" << total_duration;
    336 }
    337 
// Cross-checks FloatTensorToQuantizedInPlaceUsingEigen against the scalar
// FloatToQuantized on an evenly spaced sweep of floats in each of several
// [min, max] ranges, allowing a tolerance of 1 quantized step.
template <typename T>
void TestFloatToQuantizedInPlaceUsingEigen(
    Eigen::ThreadPoolDevice* eigen_device) {
  // These are the float values we're going to test the conversions on.
  typedef std::pair<float, float> FPair;
  for (FPair min_and_max : std::vector<FPair>{FPair(-255.0f, 255.0f),  //
                                              FPair(-1.0f, 1.0f),      //
                                              FPair(-1.0f, 255.0f),    //
                                              FPair(0.0f, 1e6),        //
                                              FPair(0.0f, 1.0f),       //
                                              FPair(-31.0f, 13.0f)}) {
    const float f_min = min_and_max.first;
    const float f_max = min_and_max.second;
    const float f_range = f_max - f_min;
    const int values_count = 50000;
    Tensor input(DT_FLOAT, TensorShape{values_count});
    auto input_array = input.flat<float>();
    // Fill the input with values_count evenly spaced samples of [f_min, f_max].
    for (int i = 0; i < values_count; ++i) {
      input_array(i) = f_min + f_range * i / (values_count - 1);
    }

    Tensor output(DataTypeToEnum<T>::v(), TensorShape{values_count});
    FloatTensorToQuantizedInPlaceUsingEigen<T>(*eigen_device, input, f_min,
                                               f_max, &output);
    auto output_array = output.flat<T>();

    const int tolerance = 1;
    for (int i = 0; i < values_count; ++i) {
      int32 expected = FloatToQuantized<T>(input_array(i), f_min, f_max);
      int32 actual = output_array(i);

      // The eigen computation uses float for constants and computation
      // instead of doubles, so can be different by 1 or 2 in some cases
      // (e.g., input value 144.062744140625, min -1, max 255, type quint8).
      ASSERT_TRUE(std::abs(expected - actual) <= tolerance)
          << "expected=" << expected << " actual=" << actual
          << " tolerance=" << tolerance << " v=" << input_array(i) << " i=" << i
          << " f_min=" << f_min << " f_max=" << f_max
          << " type=" << DataTypeString(DataTypeToEnum<T>::v());
    }
  }
}
    381 
// Cross-checks QuantizedTensorToFloatInPlaceUsingEigen against the scalar
// QuantizedToFloat over a sweep of quantized input values for several
// [f_min, f_max] ranges, to within a relative tolerance of the range width.
template <typename T>
void TestQuantizedToFloatInPlaceUsingEigen(
    Eigen::ThreadPoolDevice* eigen_device) {
  // These are the float values we're going to test the conversions on.
  typedef std::pair<float, float> FPair;
  for (FPair min_and_max : std::vector<FPair>{
           FPair(-255.0f, 255.0f),
           FPair(-1.0f, 1.0f),
           FPair(-1.0f, 255.0f),
           FPair(0.0f, 1e6),
           FPair(0.0f, 1.0f),
           FPair(-31.0f, 13.0f),
           FPair(-5.89505e+08, 5.89505e+08),
       }) {
    const float f_min = min_and_max.first;
    const float f_max = min_and_max.second;
    // One-byte types are tested exhaustively (all 256 values); wider types
    // are sampled evenly across their domain.
    const int values_count = sizeof(T) == 1 ? 256 : 50000;
    Tensor input(DataTypeToEnum<T>::v(), TensorShape{values_count});
    auto input_array = input.flat<T>();
    // Width of T's quantized domain, in double to avoid overflow.
    const double q_range = static_cast<double>(Eigen::NumTraits<T>::highest()) -
                           Eigen::NumTraits<T>::lowest();
    for (int i = 0; i < values_count; ++i) {
      if (sizeof(T) == 1) {
        input_array(i) = Eigen::NumTraits<T>::lowest() + i;
      } else {
        // Evenly spaced samples, clamped to T's maximum.
        int64 offset = static_cast<int64>(q_range / values_count * i);
        input_array(i) = static_cast<int32>(
            std::min<int64>(Eigen::NumTraits<T>::lowest() + offset,
                            Eigen::NumTraits<T>::highest()));
      }
    }

    Tensor output(DT_FLOAT, TensorShape{values_count});
    QuantizedTensorToFloatInPlaceUsingEigen<T>(*eigen_device, input, f_min,
                                               f_max, &output);
    auto output_array = output.flat<float>();
    const double range = static_cast<double>(f_max) - f_min;
    for (int i = 0; i < values_count; ++i) {
      float expected = QuantizedToFloat<T>(input_array(i), f_min, f_max);
      float actual = output_array(i);
      // Tolerance scales with the float range width.
      ASSERT_NEAR(expected, actual, range * 1.1e-7)
          << "expected=" << expected << " actual=" << actual
          << " v=" << input_array(i) << " i=" << i << " f_min=" << f_min
          << " f_max=" << f_max
          << " type=" << DataTypeString(DataTypeToEnum<T>::v());
    }
  }
}
    430 
    431 }  // namespace
    432 
// Spot-checks FloatToQuantized for quint8 and qint32 at range endpoints,
// midpoints, and shifted/asymmetric ranges.
void TestFloatToQuantized() {
  EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(0.0f, 0.0f, 1.0f));
  EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(0.0f, 0.0f, 2.0f));
  EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(0.5f, 0.0f, 1.0f));
  EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(1.0f, 0.0f, 2.0f));
  EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(1.0f, 0.0f, 1.0f));
  EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(2.0f, 0.0f, 2.0f));
  EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(-128.0f, -128.0f, 127.0f));
  EXPECT_EQ(quint8(128), FloatToQuantized<quint8>(0.0f, -128.0f, 127.0f));
  EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(127.0f, -128.0f, 127.0f));
  EXPECT_EQ(quint8(0), FloatToQuantized<quint8>(1.0f, 1.0f, 256.0f));
  EXPECT_EQ(quint8(127), FloatToQuantized<quint8>(128.0f, 1.0f, 256.0f));
  EXPECT_EQ(quint8(255), FloatToQuantized<quint8>(256.0f, 1.0f, 256.0f));

  const int int32_min = std::numeric_limits<int>::min();
  const int int32_max = std::numeric_limits<int>::max();

  // 32-bit quantization should map the range endpoints to the int32 extremes.
  EXPECT_EQ(qint32(int32_min),
            FloatToQuantized<qint32>(-128.0f, -128.0f, 128.0f));
  EXPECT_EQ(qint32(0), FloatToQuantized<qint32>(0.0f, -128.0f, 128.0f));
  EXPECT_EQ(qint32(int32_max),
            FloatToQuantized<qint32>(128.0f, -128.0f, 128.0f));
}
    456 
// Spot-checks QuantizedToFloat for quint8 and qint32: results should land
// within one 8-bit quantization step (1/255 of the range) for quint8, and
// within small absolute tolerances for qint32.
void TestQuantizedToFloat() {
  EXPECT_LT(fabsf(0.0f - QuantizedToFloat<quint8>(0, 0.0f, 1.0f)), 1 / 255.0f);
  EXPECT_LT(fabsf(0.0f - QuantizedToFloat<quint8>(0, 0.0f, 2.0f)), 1 / 255.0f);
  EXPECT_LT(fabsf(0.5f - QuantizedToFloat<quint8>(127, 0.0f, 1.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(127, 0.0f, 2.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(255, 0.0f, 1.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(2.0f - QuantizedToFloat<quint8>(255, 0.0f, 2.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(1.0f - QuantizedToFloat<quint8>(0, 1.0f, 256.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(128.0f - QuantizedToFloat<quint8>(127, 1.0f, 256.0f)),
            1 / 255.0f);
  EXPECT_LT(fabsf(256.0f - QuantizedToFloat<quint8>(255, 1.0f, 256.0f)),
            1 / 255.0f);

  const int int32_min = std::numeric_limits<int>::min();
  const int int32_max = std::numeric_limits<int>::max();

  // The int32 extremes should dequantize to (approximately) the range ends.
  EXPECT_NEAR(-1.0f, QuantizedToFloat<qint32>(qint32(int32_min), -1.0f, 1.0f),
              1e-5f);
  EXPECT_NEAR(0.0f, QuantizedToFloat<qint32>(qint32(0), -1.0f, 1.0f), 1e-5f);
  EXPECT_NEAR(1.0f, QuantizedToFloat<qint32>(qint32(int32_max), -1.0f, 1.0f),
              1e-5f);

  // When the range itself spans the whole int32 domain, a quantized value
  // should dequantize to (approximately) itself.
  EXPECT_NEAR(32.0f, QuantizedToFloat<qint32>(qint32(32), int32_min, int32_max),
              1.0);
}
    487 
    488 void TestAvoidBias() {
    489   for (int i = 0; i < 256; ++i) {
    490     const float as_float = QuantizedToFloat<quint8>(i, 0.0f, 2.0f);
    491     const int back_to_int = FloatToQuantized<quint8>(as_float, 0.0f, 2.0f);
    492     EXPECT_EQ(i, back_to_int);
    493   }
    494 
    495   // All perfectly representable floats should survive quantization, even
    496   // if we pick a range where min is not itself perfectly representable.
    497   const float min = -0.1375f;
    498   const float max = 1.1385f;
    499   const float step_size = (max - min) / 255.0f;
    500   const float tolerance = step_size / 1000.0f;
    501   // This is the smallest perfectly representable float in the range.
    502   float first_float = ceil(min / step_size) * step_size;
    503   for (float f = first_float; f <= max; f += step_size) {
    504     const int as_int = FloatToQuantized<quint8>(f, min, max);
    505     const float back_to_float = QuantizedToFloat<quint8>(as_int, min, max);
    506     EXPECT_NEAR(f, back_to_float, tolerance);
    507   }
    508 }
    509 
// Checks the single-value RequantizeInNewRange<quint8, qint32> against a
// reference computed by dequantizing to float and re-quantizing, for a grid
// of test values crossed with input/output range pairs.
void TestRequantizeInNewRange() {
  // These are the float values we're going to test the conversions on.
  const size_t values_count = 6;
  const float values[values_count] = {0.0f, 0.5f, 1.0f, -1.0f, 127.0f, 255.0f};
  // These are the input and output ranges we'll test.
  const size_t ranges_count = 4;
  const float ranges[ranges_count][4] = {
      {0.0f, 255.0f, 0.0f, 255.0f},
      {0.0f, 1.0f, 0.0f, 1.0f},
      {-1.0f, 1.0f, -1.0f, 1.0f},
      {-1.0f, 1.0f, -255.0f, 255.0f},
  };
  for (size_t value_index = 0; value_index < values_count; ++value_index) {
    const float value_float = values[value_index];
    for (size_t range_index = 0; range_index < ranges_count; ++range_index) {
      const float input_min = ranges[range_index][0];
      const float input_max = ranges[range_index][1];
      const float output_min = ranges[range_index][2];
      const float output_max = ranges[range_index][3];
      const quint8 input_value =
          FloatToQuantized<quint8>(value_float, input_min, input_max);
      // Here we convert the quantized input value to what we expect
      // to get in the output range.
      const qint32 expected_value = FloatToQuantized<qint32>(
          QuantizedToFloat(input_value, input_min, input_max), output_min,
          output_max);
      EXPECT_EQ(expected_value,
                (RequantizeInNewRange<quint8, qint32>(
                    input_value, input_min, input_max, output_min, output_max)))
          << "value_float=" << value_float << ", input_min=" << input_min
          << ", input_max=" << input_max << ", output_min=" << output_min
          << ", output_max=" << output_max;
    }
  }
}
    545 
    546 void TestRequantizeInNewRangeRealData() {
    547   const float input_min = -0.739539f;
    548   const float input_max = 0.641057f;
    549   const float output_min = -2381.49f;
    550   const float output_max = 2207.6f;
    551 
    552   // Start with a value that can be perfectly represented in 8 bits. This
    553   // ensures minimal quantization error, and allows us to use EXPECT_LT below.
    554   const float value_as_float =
    555       QuantizedToFloat<quint8>(83, input_min, input_max);
    556 
    557   const quint8 value_as_quint8 =
    558       FloatToQuantized<quint8>(value_as_float, input_min, input_max);
    559   EXPECT_EQ(quint8(83), value_as_quint8);
    560   const qint32 actual_output = RequantizeInNewRange<quint8, qint32>(
    561       value_as_quint8, input_min, input_max, output_min, output_max);
    562   const qint32 value_as_qint32 =
    563       FloatToQuantized<qint32>(value_as_float, output_min, output_max);
    564   EXPECT_LT(std::abs(value_as_qint32 - actual_output), 10);
    565 }
    566 
// Checks the single-value RequantizeInNewRange<qint32, quint8> against a
// reference computed by dequantizing to float and re-quantizing, for a grid
// of test values crossed with input/output range pairs.
void TestRequantizeInNewRange32To8Bit() {
  // These are the float values we're going to test the conversions on.
  const size_t values_count = 6;
  const float values[values_count] = {0.0f, 0.45f, 1.0f, -1.0f, 127.0f, 255.0f};
  // These are the input and output ranges we'll test.
  const size_t ranges_count = 4;
  const float ranges[ranges_count][4] = {
      {0.0f, 255.0f, 0.0f, 255.0f},
      {0.0f, 1.0f, 0.0f, 1.0f},
      {-1.0f, 1.0f, -1.0f, 1.0f},
      {-1.0f, 1.0f, -255.0f, 255.0f},
  };
  for (size_t value_index = 0; value_index < values_count; ++value_index) {
    const float value_float = values[value_index];
    for (size_t range_index = 0; range_index < ranges_count; ++range_index) {
      const float input_min = ranges[range_index][0];
      const float input_max = ranges[range_index][1];
      const float output_min = ranges[range_index][2];
      const float output_max = ranges[range_index][3];
      const qint32 input_value =
          FloatToQuantized<qint32>(value_float, input_min, input_max);
      // Here we convert the quantized input value to what we expect
      // to get in the output range.
      const quint8 expected_value = FloatToQuantized<quint8>(
          QuantizedToFloat(input_value, input_min, input_max), output_min,
          output_max);
      EXPECT_EQ(expected_value,
                (RequantizeInNewRange<qint32, quint8>(
                    input_value, input_min, input_max, output_min, output_max)))
          << "input_value=" << input_value << ", value_float=" << value_float
          << ", input_min=" << input_min << ", input_max=" << input_max
          << ", output_min=" << output_min << ", output_max=" << output_max;
    }
  }
}
    602 
// Runs the 32->8 bit requantization tests against the scalar reference
// implementation (no Eigen device).
void TestRequantizeManyInNewRange32To8Bit() {
  TestRequantizeManyInNewRange32To8Bit(nullptr /* eigen_device */);
}
    606 
    607 void TestRequantizeManyInNewRange32To8BitUsingEigen() {
    608   thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
    609   EigenThreadPoolWrapper wrapper(&threadpool);
    610   Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */);
    611   TestRequantizeManyInNewRange32To8Bit(&eigen_device);
    612 }
    613 
// Compares the Eigen and non-Eigen requantization paths for unsigned 8-bit
// output (qint32 -> quint8); they must agree on every value.
void TestRequantizeManyInNewRange32To8BitEigenVsNonEigen() {
  TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, quint8>();
}
    617 
// Same Eigen-vs-scalar comparison as above, but for signed 8-bit output
// (qint32 -> qint8).
void TestRequantizeManyInNewRange32To8BitSignedEigenVsNonEigen() {
  TestRequantizeManyInNewRangeEigenVsNonEigen<qint32, qint8>();
}
    621 
    622 void TestFloatTensorToQuantized() {
    623   const int input_width = 3;
    624   const int input_height = 3;
    625   const float input_min = 0.0f;
    626   const float input_max = 255.0f;
    627   Tensor input(DT_FLOAT, TensorShape({input_height, input_width}));
    628   test::FillValues<float>(&input, {1.0f, -1.0f, 10.0f, 10.25f, 127.0f, 255.0f,
    629                                    512.0f, 0.0f, 23.0f});
    630   Tensor expected(DT_QUINT8, TensorShape({input_height, input_width}));
    631   test::FillValues<quint8>(&expected, {1, 0, 10, 10, 127, 255, 255, 0, 23});
    632   Tensor output = FloatTensorToQuantized<quint8>(input, input_min, input_max);
    633   test::ExpectTensorEqual<quint8>(expected, output);
    634 }
    635 
    636 // Verify that FloatToQuantizedInPlaceUsingEigen is same result as
    637 // FloatToQuantized.
    638 void TestFloatToQuantizedInPlaceUsingEigen() {
    639   thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
    640   EigenThreadPoolWrapper wrapper(&threadpool);
    641   Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */);
    642 
    643   TestFloatToQuantizedInPlaceUsingEigen<quint8>(&eigen_device);
    644   TestFloatToQuantizedInPlaceUsingEigen<qint8>(&eigen_device);
    645   TestFloatToQuantizedInPlaceUsingEigen<quint16>(&eigen_device);
    646   TestFloatToQuantizedInPlaceUsingEigen<qint16>(&eigen_device);
    647 }
    648 
    649 void TestOverflowWithEigen() {
    650   thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
    651   EigenThreadPoolWrapper wrapper(&threadpool);
    652   Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */);
    653 
    654   const int num_vals = 4;
    655   const float input_min = 0.0f;
    656   const float input_max = 2400.0f;
    657   TensorShape shape({num_vals});
    658   Tensor input(DT_FLOAT, shape);
    659   test::FillValues<float>(&input, {-100.f, 0.f, 2400.0f, 2400.0f});
    660   Tensor expected(DT_QINT32, shape);
    661   // Note that the positive expected values are not the highest int32 value,
    662   // because the implementation does a bounds check using float, not int32.
    663   test::FillValues<qint32>(
    664       &expected,
    665       {static_cast<int32>(-2147483648), static_cast<int32>(-2147483648),
    666        static_cast<int32>(2147483520), static_cast<int32>(2147483520)});
    667 
    668   FloatToQuantizedStruct<qint32> f2q(input_min, input_max);
    669   Tensor output(DT_QINT32, shape);
    670   auto input_array = input.flat<float>();
    671   output.flat<qint32>() = QUANTIZE_WITH_EIGEN(input_array, f2q, qint32);
    672   test::ExpectTensorEqual<qint32>(expected, output);
    673 }
    674 
// Checks QuantizedTensorToFloat element-wise against hand-computed floats,
// first for quint8 and then for qint32 with a quantization range chosen so
// the float round-trip error stays within a known tolerance.
void TestQuantizedTensorToFloat() {
  const int input_width = 3;
  const int input_height = 3;
  const float input_min = -128.0f;
  const float input_max = 127.0f;
  Tensor input(DT_QUINT8, TensorShape({input_height, input_width}));
  test::FillValues<quint8>(&input, {0, 128, 255, 23, 24, 25, 243, 244, 245});
  Tensor expected(DT_FLOAT, TensorShape({input_height, input_width}));
  test::FillValues<float>(&expected, {-128.0f, 0.0f, 127.0f, -105.0f, -104.0f,
                                      -103.0f, 115.0f, 116.0f, 117.0f});
  Tensor output = QuantizedTensorToFloat<quint8>(input, input_min, input_max);
  test::ExpectTensorEqual<float>(expected, output);

  // Test for signed 32 bit.
  // Note that we cannot use input mins and maxes that match the range because
  // there are 7 too few bits of mantissa accuracy in floats to represent
  // 2**31-1 accurately.  Also there is no good fraction to use because 2**31-1
  // is a mersenne prime.
  Tensor input32(DT_QINT32, TensorShape({input_height, input_width}));

  // Use a quantizer centered at 0.
  float input_range = 1LL << 25;
  int64 num_levels = (1LL << 32) - 1;
  float step_size =
      static_cast<float>(static_cast<double>(input_range) / num_levels);
  // Snap the range minimum to an exact multiple of step_size so the
  // quantized levels land on representable float values.
  float q_compatible_min_value =
      roundf(-(input_range / 2.0) / step_size) * step_size;
  float q_compatible_max_value = q_compatible_min_value + input_range;
  test::FillValues<qint32>(&input32, {-16384, 0, 16256, -13440, -13312, -13184,
                                      14720, 14848, 14976});

  Tensor output32 = QuantizedTensorToFloat<qint32>(
      input32, q_compatible_min_value, q_compatible_max_value);
  // Same expected values as the quint8 case above, but now reached through
  // the 32-bit quantization range.
  test::FillValues<float>(&expected, {-128.0f, 0.0f, 127.0f, -105.0f, -104.0f,
                                      -103.0f, 115.0f, 116.0f, 117.0f});
  // The quantization error in going between 1<<25 and 1<<32 levels.
  const double kTolerance = .5 / 128.0;
  test::ExpectTensorNear<float>(expected, output32, kTolerance);
}
    714 
    715 // Verify that QuantizedToFloatInPlaceUsingEigen is same result as
    716 // QuantizedToFloat.
    717 void TestQuantizedToFloatInPlaceUsingEigen() {
    718   thread::ThreadPool threadpool(Env::Default(), "test", 2 /* num_threads */);
    719   EigenThreadPoolWrapper wrapper(&threadpool);
    720   Eigen::ThreadPoolDevice eigen_device(&wrapper, 2 /* num_threads */);
    721 
    722   TestQuantizedToFloatInPlaceUsingEigen<quint8>(&eigen_device);
    723   TestQuantizedToFloatInPlaceUsingEigen<qint8>(&eigen_device);
    724   TestQuantizedToFloatInPlaceUsingEigen<quint16>(&eigen_device);
    725   TestQuantizedToFloatInPlaceUsingEigen<qint16>(&eigen_device);
    726   TestQuantizedToFloatInPlaceUsingEigen<qint32>(&eigen_device);
    727 }
    728 
    729 void BenchmarkRequantizeManyInNewRange() {
    730   TimeRequantizeManyInNewRange<qint32, quint8>(1000, 1000, false);
    731   TimeRequantizeManyInNewRange<qint32, quint8>(1000, 1000, true);
    732   TimeRequantizeManyInNewRange<qint32, quint8>(100000, 100, false);
    733   TimeRequantizeManyInNewRange<qint32, quint8>(100000, 100, true);
    734   TimeRequantizeManyInNewRange<qint32, quint8>(1000000, 10, false);
    735   TimeRequantizeManyInNewRange<qint32, quint8>(1000000, 10, true);
    736 
    737   TimeRequantizeManyInNewRange<quint8, qint32>(1000, 1000, false);
    738   TimeRequantizeManyInNewRange<quint8, qint32>(1000, 1000, true);
    739   TimeRequantizeManyInNewRange<quint8, qint32>(100000, 100, false);
    740   TimeRequantizeManyInNewRange<quint8, qint32>(100000, 100, true);
    741   TimeRequantizeManyInNewRange<quint8, qint32>(1000000, 10, false);
    742   TimeRequantizeManyInNewRange<quint8, qint32>(1000000, 10, true);
    743 }
    744 
    745 #ifdef QUANTIZATION_UTILS_USE_NEON
// Checks the NEON Divide64x2Pow<POW> helper (divide by 2^POW, truncating
// toward zero — see the integer-division references in
// TestDivide64x2PowAll) by broadcasting |val| into both lanes and expecting
// |ref| in each.
template <int POW>
void TestDivide64x2Pow(int64 val, int64 ref) {
  const int64x2_t val_64x2 = vmovq_n_s64(val);
  const int64x2_t ret = Divide64x2Pow<POW>(val_64x2);
  // TODO(b/70947959) Change back to int64 when possible
  int64_t rets[2];
  vst1q_s64(rets, ret);
  EXPECT_EQ(rets[0], ref);
  EXPECT_EQ(rets[1], ref);
  VLOG(1) << "div: val " << val << ", " << ref;
}
    757 
// Checks the NEON Divide64x2PowRound<POW> helper (divide by 2^POW with
// rounding half away from zero — see the float-based references in
// TestDivide64x2PowAll) by broadcasting |val| into both lanes and expecting
// |ref| in each.
template <int POW>
void TestDivide64x2PowRound(int64 val, int64 ref) {
  const int64x2_t val_64x2 = vmovq_n_s64(val);
  const int64x2_t shifted = Divide64x2PowRound<POW>(val_64x2);
  // TODO(b/70947959) Change back to int64 when possible
  int64_t rets[2];
  vst1q_s64(rets, shifted);
  EXPECT_EQ(rets[0], ref) << "in = " << val << ", " << POW
                          << ", act = " << rets[0] << ", ref = " << ref;
  EXPECT_EQ(rets[1], ref);
  VLOG(1) << "div round: " << val << ", " << rets[0];
}
    770 
// Exercises both NEON divide-by-power-of-two helpers over [-999, 999] plus
// a few hand-picked cases with nonzero remainders.
void TestDivide64x2PowAll() {
  // Rounding variant: the reference rounds half away from zero, computed in
  // float (+0.5 for positives, -0.5 for negatives before truncation).
  for (int64 i = 0; i < 1000; ++i) {
    TestDivide64x2PowRound<1>(
        i, static_cast<int64>(static_cast<float>(i) / 2.0f + 0.5f));
    TestDivide64x2PowRound<1>(
        -i, static_cast<int64>(static_cast<float>(-i) / 2.0f - 0.5f));
    TestDivide64x2PowRound<2>(
        i, static_cast<int64>(static_cast<float>(i) / 4.0f + 0.5f));
    TestDivide64x2PowRound<2>(
        -i, static_cast<int64>(static_cast<float>(-i) / 4.0f - 0.5f));
    TestDivide64x2PowRound<4>(
        i, static_cast<int64>(static_cast<float>(i) / 16.0f + 0.5f));
    TestDivide64x2PowRound<4>(
        -i, static_cast<int64>(static_cast<float>(-i) / 16.0f - 0.5f));
    TestDivide64x2PowRound<8>(
        i, static_cast<int64>(static_cast<float>(i) / 256.0f + 0.5f));
    TestDivide64x2PowRound<8>(
        -i, static_cast<int64>(static_cast<float>(-i) / 256.0f - 0.5f));
    TestDivide64x2PowRound<16>(
        i, static_cast<int64>(static_cast<float>(i) / 65536.0f + 0.5f));
    TestDivide64x2PowRound<16>(
        -i, static_cast<int64>(static_cast<float>(-i) / 65536.0f - 0.5f));
  }

  // Truncating variant: spot checks where the division has a remainder.
  TestDivide64x2Pow<2>(100, 25);
  TestDivide64x2Pow<2>(-100, -25);
  TestDivide64x2Pow<4>(100, 6);
  TestDivide64x2Pow<4>(-100, -6);

  // Truncating variant: the reference is C++ integer division (toward zero).
  for (int64 i = 0; i < 1000; ++i) {
    TestDivide64x2Pow<1>(i, i / 2);
    TestDivide64x2Pow<1>(-i, -i / 2);
    TestDivide64x2Pow<2>(i, i / 4);
    TestDivide64x2Pow<2>(-i, -i / 4);
    TestDivide64x2Pow<4>(i, i / 16);
    TestDivide64x2Pow<4>(-i, -i / 16);
    TestDivide64x2Pow<8>(i, i / 256);
    TestDivide64x2Pow<8>(-i, -i / 256);
    TestDivide64x2Pow<16>(i, i / 65536);
    TestDivide64x2Pow<16>(-i, -i / 65536);
  }
}
    813 
// Broadcasts |val| into all 8 lanes of a uint8x8_t NEON register.
uint8x8_t To8x8(uint8 val) { return vmov_n_u8(val); }
    815 
// Broadcasts |val| into all 8 lanes of an int16x8_t NEON register.
int16x8_t To16x8(int16 val) { return vmovq_n_s16(val); }
    817 
    818 int32x2_t To32x2(int32 val) {
    819   int32 vals[2];
    820   vals[0] = val;
    821   vals[1] = val;
    822   return vld1_s32(vals);
    823 }
    824 
    825 template <int RESOLUTION, typename T_CALC>
    826 T_CALC ComputeRefLerp(T_CALC top_left, T_CALC top_right, T_CALC bottom_left,
    827                       T_CALC bottom_right, T_CALC x_lerp, T_CALC y_lerp) {
    828   constexpr T_CALC RESOLUTION_POW = (1 << RESOLUTION);
    829   const T_CALC top =
    830       top_left * RESOLUTION_POW + (top_right - top_left) * x_lerp;
    831   const T_CALC bottom =
    832       bottom_left * RESOLUTION_POW + (bottom_right - bottom_left) * x_lerp;
    833   const T_CALC out = top + (bottom - top) / RESOLUTION_POW * y_lerp;
    834   return (out + RESOLUTION_POW / 2) / RESOLUTION_POW;
    835 }
    836 
// Runs the NEON ComputeLerp8x8 kernel with every lane holding the same
// inputs and checks all 8 lanes against the scalar int16 reference.
template <int RESOLUTION>
void TestComputeLerp8x8(uint8 top_left, uint8 top_right, uint8 bottom_left,
                        uint8 bottom_right, int16 x_lerp, int16 y_lerp) {
  // Broadcast each scalar input across the vector lanes.
  uint8x8_t top_left8x8 = To8x8(top_left);
  uint8x8_t top_right8x8 = To8x8(top_right);
  uint8x8_t bottom_left8x8 = To8x8(bottom_left);
  uint8x8_t bottom_right8x8 = To8x8(bottom_right);
  int16x8_t x_lerp16x8 = To16x8(x_lerp);
  int16x8_t y_lerp16x8 = To16x8(y_lerp);
  const uint8x8_t ret =
      ComputeLerp8x8<RESOLUTION>(top_left8x8, top_right8x8, bottom_left8x8,
                                 bottom_right8x8, x_lerp16x8, y_lerp16x8);

  uint8 rets[8];
  vst1_u8(rets, ret);

  // The reference is computed in 16-bit arithmetic to match the kernel's
  // intermediate precision.
  const int16 ref = ComputeRefLerp<RESOLUTION, int16>(
      static_cast<int16>(top_left), static_cast<int16>(top_right),
      static_cast<int16>(bottom_left), static_cast<int16>(bottom_right), x_lerp,
      y_lerp);

  // Every lane was given identical inputs, so every lane must match.
  for (int i = 0; i < 8; ++i) {
    EXPECT_EQ(ref, static_cast<int16>(rets[i]));
  }

  VLOG(1) << "Lerp(8): " << static_cast<int>(top_left) << ", "
          << static_cast<int>(top_right) << ", "
          << static_cast<int>(bottom_left) << ", "
          << static_cast<int>(bottom_right) << ", " << x_lerp << ", " << y_lerp
          << ", " << static_cast<int>(rets[0]) << ", " << ref;
}
    868 
// Runs the NEON ComputeLerp32x2 kernel with both lanes holding the same
// inputs and checks lane 0 against the scalar int64 reference.
template <int RESOLUTION>
void TestComputeLerp32x2(int32 top_left, int32 top_right, int32 bottom_left,
                         int32 bottom_right, int32 x_lerp, int32 y_lerp) {
  // Broadcast each scalar input across both vector lanes.
  int32x2_t top_left32x2 = To32x2(top_left);
  int32x2_t top_right32x2 = To32x2(top_right);
  int32x2_t bottom_left32x2 = To32x2(bottom_left);
  int32x2_t bottom_right32x2 = To32x2(bottom_right);
  int32x2_t x_lerp32x2 = To32x2(x_lerp);
  int32x2_t y_lerp32x2 = To32x2(y_lerp);
  const int32x2_t ret =
      ComputeLerp32x2<RESOLUTION>(top_left32x2, top_right32x2, bottom_left32x2,
                                  bottom_right32x2, x_lerp32x2, y_lerp32x2);
  int32 rets[2];
  vst1_s32(rets, ret);
  // The reference is computed in 64-bit arithmetic so the intermediate
  // products cannot overflow.
  const int64 ref = ComputeRefLerp<RESOLUTION, int64>(
      static_cast<int64>(top_left), static_cast<int64>(top_right),
      static_cast<int64>(bottom_left), static_cast<int64>(bottom_right),
      static_cast<int64>(x_lerp), static_cast<int64>(y_lerp));
  EXPECT_EQ(static_cast<int64>(rets[0]), ref);
  VLOG(1) << "Lerp(32): " << top_left << ", " << top_right << ", "
          << bottom_left << ", " << bottom_right << ", " << x_lerp << ", "
          << y_lerp << ", " << rets[0] << ", " << ref;
}
    892 
// Drives the 32x2 and 8x8 lerp tests with half-way lerp factors: 30
// fractional bits for the 32-bit kernel, 7 for the 8-bit kernel.
void TestComputeLerp4xAll() {
  constexpr int32 RESOLUTION_32 = 30;
  constexpr int32 RESOLUTION_MULT_32 = (1 << RESOLUTION_32);
  constexpr int32 HALF_32 = RESOLUTION_MULT_32 / 2;
  TestComputeLerp32x2<RESOLUTION_32>(100, 200, 300, 400, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(100, 100, 200, 200, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(200, 200, 100, 100, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(100, 200, 100, 200, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(200, 100, 200, 100, HALF_32, HALF_32);
  TestComputeLerp32x2<RESOLUTION_32>(200, 200, 200, 200, HALF_32, HALF_32);

  constexpr int32 RESOLUTION_8 = 7;
  constexpr int32 RESOLUTION_MULT_8 = (1 << RESOLUTION_8);
  constexpr int32 HALF_8 = RESOLUTION_MULT_8 / 2;
  TestComputeLerp8x8<RESOLUTION_8>(10, 20, 30, 40, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(100, 100, 200, 200, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(200, 200, 100, 100, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(100, 200, 100, 200, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(200, 100, 200, 100, HALF_8, HALF_8);
  TestComputeLerp8x8<RESOLUTION_8>(200, 200, 200, 200, HALF_8, HALF_8);
}
    914 
    915 #endif
    916 
    917 }  // namespace tensorflow
    918 
// Registers a tensorflow-namespace test function as a gtest TEST case of
// the same name under the QuantizationUtilsTest suite.
#define RUN_TEST(t) \
  TEST(QuantizationUtilsTest, t) { tensorflow::t(); }
    921 
// Tests registered on every platform.
RUN_TEST(TestFloatToQuantized);
RUN_TEST(TestQuantizedToFloat);
RUN_TEST(TestAvoidBias);
RUN_TEST(TestRequantizeInNewRange);
RUN_TEST(TestRequantizeInNewRangeRealData);
RUN_TEST(TestRequantizeInNewRange32To8Bit);
RUN_TEST(TestRequantizeManyInNewRange32To8Bit);
RUN_TEST(TestRequantizeManyInNewRange32To8BitUsingEigen);
RUN_TEST(TestRequantizeManyInNewRange32To8BitEigenVsNonEigen);
RUN_TEST(TestRequantizeManyInNewRange32To8BitSignedEigenVsNonEigen);
RUN_TEST(TestFloatTensorToQuantized);
RUN_TEST(TestRequantizeManyInNewRange8To32Bit);
RUN_TEST(TestFloatToQuantizedInPlaceUsingEigen);
RUN_TEST(TestOverflowWithEigen);
RUN_TEST(TestQuantizedTensorToFloat);
RUN_TEST(TestQuantizedToFloatInPlaceUsingEigen);

// The benchmark (and, when compiled in, the NEON tests) are only
// registered on Android builds.
#if defined(__ANDROID__)

RUN_TEST(BenchmarkRequantizeManyInNewRange);

#ifdef QUANTIZATION_UTILS_USE_NEON

RUN_TEST(TestDivide64x2PowAll);
RUN_TEST(TestComputeLerp4xAll);

#endif  // QUANTIZATION_UTILS_USE_NEON

#endif  // __ANDROID__
    951 
    952 int main(int argc, char** argv) {
    953   // On Linux, add: FLAGS_logtostderr = true;
    954   ::testing::InitGoogleTest(&argc, argv);
    955   return RUN_ALL_TESTS();
    956 }
    957