/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_
#define TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_

#include <algorithm>
#include <cmath>
#include <complex>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <limits>
#include <map>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include "tensorflow/core/platform/logging.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/testing/util.h"
#include "tensorflow/lite/tools/optimize/quantization_utils.h"

namespace tflite {

// Returns a vector of gmock matchers that check that the elements of a float
// vector match the given values to within the given tolerance.
std::vector<::testing::Matcher<float>> ArrayFloatNear(
    const std::vector<float>& values, float max_abs_error = 1e-5);

// Returns a vector of gmock matchers that check that the elements of a
// complex<float> vector match the given values to within the given tolerance.
std::vector<::testing::Matcher<std::complex<float>>> ArrayComplex64Near(
    const std::vector<std::complex<float>>& values, float max_abs_error = 1e-5);

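// Usage sketch (hypothetical values): since these helpers return a vector of
// element matchers rather than a single matcher, they are typically wrapped
// in ::testing::ElementsAreArray:
//   EXPECT_THAT(output, ElementsAreArray(ArrayFloatNear({1.0f, 2.0f}, 1e-4)));
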
// Quantize float values into type T using the given scale and zero point,
// rounding to nearest and clamping to T's representable range.
template <typename T>
inline std::vector<T> Quantize(const std::vector<float>& data, float scale,
                               int32_t zero_point) {
  std::vector<T> q;
  for (float f : data) {
    q.push_back(static_cast<T>(std::max<float>(
        std::numeric_limits<T>::min(),
        std::min<float>(std::numeric_limits<T>::max(),
                        std::round(zero_point + (f / scale))))));
  }
  return q;
}
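
// Example (hypothetical values): with scale 0.5 and zero point 128,
//   Quantize<uint8_t>({-1.0f, 0.0f, 1.0f}, 0.5f, 128) yields {126, 128, 130}.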

// Map quantized values back to floats: f = scale * (q - zero_point).
template <typename T>
inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
                                     int32_t zero_point) {
  std::vector<float> f;
  for (T q : data) {
    f.push_back(scale * (q - zero_point));
  }
  return f;
}
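
// Example (hypothetical values, round-tripping the Quantize example above):
//   Dequantize<uint8_t>({126, 128, 130}, 0.5f, 128) yields {-1.0f, 0.0f, 1.0f}.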

// A test model that contains a single operator. All operator inputs and
// outputs are external to the model, so the tests can directly access them.
// Typical usage:
//    SingleOpModel m;
//    int a = m.AddInput({TensorType_FLOAT32, a_shape});
//    int b = m.AddInput({TensorType_FLOAT32, b_shape});
//    int c = m.AddOutput({TensorType_FLOAT32, {}});
//    m.SetBuiltinOp(...);
//    m.BuildInterpreter({GetShape(a), GetShape(b)});
//    m.PopulateTensor(a, {...});
//    m.PopulateTensor(b, {...});
//    m.Invoke();
//    EXPECT_THAT(m.ExtractVector<float>(c),
//                ElementsAreArray(ArrayFloatNear({...})));

// A helper struct to construct test tensors. This is particularly useful for
// quantized tensors, which must have their scale and zero_point defined before
// the actual data is known. This mimics what happens in practice: quantization
// parameters are calculated during training or post-training.
struct TensorData {
  TensorData(TensorType type = TensorType_FLOAT32, std::vector<int> shape = {},
             float min = 0.0f, float max = 0.0f, float scale = 0.0f,
             int32_t zero_point = 0, bool per_channel_quantization = false,
             std::vector<float> per_channel_quantization_scales = {},
             std::vector<int64_t> per_channel_quantization_offsets = {},
             int32_t channel_index = 0)
      : type(type),
        shape(shape),
        min(min),
        max(max),
        scale(scale),
        zero_point(zero_point),
        per_channel_quantization(per_channel_quantization),
        per_channel_quantization_scales(
            std::move(per_channel_quantization_scales)),
        per_channel_quantization_offsets(
            std::move(per_channel_quantization_offsets)),
        channel_index(channel_index) {}
  TensorType type;
  std::vector<int> shape;
  float min;
  float max;
  float scale;
  int32_t zero_point;
  bool per_channel_quantization;
  std::vector<float> per_channel_quantization_scales;
  std::vector<int64_t> per_channel_quantization_offsets;
  int32_t channel_index;
};
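
// Example (hypothetical values): a uint8 tensor whose scale and zero point
// will be derived from the [min, max] range when the tensor is added to a
// model:
//   TensorData q = {TensorType_UINT8, /*shape=*/{1, 2, 2, 1},
//                   /*min=*/-1.0f, /*max=*/1.0f};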

class SingleOpResolver : public OpResolver {
 public:
  SingleOpResolver(const BuiltinOperator op, TfLiteRegistration* registration)
      : op_(op), registration_(*registration) {
    registration_.builtin_code = static_cast<int32_t>(op);
    registration_.version = 1;
  }
  const TfLiteRegistration* FindOp(BuiltinOperator op,
                                   int version) const override {
    if (op == op_) {
      return &registration_;
    }
    return nullptr;
  }
  const TfLiteRegistration* FindOp(const char* op, int version) const override {
    return nullptr;
  }

 private:
  const BuiltinOperator op_;
  TfLiteRegistration registration_;
};

class SingleOpModel {
 public:
  SingleOpModel() {}
  ~SingleOpModel() {}

  // Set a callback that is run right after the graph is prepared, so that
  // external delegates can be applied. This is useful for testing other
  // runtimes, such as the NN API or GPUs.
  void SetApplyDelegate(std::function<void(Interpreter*)> apply_delegate_fn) {
    apply_delegate_fn_ = apply_delegate_fn;
  }

  // Copying or assignment is disallowed to simplify ownership semantics.
  SingleOpModel(const SingleOpModel&) = delete;
  SingleOpModel& operator=(const SingleOpModel&) = delete;

  // Add a TensorType input tensor and return its index.
  int AddInput(TensorType type, bool is_variable = false) {
    return AddInput(TensorData{type}, is_variable);
  }
  int AddInput(const TensorData& t, bool is_variable = false);

  // Add a constant input tensor with the given data and shape, and return its
  // index.
  template <typename T>
  int AddConstInput(TensorType type, std::initializer_list<T> data,
                    std::initializer_list<int> shape) {
    int id = AddTensor(TensorData{type, shape}, data);
    inputs_.push_back(id);
    return id;
  }

  // Add a null input tensor (optional input) and return kOptionalTensor.
  int AddNullInput();

  // Add a TensorType output tensor and return its index.
  int AddOutput(TensorType type) { return AddOutput(TensorData{type}); }
  int AddOutput(const TensorData& t);

  // Quantize the given float data with the tensor's own scale and zero point,
  // then write it into the tensor at the given index.
  template <typename T>
  void QuantizeAndPopulate(int index, const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto q = Quantize<T>(data, t->params.scale, t->params.zero_point);
    PopulateTensor(index, 0, q.data(), q.data() + q.size());
  }

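  // Example (hypothetical values): for an input added as
  //   int in = AddInput({TensorType_UINT8, {2}, /*min=*/-1.0f, /*max=*/1.0f});
  // real-valued data can be fed after BuildInterpreter() with
  //   QuantizeAndPopulate<uint8_t>(in, {-0.5f, 0.5f});
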
  // Symmetrically quantize the float data, writing it as uint8 values.
  void SymmetricQuantizeAndPopulate(int index, const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
                   reinterpret_cast<uint8_t*>(q.data() + q.size()));
  }

  // Symmetrically quantize the float data, writing it as int8 values.
  void SignedSymmetricQuantizeAndPopulate(int index,
                                          const std::vector<float>& data) {
    std::vector<int8_t> q = QuantizeTensor(index, data);
    PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
  }

  // Quantize and populate data for a filter with per-channel quantization.
  void PerChannelSymmetricQuantizeAndPopulate(
      int index, const std::vector<float>& input_data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    const int channel_index = params->quantized_dimension;

    std::vector<int32_t> shape(t->dims->size);
    for (int i = 0; i < shape.size(); ++i) {
      shape[i] = t->dims->data[i];
    }
    const int32_t num_inputs = input_data.size();
    const int32_t num_channel = shape[channel_index];
    std::vector<int8_t> quantized_output(num_inputs);
    std::vector<float> scales_inv(num_channel);
    for (int i = 0; i < num_channel; ++i) {
      scales_inv[i] = 1.0f / params->scale->data[i];
    }
    optimize::utils::SymmetricPerChannelQuantizeValues(
        input_data.data(), scales_inv, shape, channel_index, &quantized_output);

    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }

  // Quantize and populate data for a bias with per-channel quantization.
  void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
    const int32_t num_inputs = input_data.size();
    std::vector<int32_t> quantized_output(num_inputs);
    TfLiteTensor* t = interpreter_->tensor(index);
    auto* params =
        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
    for (int i = 0; i < num_inputs; ++i) {
      // q = f / scale: quantize each bias element with its channel's scale.
      quantized_output[i] = input_data[i] / params->scale->data[i];
    }
    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
                   quantized_output.data() + quantized_output.size());
  }
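
  // Note (convention, not enforced here): for per-channel quantized ops,
  // TFLite expects the bias scale for channel c to be
  // input_scale * filter_scale[c]; this helper assumes the tensor's
  // quantization params were already created with those scales.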

  const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }

  float GetScale(int id) { return tensor_data_.at(id).scale; }
  int32_t GetZeroPoint(int id) { return tensor_data_.at(id).zero_point; }

  // Define the operator in this model.
  void SetBuiltinOp(BuiltinOperator type, BuiltinOptions builtin_options_type,
                    flatbuffers::Offset<void> builtin_options);
  void SetCustomOp(const string& name,
                   const std::vector<uint8_t>& custom_option,
                   const std::function<TfLiteRegistration*()>& registration);

  // Build the interpreter for this model. Also resizes and allocates all
  // tensors, given the shapes of the inputs.
  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
                        bool allow_fp32_relax_to_fp16 = false);

  void Invoke();

  void PopulateStringTensor(int index, const std::vector<string>& content) {
    auto tensor = interpreter_->tensor(index);
    DynamicBuffer buf;
    for (const string& s : content) {
      buf.AddString(s.data(), s.length());
    }
    buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with vector-taking variant below.
  template <typename T>
  void PopulateTensor(int index, const std::initializer_list<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK(v) << "Type mismatch for tensor with index " << index
               << ". Requested " << typeToTfLiteType<T>() << ", got "
               << t->type;
    }
    for (T f : data) {
      *v = f;
      ++v;
    }
  }

  // Populate the tensor given its index.
  // TODO(b/110696148) clean up and merge with initializer_list-taking variant
  // above.
  template <typename T>
  void PopulateTensor(int index, const std::vector<T>& data) {
    T* v = interpreter_->typed_tensor<T>(index);
    if (!v) {
      auto* t = interpreter_->tensor(index);
      CHECK(t) << "No tensor with index " << index << ".";
      CHECK(t->data.raw) << "Empty data for tensor with index " << index << ".";
      CHECK(v) << "Type mismatch for tensor with index " << index
               << ". Requested " << typeToTfLiteType<T>() << ", got "
               << t->type;
    }
    for (T f : data) {
      *v = f;
      ++v;
    }
  }

  // Partially populate the tensor, starting at the given offset.
  template <typename T>
  void PopulateTensor(int index, int offset, T* begin, T* end) {
    T* v = interpreter_->typed_tensor<T>(index);
    memcpy(v + offset, begin, (end - begin) * sizeof(T));
  }

  // Return a vector with the flattened contents of a tensor.
  template <typename T>
  std::vector<T> ExtractVector(int index) {
    T* v = interpreter_->typed_tensor<T>(index);
    CHECK(v);
    return std::vector<T>(v, v + GetTensorSize(index));
  }

  std::vector<int> GetTensorShape(int index) {
    std::vector<int> result;
    TfLiteTensor* t = interpreter_->tensor(index);
    for (int i = 0; i < t->dims->size; ++i) {
      result.push_back(t->dims->data[i]);
    }
    return result;
  }

  void SetNumThreads(int num_threads) {
    interpreter_->SetNumThreads(num_threads);
  }

  void SetResolver(std::unique_ptr<OpResolver> resolver) {
    resolver_ = std::move(resolver);
  }

 protected:
  int32_t GetTensorSize(int index) const;

  flatbuffers::FlatBufferBuilder builder_;
  std::unique_ptr<tflite::Interpreter> interpreter_;
  std::unique_ptr<OpResolver> resolver_;

 private:
  // TODO(gavinbelson): sync this method with
  // //tensorflow/lite/kernels/internal/quantization_util.h?l=31
  template <typename T>
  std::pair<float, int32_t> QuantizationParams(float f_min, float f_max) {
    // Quantized ops require the real value 0 to be exactly representable, so
    // the float range must straddle zero.
    CHECK_LE(f_min, 0);
    CHECK_GE(f_max, 0);
    T q_min = std::numeric_limits<T>::min();
    T q_max = std::numeric_limits<T>::max();
    // Compute the range in float to avoid integer overflow for wide types
    // such as int32_t.
    float range = static_cast<float>(q_max) - static_cast<float>(q_min);
    float scale = (f_max - f_min) / range;
    int32_t zero_point = std::min(
        q_max,
        std::max(q_min, static_cast<T>(std::round(q_min - f_min / scale))));
    return {scale, zero_point};
  }
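
  // Worked example (hypothetical values): for T = uint8_t and
  // [f_min, f_max] = [-1.0f, 1.0f], range = 255, so scale = 2 / 255 ~= 0.00784
  // and zero_point = round(0 - (-1.0f / scale)) = 128, clamped to [0, 255].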

  int AddTensorPerChannelQuant(TensorData t) {
    const int id = tensors_.size();
    flatbuffers::Offset<QuantizationParameters> q_params = 0;
    q_params = CreateQuantizationParameters(
        builder_, /*min=*/0, /*max=*/0,
        /*scale=*/
        builder_.CreateVector<float>(t.per_channel_quantization_scales),
        /*zero point=*/
        builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
        QuantizationDetails_NONE, 0, t.channel_index);
    tensors_.push_back(
        CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
                     /*buffer=*/0,
                     /*name=*/0, q_params, /*is_variable=*/false));
    tensor_data_[id] = t;
    return id;
  }

  template <typename T>
  int AddTensor(TensorData t, std::initializer_list<T> data,
                bool is_variable = false) {
    int id = tensors_.size();

    // This is slightly different depending on whether we are adding a
    // quantized or a regular tensor.
    bool is_quantized = (t.min != 0 || t.max != 0 || t.scale != 0);

    flatbuffers::Offset<QuantizationParameters> q_params = 0;

    if (is_quantized) {
      if (t.min != 0 || t.max != 0) {
        if (t.type == TensorType_UINT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<uint8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT8) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int8_t>(t.min, t.max);
        } else if (t.type == TensorType_INT32) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int32_t>(t.min, t.max);
        } else if (t.type == TensorType_INT16) {
          std::tie(t.scale, t.zero_point) =
              QuantizationParams<int16_t>(t.min, t.max);
        } else {
          LOG(FATAL) << "No support for the requested quantized type";
        }
        t.min = 0;
        t.max = 0;
      }

      q_params = CreateQuantizationParameters(
          builder_, /*min=*/0, /*max=*/0,
          builder_.CreateVector<float>({t.scale}),
          builder_.CreateVector<int64_t>({t.zero_point}));
    }

    int buffer_id = 0;
    if (data.size()) {
      // Initialize buffers list with empty buffer to allow for non-const
      // tensors.
      if (buffers_.empty()) {
        buffers_.push_back(CreateBuffer(builder_, builder_.CreateVector({})));
      }

      // Add data as a Buffer to buffers list.
      buffer_id = buffers_.size();
      auto data_buffer =
          builder_.CreateVector(reinterpret_cast<const uint8_t*>(data.begin()),
                                sizeof(T) * data.size());
      buffers_.push_back(CreateBuffer(builder_, data_buffer));
    }

    tensors_.push_back(CreateTensor(builder_,
                                    builder_.CreateVector<int>(t.shape), t.type,
                                    /*buffer=*/buffer_id,
                                    /*name=*/0, q_params, is_variable));

    tensor_data_[id] = t;

    return id;
  }

  // Symmetrically quantize the float data to int8, updating the tensor's
  // quantization params with the scale computed from the data.
  std::vector<int8_t> QuantizeTensor(int index,
                                     const std::vector<float>& data) {
    TfLiteTensor* t = interpreter_->tensor(index);
    const int length = data.size();
    std::vector<int8_t> q(length);
    float min, max, scaling_factor;
    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
                                          &max, &scaling_factor);
    // Update quantization params.
    t->params.scale = scaling_factor;
    t->params.zero_point = 0;
    // Populate the new quantization params.
    TfLiteQuantizationFree(&t->quantization);
    t->quantization.type = kTfLiteAffineQuantization;
    auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
        malloc(sizeof(TfLiteAffineQuantization)));
    affine_quantization->quantized_dimension = 0;
    affine_quantization->scale = TfLiteFloatArrayCreate(1);
    affine_quantization->zero_point = TfLiteIntArrayCreate(1);
    affine_quantization->scale->data[0] = scaling_factor;
    affine_quantization->zero_point->data[0] = 0;
    t->quantization.params = affine_quantization;
    return q;
  }

  std::map<int, TensorData> tensor_data_;
  std::vector<int32_t> inputs_;
  std::vector<int32_t> outputs_;
  std::vector<flatbuffers::Offset<Tensor>> tensors_;
  std::vector<flatbuffers::Offset<OperatorCode>> opcodes_;
  std::vector<flatbuffers::Offset<Operator>> operators_;
  std::vector<flatbuffers::Offset<Buffer>> buffers_;
  std::map<string, std::function<TfLiteRegistration*()>> custom_registrations_;
  // A function that gets called after the interpreter is created but before
  // evaluation happens. This is useful for applying a delegate.
  std::function<void(Interpreter*)> apply_delegate_fn_;
};

// Base class for single op unit tests.
// The tests are parameterized to test multiple kernels for a single op.
// The parameters are strings like "optimized" and "reference" to have better
// readability in test reports.
//
// To use this class:
// * Define a constant map from strings to TfLiteRegistration.
// * Implement a test class that inherits SingleOpTest.
// * Instantiate the test cases with SingleOpTest::GetKernelTags helper
//   function.
// * Call GetRegistration to get the TfLiteRegistration to be used before
//   building the interpreter.
// A usage sketch follows the class definition below.
class SingleOpTest : public ::testing::TestWithParam<string> {
 public:
  static std::vector<string> GetKernelTags(
      const std::map<string, TfLiteRegistration*>& kernel_map) {
    std::vector<string> tags;
    for (const auto& it : kernel_map) {
      tags.push_back(it.first);
    }
    return tags;
  }

 protected:
  virtual const std::map<string, TfLiteRegistration*>& GetKernelMap() = 0;
  TfLiteRegistration* GetRegistration() {
    return GetKernelMap().at(GetParam());
  }
};
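
// Usage sketch (hypothetical names, shown for illustration only):
//   const std::map<string, TfLiteRegistration*>* kKernelMap = ...;
//   class AddOpTest : public SingleOpTest {
//    protected:
//     const std::map<string, TfLiteRegistration*>& GetKernelMap() override {
//       return *kKernelMap;
//     }
//   };
//   INSTANTIATE_TEST_SUITE_P(
//       AddOpTests, AddOpTest,
//       ::testing::ValuesIn(SingleOpTest::GetKernelTags(*kKernelMap)));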

// Returns the corresponding TensorType given the type T.
template <typename T>
TensorType GetTensorType() {
  if (std::is_same<T, float>::value) return TensorType_FLOAT32;
  if (std::is_same<T, int32_t>::value) return TensorType_INT32;
  if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
  if (std::is_same<T, string>::value) return TensorType_STRING;
  return TensorType_MIN;  // default value
}

// Strings have a special implementation that is in test_util.cc.
template <>
std::vector<string> SingleOpModel::ExtractVector(int index);

}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_