1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 #include "tensorflow/lite/kernels/kernel_util.h" 16 17 #include <algorithm> 18 #include <cmath> 19 #include <memory> 20 21 #include "tensorflow/lite/kernels/internal/quantization_util.h" 22 #include "tensorflow/lite/kernels/internal/round.h" 23 24 namespace tflite { 25 26 void GuardedQuantizeMultiplier(double effective_output_scale, 27 int32_t* significand, int* shift) { 28 QuantizeMultiplier(effective_output_scale, significand, shift); 29 // Additional guard to make sure RoundingDivideByPOT does not fail. 30 if (*shift < -31) { 31 // If shift is less than -31, RoundingDivideByPOT fails. This happens when 32 // min and max are close and small. For this particular case, both 33 // significand and shift are set to zero. 34 *significand = 0; 35 *shift = 0; 36 } 37 } 38 39 TfLiteStatus PopulateConvolutionQuantizationParams( 40 TfLiteContext* context, const TfLiteTensor* input, 41 const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, 42 const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, 43 int32_t* output_activation_min, int32_t* output_activation_max, 44 int32_t* per_channel_multiplier, int* per_channel_shift) { 45 TF_LITE_ENSURE_EQ(context, input->quantization.type, 46 kTfLiteAffineQuantization); 47 TF_LITE_ENSURE_EQ(context, filter->quantization.type, 48 kTfLiteAffineQuantization); 49 // TODO(jianlijianli): Enable bias type check and bias scale == input scale 50 // * filter scale for each channel in affine quantization once bias 51 // quantization is properly populated. 52 // TF_LITE_ENSURE_EQ(context, bias->quantization.type, 53 // kTfLiteAffineQuantization); 54 55 // Check data type. 56 const auto* affine_quantization = 57 reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params); 58 TF_LITE_ENSURE(context, affine_quantization); 59 TF_LITE_ENSURE(context, affine_quantization->scale); 60 const bool is_per_channel = affine_quantization->scale->size > 1; 61 if (is_per_channel) { 62 // Currently only Int8 is supported for per channel quantization. 63 TF_LITE_ENSURE_EQ(context, input->type, kTfLiteInt8); 64 TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8); 65 TF_LITE_ENSURE_EQ( 66 context, affine_quantization->scale->size, 67 filter->dims->data[affine_quantization->quantized_dimension]); 68 } 69 70 // Populate multiplier and shift using affine quantization. 71 const int num_channels = affine_quantization->scale->size; 72 const float input_scale = input->params.scale; 73 const float output_scale = output->params.scale; 74 const float* filter_scales = affine_quantization->scale->data; 75 for (int i = 0; i < num_channels; ++i) { 76 const double filter_scale = static_cast<double>(filter_scales[i]); 77 const double effective_output_scale = static_cast<double>(input_scale) * 78 filter_scale / 79 static_cast<double>(output_scale); 80 int32_t significand; 81 int shift; 82 GuardedQuantizeMultiplier(effective_output_scale, &significand, &shift); 83 per_channel_multiplier[i] = significand; 84 per_channel_shift[i] = shift; 85 } 86 87 // Populate scalar quantization parameters. 88 // This check on legacy quantization parameters is kept only for backward 89 // compatibility. 90 if (input->type == kTfLiteUInt8) { 91 // Check bias scale == input scale * filter scale. 92 double real_multiplier = 0.0; 93 TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler( 94 context, input, filter, bias, output, &real_multiplier)); 95 int exponent; 96 97 // Populate quantization parameteters with multiplier and shift. 98 QuantizeMultiplier(real_multiplier, multiplier, &exponent); 99 *shift = -exponent; 100 CalculateActivationRangeUint8(activation, output, output_activation_min, 101 output_activation_max); 102 } 103 return kTfLiteOk; 104 } 105 106 TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, 107 const TfLiteTensor* input, 108 const TfLiteTensor* filter, 109 const TfLiteTensor* bias, 110 TfLiteTensor* output, 111 double* multiplier) { 112 const double input_product_scale = input->params.scale * filter->params.scale; 113 const double bias_scale = bias->params.scale; 114 const double output_scale = output->params.scale; 115 116 // TODO(ahentz): The following conditions must be guaranteed by the training 117 // pipeline. 118 TF_LITE_ENSURE(context, std::abs(input_product_scale - bias_scale) <= 119 1e-6 * std::min(input_product_scale, bias_scale)); 120 TF_LITE_ENSURE(context, input_product_scale >= 0); 121 122 *multiplier = input_product_scale / output_scale; 123 124 return kTfLiteOk; 125 } 126 127 namespace { 128 void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation, 129 int32_t qmin, int32_t qmax, 130 TfLiteTensor* output, 131 int32_t* act_min, int32_t* act_max) { 132 const auto scale = output->params.scale; 133 const auto zero_point = output->params.zero_point; 134 135 auto quantize = [scale, zero_point](float f) { 136 return zero_point + static_cast<int32_t>(TfLiteRound(f / scale)); 137 }; 138 139 if (activation == kTfLiteActRelu) { 140 *act_min = std::max(qmin, quantize(0.0)); 141 *act_max = qmax; 142 } else if (activation == kTfLiteActRelu6) { 143 *act_min = std::max(qmin, quantize(0.0)); 144 *act_max = std::min(qmax, quantize(6.0)); 145 } else if (activation == kTfLiteActRelu1) { 146 *act_min = std::max(qmin, quantize(-1.0)); 147 *act_max = std::min(qmax, quantize(1.0)); 148 } else { 149 *act_min = qmin; 150 *act_max = qmax; 151 } 152 } 153 } // namespace 154 155 TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context, 156 TfLiteFusedActivation activation, 157 TfLiteTensor* output, 158 int32_t* act_min, 159 int32_t* act_max) { 160 int32_t qmin = 0; 161 int32_t qmax = 0; 162 if (output->type == kTfLiteUInt8) { 163 qmin = std::numeric_limits<uint8_t>::min(); 164 qmax = std::numeric_limits<uint8_t>::max(); 165 } else if (output->type == kTfLiteInt8) { 166 qmin = std::numeric_limits<int8_t>::min(); 167 qmax = std::numeric_limits<int8_t>::max(); 168 } else if (output->type == kTfLiteInt16) { 169 qmin = std::numeric_limits<int16_t>::min(); 170 qmax = std::numeric_limits<int16_t>::max(); 171 } else { 172 TF_LITE_ENSURE(context, false); 173 } 174 175 CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min, 176 act_max); 177 return kTfLiteOk; 178 } 179 180 void CalculateActivationRangeUint8(TfLiteFusedActivation activation, 181 TfLiteTensor* output, int32_t* act_min, 182 int32_t* act_max) { 183 const int32_t qmin = std::numeric_limits<uint8_t>::min(); 184 const int32_t qmax = std::numeric_limits<uint8_t>::max(); 185 186 CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min, 187 act_max); 188 } 189 190 void CalculateActivationRangeInt8(TfLiteFusedActivation activation, 191 TfLiteTensor* output, int32_t* act_min, 192 int32_t* act_max) { 193 const int32_t qmin = std::numeric_limits<int8_t>::min(); 194 const int32_t qmax = std::numeric_limits<int8_t>::max(); 195 196 CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min, 197 act_max); 198 } 199 200 bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) { 201 return TfLiteIntArrayEqual(input1->dims, input2->dims); 202 } 203 204 // TODO(petewarden): Having macros around this is ugly, look at other strategies 205 // before replicating this approach elsewhere. 206 #ifndef TF_LITE_STATIC_MEMORY 207 TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, 208 const TfLiteTensor* input1, 209 const TfLiteTensor* input2, 210 TfLiteIntArray** output_shape) { 211 int64_t dims1 = NumDimensions(input1); 212 int64_t dims2 = NumDimensions(input2); 213 int64_t out_dims = std::max(dims1, dims2); 214 if (NumElements(input1) == 0) { 215 *output_shape = TfLiteIntArrayCopy(input1->dims); 216 return kTfLiteOk; 217 } 218 std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape( 219 TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree); 220 for (int i = 0; i < out_dims; ++i) { 221 int64_t d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1); 222 int64_t d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1); 223 TF_LITE_ENSURE(context, d1 == d2 || d1 == 1 || d2 == 1); 224 shape->data[out_dims - i - 1] = std::max(d1, d2); 225 } 226 *output_shape = shape.release(); 227 return kTfLiteOk; 228 } 229 #endif // TF_LITE_STATIC_MEMORY 230 231 } // namespace tflite 232