/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_
#define TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_

#include <limits>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/cwise_ops.h"

namespace tensorflow {

enum QuantizerRoundMode {
  // Round half up: if the fraction of y is exactly 0.5, then
  // round(y) = y + 0.5.
  // E.g., -5.5 gets rounded to -5, -5.4 goes to -5,
  // 5.4 goes to 5, and 5.5 goes to 6.
  ROUND_HALF_UP,
  // Round half to even: if the fraction of y is exactly 0.5, then round(y) is
  // the nearest even integer to y.
  // E.g., 23.5 gets rounded to 24, 24.5 gets rounded to 24, while -23.5
  // becomes -24, and -24.5 gets rounded to -24.
  ROUND_HALF_TO_EVEN,
};

namespace functor {

// TODO(pauldonnelly): 'signed_input' should really be called 'signed_output'.

template <typename Device, typename T>
struct QuantizeAndDequantizeOneScaleFunctor {
  void operator()(const Device& d, typename TTypes<T>::ConstVec input,
                  bool signed_input, int num_bits, bool range_given,
                  Tensor* input_min_tensor, Tensor* input_max_tensor,
                  QuantizerRoundMode round_mode, typename TTypes<T>::Vec out);
};

// The implementation below runs on both CPU and GPU.
template <typename Device, typename T, typename Func>
void ClampScaleAndRound(const Device& d, typename TTypes<T>::ConstVec input,
                        T min_range, T max_range, T scale, T inverse_scale,
                        Func round_func, typename TTypes<T>::Vec out) {
  out.device(d) = (input.cwiseMin(max_range).cwiseMax(min_range) * scale)
                      .unaryExpr(round_func) *
                  inverse_scale;
}

// The implementation below runs on both CPU and GPU.
template <typename Device, typename T>
void ClampScaleAndRound(const Device& d, typename TTypes<T>::ConstVec input,
                        T min_range, T max_range, T scale, T inverse_scale,
                        QuantizerRoundMode round_mode,
                        typename TTypes<T>::Vec out) {
  switch (round_mode) {
    case ROUND_HALF_TO_EVEN:
      ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
                         Eigen::internal::scalar_round_op_google<T>(), out);
      break;
    case ROUND_HALF_UP:
      ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
                         Eigen::internal::scalar_round_up_op<T>(), out);
      break;
  }
}

// The implementation below runs on both CPU and GPU.
template <typename Device, typename T, typename Func>
void ScaleAndRound(const Device& d, typename TTypes<T>::ConstVec input, T scale,
                   T inverse_scale, Func round_func,
                   typename TTypes<T>::Vec out) {
  out.device(d) = (input * scale).unaryExpr(round_func) * inverse_scale;
}
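
// Illustrative scalar sketch (not part of the original kernel): the tensor
// expression in ScaleAndRound() above, written out for a single value using
// ROUND_HALF_UP semantics (round(y) = y + 0.5 at ties, i.e. floor(y + 0.5)).
// The function name is hypothetical and exists only to document the math;
// the op itself always uses the Eigen tensor expressions above.
template <typename T>
T ScaleAndRoundOneValueSketch(T value, T scale, T inverse_scale) {
  // Quantize onto the integer grid, then immediately dequantize, so the
  // result is `value` carrying the precision loss of the quantized
  // representation.
  return Eigen::numext::floor(value * scale + T(0.5)) * inverse_scale;
}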

// The implementation below runs on both CPU and GPU.
template <typename Device, typename T>
void ScaleAndRound(const Device& d, typename TTypes<T>::ConstVec input, T scale,
                   T inverse_scale, QuantizerRoundMode round_mode,
                   typename TTypes<T>::Vec out) {
  switch (round_mode) {
    case ROUND_HALF_TO_EVEN:
      ScaleAndRound(d, input, scale, inverse_scale,
                    Eigen::internal::scalar_round_op_google<T>(), out);
      break;
    case ROUND_HALF_UP:
      ScaleAndRound(d, input, scale, inverse_scale,
                    Eigen::internal::scalar_round_up_op<T>(), out);
      break;
  }
}

// The implementation below runs on both CPU and GPU.
template <typename Device, typename T>
struct QuantizeAndDequantizeOneScaleImpl {
  static void Compute(const Device& d, typename TTypes<T>::ConstVec input,
                      bool signed_input, int num_bits, bool range_given,
                      Tensor* input_min_tensor, Tensor* input_max_tensor,
                      QuantizerRoundMode round_mode,
                      typename TTypes<T>::Vec out) {
    T min_range;
    T max_range;
    auto input_min = input_min_tensor->scalar<T>();
    auto input_max = input_max_tensor->scalar<T>();
    if (!range_given) {
      input_min.device(d) = input.minimum();
      input_max.device(d) = input.maximum();
      d.memcpyDeviceToHost(&min_range, input_min.data(), sizeof(T));
      d.memcpyDeviceToHost(&max_range, input_max.data(), sizeof(T));
    } else {
      // Copy the range values from their respective tensors on the host.
      min_range = input_min_tensor->scalar<T>()();
      max_range = input_max_tensor->scalar<T>()();
    }

    // Calculate the range for the simulated integer quantization:
    // e.g. [-128, 127] for signed = true, num_bits = 8,
    // or [0, 255] for signed = false, num_bits = 8.
    const int64 min_quantized = signed_input ? -(1ULL << (num_bits - 1)) : 0;
    const int64 max_quantized = min_quantized + ((1ULL << num_bits) - 1);

    // Determine the maximum scaling factor that would scale
    // [min_range, max_range] to not exceed [min_quantized, max_quantized],
    // while keeping 0 unchanged.
    const T scale_from_min_side = (min_quantized * min_range > 0)
                                      ? min_quantized / min_range
                                      : std::numeric_limits<T>::max();
    const T scale_from_max_side = (max_quantized * max_range > 0)
                                      ? max_quantized / max_range
                                      : std::numeric_limits<T>::max();

    // Note: Avoids changing the side of the range that determines scale.
    T scale, inverse_scale;
    if (scale_from_min_side < scale_from_max_side) {
      scale = scale_from_min_side;
      inverse_scale = min_range / min_quantized;
      max_range = max_quantized * inverse_scale;
    } else {
      scale = scale_from_max_side;
      inverse_scale = max_range / max_quantized;
      min_range = min_quantized * inverse_scale;
    }

    if (range_given) {
      // Note: The clamping here is to avoid overflow in the quantized type.
      // The semantics of the op do not guarantee clamping to the specified
      // min_range and max_range, because we may have changed either min_range
      // or max_range above.
      ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
                         round_mode, out);
    } else {
      ScaleAndRound(d, input, scale, inverse_scale, round_mode, out);
    }
  }
};

}  // end of namespace functor
}  // end of namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_
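
// Usage sketch (illustrative only, kept in a comment): how a kernel might
// invoke the one-scale functor declared in this header, assuming a float CPU
// tensor. `ctx`, `input`, `output`, the min/max tensors, and `CPUDevice` are
// hypothetical names standing in for whatever the calling kernel defines; the
// argument order follows the operator() declaration above.
//
//   functor::QuantizeAndDequantizeOneScaleFunctor<CPUDevice, float> f;
//   f(ctx->eigen_device<CPUDevice>(), input.flat<float>(),
//     /*signed_input=*/true, /*num_bits=*/8, /*range_given=*/false,
//     &input_min_tensor, &input_max_tensor, ROUND_HALF_TO_EVEN,
//     output->flat<float>());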