Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_
     17 #define TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_
     18 
     19 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
     20 #include "tensorflow/core/framework/op_kernel.h"
     21 #include "tensorflow/core/framework/tensor_types.h"
     22 #include "tensorflow/core/kernels/cwise_ops.h"
     23 
     24 namespace tensorflow {
     25 
enum QuantizerRoundMode {
  // Round half up: if the fraction of y is exactly 0.5, then
  // round(y) = y + 0.5
  // E.g., -5.5 gets rounded to -5, -5.4 goes to -5,
  // 5.4 goes to 5, and 5.5 goes to 6.
  ROUND_HALF_UP,
  // Round half to even: if the fraction of y is exactly 0.5, then round(y) is
  // the nearest even integer to y.
  // E.g., 23.5 gets rounded to 24, 24.5 gets rounded to 24, while -23.5 becomes
  // -24, and -24.5 gets rounded to -24.
  ROUND_HALF_TO_EVEN,
};
     38 
     39 namespace functor {
     40 
     41 // TODO(pauldonnelly): 'signed_input' should really be called 'signed_output'.
     42 
// Declaration of the quantize-and-dequantize functor. Its signature mirrors
// QuantizeAndDequantizeOneScaleImpl::Compute below; presumably each Device is
// given a definition elsewhere (e.g. a .cc/.cu.cc file) that forwards to that
// Compute — confirm against the op's kernel registration.
template <typename Device, typename T>
struct QuantizeAndDequantizeOneScaleFunctor {
  void operator()(const Device& d, typename TTypes<T>::ConstVec input,
                  bool signed_input, int num_bits, bool range_given,
                  Tensor* input_min_tensor, Tensor* input_max_tensor,
                  QuantizerRoundMode round_mode, typename TTypes<T>::Vec out);
};
     50 
     51 // The implementation below runs on both CPU and GPU.
     52 template <typename Device, typename T, typename Func>
     53 void ClampScaleAndRound(const Device& d, typename TTypes<T>::ConstVec input,
     54                         T min_range, T max_range, T scale, T inverse_scale,
     55                         Func round_func, typename TTypes<T>::Vec out) {
     56   out.device(d) = (input.cwiseMin(max_range).cwiseMax(min_range) * scale)
     57                       .unaryExpr(round_func) *
     58                   inverse_scale;
     59 }
     60 
     61 // The implementation below runs on both CPU and GPU.
     62 template <typename Device, typename T>
     63 void ClampScaleAndRound(const Device& d, typename TTypes<T>::ConstVec input,
     64                         T min_range, T max_range, T scale, T inverse_scale,
     65                         QuantizerRoundMode round_mode,
     66                         typename TTypes<T>::Vec out) {
     67   switch (round_mode) {
     68     case ROUND_HALF_TO_EVEN:
     69       ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
     70                          Eigen::internal::scalar_round_op_google<T>(), out);
     71       break;
     72     case ROUND_HALF_UP:
     73       ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
     74                          Eigen::internal::scalar_round_up_op<T>(), out);
     75       break;
     76   }
     77 }
     78 
     79 // The implementation below runs on both CPU and GPU.
     80 template <typename Device, typename T, typename Func>
     81 void ScaleAndRound(const Device& d, typename TTypes<T>::ConstVec input, T scale,
     82                    T inverse_scale, Func round_func,
     83                    typename TTypes<T>::Vec out) {
     84   out.device(d) = (input * scale).unaryExpr(round_func) * inverse_scale;
     85 }
     86 
     87 // The implementation below runs on both CPU and GPU.
     88 template <typename Device, typename T>
     89 void ScaleAndRound(const Device& d, typename TTypes<T>::ConstVec input, T scale,
     90                    T inverse_scale, QuantizerRoundMode round_mode,
     91                    typename TTypes<T>::Vec out) {
     92   switch (round_mode) {
     93     case ROUND_HALF_TO_EVEN:
     94       ScaleAndRound(d, input, scale, inverse_scale,
     95                     Eigen::internal::scalar_round_op_google<T>(), out);
     96       break;
     97     case ROUND_HALF_UP:
     98       ScaleAndRound(d, input, scale, inverse_scale,
     99                     Eigen::internal::scalar_round_up_op<T>(), out);
    100       break;
    101   }
    102 }
    103 
    104 // The implementation below runs on both CPU and GPU.
template <typename Device, typename T>
struct QuantizeAndDequantizeOneScaleImpl {
  // Simulates quantization of `input` to a num_bits-wide integer grid and
  // writes the dequantized result to `out`.
  //   signed_input: whether the simulated integer type is signed.
  //   num_bits: width of the simulated integer type. NOTE(review): the shifts
  //     below assume 0 < num_bits < 64 — presumably validated by the caller;
  //     confirm at the op level.
  //   range_given: if false, the range is computed from the data and written
  //     back into input_min_tensor / input_max_tensor; if true, those tensors
  //     supply the range (and must be host-readable scalars).
  //   round_mode: rounding behavior for the scaled values (see
  //     QuantizerRoundMode above).
  static void Compute(const Device& d, typename TTypes<T>::ConstVec input,
                      bool signed_input, int num_bits, bool range_given,
                      Tensor* input_min_tensor, Tensor* input_max_tensor,
                      QuantizerRoundMode round_mode,
                      typename TTypes<T>::Vec out) {
    T min_range;
    T max_range;
    auto input_min = input_min_tensor->scalar<T>();
    auto input_max = input_max_tensor->scalar<T>();
    if (!range_given) {
      // Derive the range from the data on the device, then copy the two
      // scalars back to the host. The memcpys read what the reductions just
      // wrote, so this relies on them observing the completed device writes.
      input_min.device(d) = input.minimum();
      input_max.device(d) = input.maximum();
      d.memcpyDeviceToHost(&min_range, input_min.data(), sizeof(T));
      d.memcpyDeviceToHost(&max_range, input_max.data(), sizeof(T));
    } else {
      // Copy the range values from their respective tensors on the host.
      min_range = input_min_tensor->scalar<T>()();
      max_range = input_max_tensor->scalar<T>()();
    }

    // Calculate the range for the simulated integer quantization:
    // e.g. [-128,127] for signed = true, num_bits = 8,
    // or [0, 255] for signed = false, num_bits = 8.
    const int64 min_quantized = signed_input ? -(1ULL << (num_bits - 1)) : 0;
    const int64 max_quantized = min_quantized + ((1ULL << num_bits) - 1);

    // Determine the maximum scaling factor that would scale
    // [min_range, max_range] to not exceed [min_quantized, max_quantized],
    // while keeping 0 unchanged.
    // The product-sign tests also guard the divisions: a zero or wrong-signed
    // range endpoint yields numeric_limits<T>::max() so that side never wins
    // the min below.
    const T scale_from_min_side = (min_quantized * min_range > 0)
                                      ? min_quantized / min_range
                                      : std::numeric_limits<T>::max();
    const T scale_from_max_side = (max_quantized * max_range > 0)
                                      ? max_quantized / max_range
                                      : std::numeric_limits<T>::max();

    // Note: Avoids changing the side of the range that determines scale.
    // The other endpoint is re-derived from inverse_scale so the quantized
    // grid stays exact on the binding side.
    T scale, inverse_scale;
    if (scale_from_min_side < scale_from_max_side) {
      scale = scale_from_min_side;
      inverse_scale = min_range / min_quantized;
      max_range = max_quantized * inverse_scale;
    } else {
      scale = scale_from_max_side;
      inverse_scale = max_range / max_quantized;
      min_range = min_quantized * inverse_scale;
    }

    if (range_given) {
      // Note: The clamping here is to avoid overflow in the quantized type.
      // The semantics of the op does not guarantee to clamp to the specified
      // min_range and max_range - because we may have changed either min_range
      // or max_range.
      ClampScaleAndRound(d, input, min_range, max_range, scale, inverse_scale,
                         round_mode, out);
    } else {
      // Range was computed from the data, so every element already lies in
      // [min_range, max_range]; no clamp needed.
      ScaleAndRound(d, input, scale, inverse_scale, round_mode, out);
    }
  }
};
    167 
    168 }  // end of namespace functor
    169 }  // end of namespace tensorflow
    170 
    171 #endif  // TENSORFLOW_CORE_KERNELS_QUANTIZE_AND_DEQUANTIZE_OP_H_
    172