/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/concat_lib_cpu.h"
#include "tensorflow/core/kernels/quantization_utils.h"

namespace tensorflow {

namespace {
template <typename T>
struct RequantizeCopier {
  RequantizeCopier(
      const std::vector<std::pair<float, float>>* input_min_and_max,
      float output_min, float output_max)
      : output_min(output_min),
        output_max(output_max),
        input_min_and_max(input_min_and_max) {}

  inline void Copy(T* dst, const T* src, int input_index, size_t n) {
    const float input_min = (*input_min_and_max)[input_index].first;
    const float input_max = (*input_min_and_max)[input_index].second;
    if (input_min == output_min && input_max == output_max) {
      DCHECK(DataTypeCanUseMemcpy(DataTypeToEnum<T>::v()));
      memcpy(dst, src, n * sizeof(T));
    } else {
      Eigen::array<Eigen::DenseIndex, 1> dims;
      dims[0] = n;
      typename TTypes<T, 1>::UnalignedConstTensor input_array(src, dims);
      typename TTypes<T, 1>::UnalignedTensor output_array(dst, dims);

      QuantizedToFloatStruct<T> q2f(input_min, input_max);
      auto input_float = DEQUANTIZE_WITH_EIGEN(input_array, q2f);
      FloatToQuantizedStruct<T> f2q(output_min, output_max);
      auto input_requantized = QUANTIZE_WITH_EIGEN(input_float, f2q, T);
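      // Worked example, assuming T == quint8 and the linear mapping from
      // quantization_utils.h: the stored value 128 in the range [0.0f, 1.0f]
      // dequantizes to roughly 128 / 255 = 0.502f, which requantizes into the
      // wider range [0.0f, 2.0f] as round(0.502f * 127.5f) = 64.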

      // RequantizeCopier::Copy is called from within a shard of computation,
      // so don't use the threadpool device here; simply assign with the
      // default CPU device.
      output_array = input_requantized;
    }
  }

  float output_min;
  float output_max;
  const std::vector<std::pair<float, float>>* input_min_and_max;
};
}  // namespace

template <typename T>
class QuantizedConcatOp : public OpKernel {
 public:
  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
      ConstMatrixVector;

  explicit QuantizedConcatOp(OpKernelConstruction* c) : OpKernel(c) {}

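  // Records each input's (min, max) float range and computes a single output
  // range covering all of them. The overall minimum is first clamped to at
  // most zero so that zero stays exactly representable. For example, given
  // input ranges [0.0f, 1.0f] and [-0.5f, 2.0f], an unsigned T gets the
  // output range [-0.5f, 2.0f], while a signed T gets the symmetric range
  // [-2.0f, 2.0f].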
  void CalculateInputAndOutputRange(
      const OpInputList& input_mins, const OpInputList& input_maxes,
      const size_t N,
      std::vector<std::pair<float, float>>* input_mins_and_maxes,
      float* output_min, float* output_max) {
    input_mins_and_maxes->reserve(N);
    float overall_min = std::numeric_limits<float>::max();
    float overall_max = std::numeric_limits<float>::lowest();
    for (int i = 0; i < N; ++i) {
      const float input_min = input_mins[i].flat<float>()(0);
      const float input_max = input_maxes[i].flat<float>()(0);
      input_mins_and_maxes->emplace_back(input_min, input_max);
      overall_min = std::min(overall_min, input_min);
      overall_max = std::max(overall_max, input_max);
    }
    // Make sure min is no more than zero.
    overall_min = std::min(0.0f, overall_min);
    if (std::is_signed<T>::value) {
      // For signed, we want a symmetrical distribution including zero for the
      // output, so pick a range that meets that need.
      const float largest_value =
          std::max(std::abs(overall_min), std::abs(overall_max));
      *output_min = -largest_value;
      *output_max = largest_value;
    } else {
      *output_min = overall_min;
      *output_max = overall_max;
    }
  }

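  // Returns the product of the dimension sizes before concat_dim, i.e. the
  // number of rows when each input is viewed as a 2-D matrix whose columns
  // span concat_dim and every dimension after it.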
  int64 CalculateInputsDim(const TensorShape& input_shape,
                           const int32 concat_dim) {
    int64 inputs_flat_dim0 = 1;
    for (int d = 0; d < concat_dim; ++d) {
      inputs_flat_dim0 *= input_shape.dim_size(d);
    }
    return inputs_flat_dim0;
  }

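  // Validates that every input is shape-compatible with the first one (equal
  // rank, and equal sizes on all dimensions other than concat_dim), flattens
  // each non-empty input into a 2-D matrix of shape
  // {inputs_flat_dim0, NumElements / inputs_flat_dim0}, and accumulates the
  // size of the output along concat_dim.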
  void CalculateConcatDims(const size_t N, const TensorShape& input_shape,
                           int input_dims, const OpInputList& values,
                           OpKernelContext* context, const int32 concat_dim,
                           const int64 inputs_flat_dim0,
                           ConstMatrixVector* inputs_flat,
                           int* output_concat_dim) {
    // Note that we reduce the concat of n-dimensional tensors into a
    // two-dimensional concat. Assuming the dimensions of any input/output
    // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along
    // the dimension indicated with size y0, we flatten it to {x, y}, where y =
    // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1).
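    // For example, concatenating inputs of shape {2, 3, 5} and {2, 4, 5}
    // along concat_dim == 1 flattens them to {2, 15} and {2, 20}; the output
    // of shape {2, 7, 5} is then filled as a {2, 35} matrix.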
    inputs_flat->reserve(N);
    *output_concat_dim = 0;
    const bool input_is_scalar = IsLegacyScalar(input_shape);
    for (int i = 0; i < N; ++i) {
      const auto in = values[i];
      const bool in_is_scalar = IsLegacyScalar(in.shape());
      OP_REQUIRES(
          context, in.dims() == input_dims || (input_is_scalar && in_is_scalar),
          errors::InvalidArgument(
              "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
              input_shape.DebugString(), " vs. shape[", i,
              "] = ", in.shape().DebugString()));
      for (int j = 0; j < input_dims; ++j) {
        if (j == concat_dim) {
          continue;
        }
        OP_REQUIRES(
            context, in.dim_size(j) == input_shape.dim_size(j),
            errors::InvalidArgument(
                "ConcatOp : Dimensions of inputs should match: shape[0] = ",
                input_shape.DebugString(), " vs. shape[", i,
                "] = ", in.shape().DebugString()));
      }
      if (in.NumElements() > 0) {
        int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
        inputs_flat->emplace_back(new typename TTypes<T, 2>::ConstMatrix(
            in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1})));
      }
      *output_concat_dim += in.dims() > 0 ? in.dim_size(concat_dim) : 1;
    }
  }

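  // Expects the inputs "concat_dim" (a scalar), "values" (N tensors), and
  // "input_mins"/"input_maxes" (one scalar range pair per value). Emits the
  // concatenated tensor as output 0 and the shared output range as outputs 1
  // and 2.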
  void Compute(OpKernelContext* context) override {
    const Tensor* concat_dim_tensor = nullptr;
    OP_REQUIRES_OK(context, context->input("concat_dim", &concat_dim_tensor));
    OP_REQUIRES(
        context, IsLegacyScalar(concat_dim_tensor->shape()),
        errors::InvalidArgument(
            "Concat dim tensor should be a scalar integer, but got shape ",
            concat_dim_tensor->shape().DebugString()));
    const int32 concat_dim = concat_dim_tensor->scalar<int32>()();
    OpInputList values;
    OP_REQUIRES_OK(context, context->input_list("values", &values));
    const size_t N = values.size();
    OpInputList input_mins;
    OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins));
    OP_REQUIRES(context, (input_mins.size() == N),
                errors::InvalidArgument(
                    "QuantizedConcatOp : Expected mins input list length ",
                    input_mins.size(), " to equal values length ", N));
    OpInputList input_maxes;
    OP_REQUIRES_OK(context, context->input_list("input_maxes", &input_maxes));
    OP_REQUIRES(context, (input_maxes.size() == N),
                errors::InvalidArgument(
                    "QuantizedConcatOp : Expected maxes input list length ",
                    input_maxes.size(), " to equal values length ", N));
    const int input_dims = values[0].dims();
    const TensorShape& input_shape = values[0].shape();
    OP_REQUIRES(
        context,
        (0 <= concat_dim && concat_dim < input_dims) ||
            (allow_legacy_scalars() && concat_dim == 0),
        errors::InvalidArgument(
            "ConcatOp : Expected concatenating dimensions in the range [", 0,
            ", ", input_dims, "), but got ", concat_dim));

    float output_min = std::numeric_limits<float>::max();
    float output_max = std::numeric_limits<float>::lowest();
    std::vector<std::pair<float, float>> input_mins_and_maxes;
    CalculateInputAndOutputRange(input_mins, input_maxes, N,
                                 &input_mins_and_maxes, &output_min,
                                 &output_max);
    const int64 inputs_flat_dim0 = CalculateInputsDim(input_shape, concat_dim);
    ConstMatrixVector inputs_flat;
    int output_concat_dim;
    CalculateConcatDims(N, input_shape, input_dims, values, context, concat_dim,
                        inputs_flat_dim0, &inputs_flat, &output_concat_dim);

    TensorShape output_shape(input_shape);
    // TODO(irving): Remove rank 0 case once !kAllowLegacyScalars
    if (output_shape.dims() == 0) {
      output_shape.AddDim(output_concat_dim);
    } else {
      output_shape.set_dim(concat_dim, output_concat_dim);
    }
    Tensor* output = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));

    if (output->NumElements() > 0) {
      int64 output_dim1 = output->NumElements() / inputs_flat_dim0;
      auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1});
      ConcatCPUImpl<T>(
          context->device(), inputs_flat, sizeof(T) /* cost_per_unit */,
          RequantizeCopier<T>(&input_mins_and_maxes, output_min, output_max),
          &output_flat);
    }

    Tensor* output_min_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(1, {}, &output_min_tensor));
    output_min_tensor->flat<float>()(0) = output_min;

    Tensor* output_max_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(2, {}, &output_max_tensor));
    output_max_tensor->flat<float>()(0) = output_max;
  }
};

#define REGISTER_QUANTIZED_CONCAT(type)                  \
  REGISTER_KERNEL_BUILDER(Name("QuantizedConcat")        \
                              .Device(DEVICE_CPU)        \
                              .TypeConstraint<type>("T") \
                              .HostMemory("concat_dim"), \
                          QuantizedConcatOp<type>)

REGISTER_QUANTIZED_CONCAT(quint8);
REGISTER_QUANTIZED_CONCAT(qint32);

#undef REGISTER_QUANTIZED_CONCAT

}  // namespace tensorflow