/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#define EIGEN_USE_THREADS

#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/concat_lib_cpu.h"
#include "tensorflow/core/kernels/quantization_utils.h"

namespace tensorflow {

namespace {
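// Copies a run of elements from one of the op's inputs into the output,
// requantizing from that input's float range into the shared output range as
// it goes. When the two ranges already match, the copy degenerates to a plain
// memcpy.
//
// Rough illustration of the requantization (assuming quint8 and the linear
// mapping of stored values [0, 255] onto [min, max] used by
// quantization_utils): a stored value of 255 with input range [0.0f, 1.0f]
// dequantizes to 1.0f, and re-quantizing that into an output range of
// [0.0f, 2.0f] yields a stored value of about 128.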
template <typename T>
struct RequantizeCopier {
  RequantizeCopier(
      const std::vector<std::pair<float, float>>* input_min_and_max,
      float output_min, float output_max)
      : output_min(output_min),
        output_max(output_max),
        input_min_and_max(input_min_and_max) {}

  inline void Copy(T* dst, const T* src, int input_index, size_t n) {
    const float input_min = (*input_min_and_max)[input_index].first;
    const float input_max = (*input_min_and_max)[input_index].second;
    if (input_min == output_min && input_max == output_max) {
      DCHECK(DataTypeCanUseMemcpy(DataTypeToEnum<T>::v()));
      memcpy(dst, src, n * sizeof(T));
    } else {
      Eigen::array<Eigen::DenseIndex, 1> dims;
      dims[0] = n;
      typename TTypes<T, 1>::UnalignedConstTensor input_array(src, dims);
      typename TTypes<T, 1>::UnalignedTensor output_array(dst, dims);

      QuantizedToFloatStruct<T> q2f(input_min, input_max);
      auto input_float = DEQUANTIZE_WITH_EIGEN(input_array, q2f);
      FloatToQuantizedStruct<T> f2q(output_min, output_max);
      auto input_requantized = QUANTIZE_WITH_EIGEN(input_float, f2q, T);

      // RequantizeCopier::Copy is called from within a shard of computation,
      // so don't use the threadpool device here; simply assign with the
      // default CPU device.
      output_array = input_requantized;
    }
  }

  float output_min;
  float output_max;
  const std::vector<std::pair<float, float>>* input_min_and_max;
};
}  // namespace

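// Concatenates N quantized tensors along a single dimension. Unlike the float
// ConcatOp, each input carries its own min/max float range, so the inputs
// cannot simply be laid end to end: the kernel first picks one range that
// covers every input (symmetric around zero for signed types), then
// requantizes each element into that range while copying. It outputs the
// concatenated tensor plus the shared min/max as scalar floats.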
template <typename T>
class QuantizedConcatOp : public OpKernel {
 public:
  typedef std::vector<std::unique_ptr<typename TTypes<T, 2>::ConstMatrix>>
      ConstMatrixVector;

  explicit QuantizedConcatOp(OpKernelConstruction* c) : OpKernel(c) {}

  void CalculateInputAndOutputRange(
      const OpInputList& input_mins, const OpInputList& input_maxes,
      const size_t N,
      std::vector<std::pair<float, float>>* input_mins_and_maxes,
      float* output_min, float* output_max) {
    input_mins_and_maxes->reserve(N);
    float overall_min = std::numeric_limits<float>::max();
    float overall_max = std::numeric_limits<float>::lowest();
    for (int i = 0; i < N; ++i) {
      const float input_min = input_mins[i].flat<float>()(0);
      const float input_max = input_maxes[i].flat<float>()(0);
      input_mins_and_maxes->emplace_back(input_min, input_max);
      overall_min = std::min(overall_min, input_min);
      overall_max = std::max(overall_max, input_max);
    }
    // Make sure min is no more than zero.
    overall_min = std::min(0.0f, overall_min);
    if (std::is_signed<T>::value) {
      // For signed, we want a symmetrical distribution including zero for the
      // output, so pick a range that meets that need.
      const float largest_value =
          std::max(std::abs(overall_min), std::abs(overall_max));
      *output_min = -largest_value;
      *output_max = largest_value;
    } else {
      *output_min = overall_min;
      *output_max = overall_max;
    }
  }

  int64 CalculateInputsDim(const TensorShape& input_shape,
                           const int32 concat_dim) {
    int64 inputs_flat_dim0 = 1;
    for (int d = 0; d < concat_dim; ++d) {
      inputs_flat_dim0 *= input_shape.dim_size(d);
    }
    return inputs_flat_dim0;
  }

  void CalculateConcatDims(const size_t N, const TensorShape& input_shape,
                           int input_dims, const OpInputList& values,
                           OpKernelContext* context, const int32 concat_dim,
                           const int64 inputs_flat_dim0,
                           ConstMatrixVector* inputs_flat,
                           int* output_concat_dim) {
    // Note that we reduce the concat of n-dimensional tensors into a two
    // dimensional concat. Assuming the dimensions of any input/output
    // tensor are {x0, x1,...,xn-1, y0, y1,...,ym-1}, where the concat is along
    // the dimension indicated with size y0, we flatten it to {x, y}, where y =
    // Prod_i(yi) and x = ((n > 0) ? Prod_i(xi) : 1).
    inputs_flat->reserve(N);
    *output_concat_dim = 0;
    const bool input_is_scalar = IsLegacyScalar(input_shape);
    for (int i = 0; i < N; ++i) {
      const auto in = values[i];
      const bool in_is_scalar = IsLegacyScalar(in.shape());
      OP_REQUIRES(
          context,
          in.dims() == input_dims || (input_is_scalar && in_is_scalar),
          errors::InvalidArgument(
              "ConcatOp : Ranks of all input tensors should match: shape[0] = ",
              input_shape.DebugString(), " vs. shape[", i,
              "] = ", in.shape().DebugString()));
      for (int j = 0; j < input_dims; ++j) {
        if (j == concat_dim) {
          continue;
        }
        OP_REQUIRES(
            context, in.dim_size(j) == input_shape.dim_size(j),
            errors::InvalidArgument(
                "ConcatOp : Dimensions of inputs should match: shape[0] = ",
                input_shape.DebugString(), " vs. shape[", i,
                "] = ", in.shape().DebugString()));
      }
      if (in.NumElements() > 0) {
        int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
        inputs_flat->emplace_back(new typename TTypes<T, 2>::ConstMatrix(
            in.shaped<T, 2>({inputs_flat_dim0, inputs_flat_dim1})));
      }
      *output_concat_dim += in.dims() > 0 ? in.dim_size(concat_dim) : 1;
    }
  }

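  // Reads and validates concat_dim and the three input lists, derives the
  // common output range, flattens every non-empty input to a 2D matrix, then
  // concatenates with RequantizeCopier before emitting the result and its
  // min/max as scalar outputs.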
  void Compute(OpKernelContext* context) override {
    const Tensor* concat_dim_tensor = nullptr;
    OP_REQUIRES_OK(context, context->input("concat_dim", &concat_dim_tensor));
    OP_REQUIRES(
        context, IsLegacyScalar(concat_dim_tensor->shape()),
        errors::InvalidArgument(
            "Concat dim tensor should be a scalar integer, but got shape ",
            concat_dim_tensor->shape().DebugString()));
    const int32 concat_dim = concat_dim_tensor->scalar<int32>()();
    OpInputList values;
    OP_REQUIRES_OK(context, context->input_list("values", &values));
    const size_t N = values.size();
    OpInputList input_mins;
    OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins));
    OP_REQUIRES(context, (input_mins.size() == N),
                errors::InvalidArgument(
                    "QuantizedConcatOp : Expected mins input list length ",
                    input_mins.size(), " to equal values length ", N));
    OpInputList input_maxes;
    OP_REQUIRES_OK(context, context->input_list("input_maxes", &input_maxes));
    OP_REQUIRES(context, (input_maxes.size() == N),
                errors::InvalidArgument(
                    "QuantizedConcatOp : Expected maxes input list length ",
                    input_maxes.size(), " to equal values length ", N));
    const int input_dims = values[0].dims();
    const TensorShape& input_shape = values[0].shape();
    OP_REQUIRES(
        context,
        (0 <= concat_dim && concat_dim < input_dims) ||
            (allow_legacy_scalars() && concat_dim == 0),
        errors::InvalidArgument(
            "ConcatOp : Expected concatenating dimensions in the range [", 0,
            ", ", input_dims, "), but got ", concat_dim));

    float output_min = std::numeric_limits<float>::max();
    float output_max = std::numeric_limits<float>::lowest();
    std::vector<std::pair<float, float>> input_mins_and_maxes;
    CalculateInputAndOutputRange(input_mins, input_maxes, N,
                                 &input_mins_and_maxes, &output_min,
                                 &output_max);
    const int64 inputs_flat_dim0 = CalculateInputsDim(input_shape, concat_dim);
    ConstMatrixVector inputs_flat;
    int output_concat_dim;
    CalculateConcatDims(N, input_shape, input_dims, values, context,
                        concat_dim, inputs_flat_dim0, &inputs_flat,
                        &output_concat_dim);

    TensorShape output_shape(input_shape);
    // TODO(irving): Remove rank 0 case once !kAllowLegacyScalars
    if (output_shape.dims() == 0) {
      output_shape.AddDim(output_concat_dim);
    } else {
      output_shape.set_dim(concat_dim, output_concat_dim);
    }
    Tensor* output = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));

    if (output->NumElements() > 0) {
      int64 output_dim1 = output->NumElements() / inputs_flat_dim0;
      auto output_flat = output->shaped<T, 2>({inputs_flat_dim0, output_dim1});
      ConcatCPUImpl<T>(
          context->device(), inputs_flat, sizeof(T) /* cost_per_unit */,
          RequantizeCopier<T>(&input_mins_and_maxes, output_min, output_max),
          &output_flat);
    }

    Tensor* output_min_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(1, {}, &output_min_tensor));
    output_min_tensor->flat<float>()(0) = output_min;

    Tensor* output_max_tensor = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(2, {}, &output_max_tensor));
    output_max_tensor->flat<float>()(0) = output_max;
  }
};

#define REGISTER_QUANTIZED_CONCAT(type)                  \
  REGISTER_KERNEL_BUILDER(Name("QuantizedConcat")        \
                              .Device(DEVICE_CPU)        \
                              .TypeConstraint<type>("T") \
                              .HostMemory("concat_dim"), \
                          QuantizedConcatOp<type>)

REGISTER_QUANTIZED_CONCAT(quint8);
REGISTER_QUANTIZED_CONCAT(qint32);

#undef REGISTER_QUANTIZED_CONCAT

}  // namespace tensorflow
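// A minimal usage sketch from Python (illustrative only; assumes the
// generated tf.raw_ops wrapper for the QuantizedConcat op registered above,
// whose keyword arguments mirror the op's input names):
//
//   out, out_min, out_max = tf.raw_ops.QuantizedConcat(
//       concat_dim=0, values=[a, b],
//       input_mins=[a_min, b_min], input_maxes=[a_max, b_max])
//
// where a and b are quint8 tensors whose shapes match on every dimension
// except dimension 0.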