/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include <cmath>
#include <vector>

#include "CpuOperationUtils.h"
#include "OperationResolver.h"

#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"

#include "Tracing.h"

namespace android {
namespace nn {
namespace l2_norm {

constexpr char kOperationName[] = "L2_NORMALIZATION";

constexpr uint32_t kNumInputs = 2;
constexpr uint32_t kInputTensor = 0;
constexpr uint32_t kAxisScalar = 1;

constexpr uint32_t kNumOutputs = 1;
constexpr uint32_t kOutputTensor = 0;

namespace {

// Reference implementation for an arbitrary normalization axis. The tensor is
// viewed as [outerSize, axisSize, innerSize]; elements along the chosen axis
// are strided by innerSize, so each (outer, inner) pair addresses one vector
// to normalize.
inline bool l2normFloat32Impl(const float* inputData, const Shape& inputShape, int32_t axis,
                              float* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("l2normFloat32");
    const uint32_t outerSize = getNumberOfElements(inputShape, 0, axis);
    const uint32_t axisSize = getSizeOfDimension(inputShape, axis);
    const uint32_t innerSize =
            getNumberOfElements(inputShape, axis + 1, getNumberOfDimensions(inputShape));
    for (uint32_t outer = 0; outer < outerSize; ++outer) {
        const float* inputBeg = inputData + outer * axisSize * innerSize;
        const float* inputEnd = inputBeg + axisSize * innerSize;
        float* outputBeg = outputData + outer * axisSize * innerSize;
        for (uint32_t inner = 0; inner < innerSize; ++inner, ++inputBeg, ++inputEnd, ++outputBeg) {
            float sum = 0.0f;
            for (const float* p = inputBeg; p < inputEnd; p += innerSize) {
                float val = *p;
                sum += val * val;
            }
            float l2_norm = std::sqrt(sum);
            float* pOut = outputBeg;
            for (const float* p = inputBeg; p < inputEnd; p += innerSize, pOut += innerSize) {
                *pOut = *p / l2_norm;
            }
        }
    }
    return true;
}

inline bool l2normQuant8Impl(const uint8_t* inputData, const Shape& inputShape, int32_t axis,
                             uint8_t* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("l2normQuant8");
    const uint32_t outerSize = getNumberOfElements(inputShape, 0, axis);
    const uint32_t axisSize = getSizeOfDimension(inputShape, axis);
    const uint32_t innerSize =
            getNumberOfElements(inputShape, axis + 1, getNumberOfDimensions(inputShape));
    for (uint32_t outer = 0; outer < outerSize; ++outer) {
        const uint8_t* inputBeg = inputData + outer * axisSize * innerSize;
        const uint8_t* inputEnd = inputBeg + axisSize * innerSize;
        uint8_t* outputBeg = outputData + outer * axisSize * innerSize;
        for (uint32_t inner = 0; inner < innerSize; ++inner, ++inputBeg, ++inputEnd, ++outputBeg) {
            // Accumulate the squared norm in zero-point-corrected quantized
            // units; the input scale cancels in the final ratio.
            int32_t sum = 0;
            for (const uint8_t* p = inputBeg; p < inputEnd; p += innerSize) {
                int32_t val = static_cast<int32_t>(*p) - inputShape.offset;
                sum += val * val;
            }
            int32_t invMultiplier, invShift;
            tflite::GetInvSqrtQuantizedMultiplierExp(sum, -1, &invMultiplier, &invShift);
            uint8_t* pOut = outputBeg;
            for (const uint8_t* p = inputBeg; p < inputEnd; p += innerSize, pOut += innerSize) {
                int32_t val = static_cast<int32_t>(*p) - inputShape.offset;
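                // A worked view of the requantization below (a sketch of the
                // arithmetic, not extra behavior): prepare() fixes the output
                // encoding to scale = 1/128, zeroPoint = 128, so the real
                // result val / sqrt(sum), which lies in [-1, 1], is encoded as
                //     q = (val / sqrt(sum)) * 128 + 128.
                // invMultiplier/invShift hold 1/sqrt(sum) as a fixed-point
                // multiplier, so the statement below scales val * 128 by it
                // and then recenters the result on the zero point.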
                int32_t scaledVal = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
                                            val * 128, invMultiplier, invShift) +
                                    128;
                *pOut = static_cast<uint8_t>(std::min(std::max(scaledVal, 0), 255));
            }
        }
    }
    return true;
}

bool l2normFloat32(const float* inputData, const Shape& inputShape, int32_t axis, float* outputData,
                   const Shape& outputShape) {
    int32_t ndim = getNumberOfDimensions(inputShape);
    NN_CHECK(handleNegativeAxis(inputShape, &axis));
    // TFLite optimized implementation only supports computation along the last axis
    if (axis == ndim - 1) {
        NNTRACE_COMP("optimized_ops::L2Normalization::float");
        tflite::L2NormalizationParams param = {.input_zero_point = 0};
        tflite::optimized_ops::L2Normalization(param, convertShapeToTflshape(inputShape), inputData,
                                               convertShapeToTflshape(outputShape), outputData);
        return true;
    } else {
        return l2normFloat32Impl(inputData, inputShape, axis, outputData, outputShape);
    }
}

bool l2normFloat16(const _Float16* inputData, const Shape& inputShape, int32_t axis,
                   _Float16* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("l2normFloat16");
    // There is no dedicated fp16 kernel: convert to float32, normalize, and
    // convert the result back.
    std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
    convertFloat16ToFloat32(inputData, &inputDataFloat32);
    std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));

    l2normFloat32(inputDataFloat32.data(), inputShape, axis, outputDataFloat32.data(), outputShape);
    convertFloat32ToFloat16(outputDataFloat32, outputData);

    return true;
}

bool l2normQuant8(const uint8_t* inputData, const Shape& inputShape, int32_t axis,
                  uint8_t* outputData, const Shape& outputShape) {
    int32_t ndim = getNumberOfDimensions(inputShape);
    NN_CHECK(handleNegativeAxis(inputShape, &axis));
    // TFLite optimized implementation only supports computation along the last axis
    if (axis == ndim - 1) {
        NNTRACE_COMP("optimized_ops::L2Normalization::uint8");
        tflite::L2NormalizationParams param = {.input_zero_point = inputShape.offset};
        tflite::optimized_ops::L2Normalization(param, convertShapeToTflshape(inputShape), inputData,
                                               convertShapeToTflshape(outputShape), outputData);
        return true;
    } else {
        return l2normQuant8Impl(inputData, inputShape, axis, outputData, outputShape);
    }
}

}  // namespace

bool validate(const IOperationValidationContext* context) {
    NN_RET_CHECK(context->getNumInputs() == kNumInputs ||
                 context->getNumInputs() == kNumInputs - 1);
    NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);

    const OperandType inputType = context->getInputType(kInputTensor);
    std::vector<OperandType> inExpectedTypes = {inputType};
    if (inputType == OperandType::TENSOR_FLOAT16 || inputType == OperandType::TENSOR_QUANT8_ASYMM) {
        NN_RET_CHECK(validateHalVersion(context, HalVersion::V1_2));
    } else if (inputType == OperandType::TENSOR_FLOAT32) {
        NN_RET_CHECK(validateHalVersion(context, HalVersion::V1_0));
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
    // Both the optional axis input and non-4D tensors require HAL version 1.2.
    if (context->getNumInputs() == kNumInputs) {
        inExpectedTypes.push_back(OperandType::INT32);
        NN_RET_CHECK(validateHalVersion(context, HalVersion::V1_2));
    } else if (context->getInputShape(kInputTensor).dimensions.size() != 4) {
        NN_RET_CHECK(validateHalVersion(context, HalVersion::V1_2));
    }
    return validateInputTypes(context, inExpectedTypes) &&
           validateOutputTypes(context, {inputType});
}
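// A note on the fixed quantization parameters set below (context from the
// NNAPI operand definition for L2_NORMALIZATION, stated here as background):
// the normalized values lie in [-1, 1], and a TENSOR_QUANT8_ASYMM output with
// scale = 1/128 and zeroPoint = 128 maps that interval onto the full
// [0, 255] range. prepare() pins these parameters on the output shape.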
bool prepare(IOperationExecutionContext* context) {
    const Shape& input = context->getInputShape(kInputTensor);
    int32_t numDimensions = getNumberOfDimensions(input);
    // The axis defaults to the last dimension when the optional scalar is omitted.
    int32_t axis = context->getNumInputs() == kNumInputs
                           ? context->getInputValue<int32_t>(kAxisScalar)
                           : -1;
    NN_RET_CHECK_GE(axis, -numDimensions);
    NN_RET_CHECK_LT(axis, numDimensions);
    Shape output = context->getOutputShape(kOutputTensor);
    output.type = input.type;
    output.dimensions = input.dimensions;
    if (output.type == OperandType::TENSOR_QUANT8_ASYMM) {
        output.scale = 1.0f / 128.0f;
        output.offset = 128;
    } else {
        output.scale = 0;
        output.offset = 0;
    }
    return context->setOutputShape(kOutputTensor, output);
}

bool execute(IOperationExecutionContext* context) {
    int32_t axis = context->getNumInputs() == kNumInputs
                           ? context->getInputValue<int32_t>(kAxisScalar)
                           : -1;
    NN_RET_CHECK(handleNegativeAxis(context->getInputShape(kInputTensor), &axis));
    switch (context->getInputType(kInputTensor)) {
        case OperandType::TENSOR_FLOAT32:
            return l2normFloat32(context->getInputBuffer<float>(kInputTensor),
                                 context->getInputShape(kInputTensor), axis,
                                 context->getOutputBuffer<float>(kOutputTensor),
                                 context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_FLOAT16:
            return l2normFloat16(context->getInputBuffer<_Float16>(kInputTensor),
                                 context->getInputShape(kInputTensor), axis,
                                 context->getOutputBuffer<_Float16>(kOutputTensor),
                                 context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_QUANT8_ASYMM:
            return l2normQuant8(context->getInputBuffer<uint8_t>(kInputTensor),
                                context->getInputShape(kInputTensor), axis,
                                context->getOutputBuffer<uint8_t>(kOutputTensor),
                                context->getOutputShape(kOutputTensor));
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}

}  // namespace l2_norm

NN_REGISTER_OPERATION(L2_NORMALIZATION, l2_norm::kOperationName, l2_norm::validate,
                      l2_norm::prepare, l2_norm::execute);

}  // namespace nn
}  // namespace android