Home | History | Annotate | Download | only in operations
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "CpuOperationUtils.h"
     18 #include "Operations.h"
     19 
     20 #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_float.h"
     21 #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
     22 
     23 #include "Tracing.h"
     24 
     25 namespace android {
     26 namespace nn {
     27 
     28 bool depthwiseConvFloat16(const _Float16* inputData, const Shape& inputShape,
     29                           const _Float16* filterData, const Shape& filterShape,
     30                           const _Float16* biasData, const Shape& biasShape, int32_t paddingLeft,
     31                           int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
     32                           int32_t strideWidth, int32_t strideHeight, int32_t dilationWidthFactor,
     33                           int32_t dilationHeightFactor, int32_t depthMultiplier, int32_t activation,
     34                           _Float16* outputData, const Shape& outputShape) {
     35     NNTRACE_TRANS("depthwiseConvFloat16");
     36     std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
     37     convertFloat16ToFloat32(inputData, &inputDataFloat32);
     38     std::vector<float> filterDataFloat32(getNumberOfElements(filterShape));
     39     convertFloat16ToFloat32(filterData, &filterDataFloat32);
     40     std::vector<float> biasDataFloat32(getNumberOfElements(biasShape));
     41     convertFloat16ToFloat32(biasData, &biasDataFloat32);
     42 
     43     std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
     44     depthwiseConvFloat32(inputDataFloat32.data(), inputShape, filterDataFloat32.data(), filterShape,
     45                          biasDataFloat32.data(), biasShape, paddingLeft, paddingRight, paddingTop,
     46                          paddingBottom, strideWidth, strideHeight, dilationWidthFactor,
     47                          dilationHeightFactor, depthMultiplier, activation,
     48                          outputDataFloat32.data(), outputShape);
     49 
     50     convertFloat32ToFloat16(outputDataFloat32, outputData);
     51     return true;
     52 }
     53 
     54 #define ANDROID_NN_DEPTHWISE_CONV_PARAMETERS                    \
     55     uint32_t height = getSizeOfDimension(inputShape, 1);        \
     56     uint32_t width = getSizeOfDimension(inputShape, 2);         \
     57     uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
     58     uint32_t filterWidth = getSizeOfDimension(filterShape, 2);  \
     59     uint32_t outHeight = getSizeOfDimension(outputShape, 1);    \
     60     uint32_t outWidth = getSizeOfDimension(outputShape, 2);     \
     61                                                                 \
     62     uint32_t paddingHeight = (uint32_t)paddingTop;              \
     63     uint32_t paddingWidth = (uint32_t)paddingLeft;
     64 
     65 bool depthwiseConvFloat32(const float* inputData, const Shape& inputShape, const float* filterData,
     66                           const Shape& filterShape, const float* biasData, const Shape& biasShape,
     67                           int32_t paddingLeft, int32_t paddingRight, int32_t paddingTop,
     68                           int32_t paddingBottom, int32_t strideWidth, int32_t strideHeight,
     69                           int32_t dilationWidthFactor, int32_t dilationHeightFactor,
     70                           int32_t depthMultiplier, int32_t activation, float* outputData,
     71                           const Shape& outputShape) {
     72     NNTRACE_TRANS("depthwiseConvFloat32");
     73 
     74     ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
     75 
     76     float output_activation_min, output_activation_max;
     77     CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
     78 
     79     tflite::DepthwiseParams params{
     80             .padding_values = {static_cast<int16>(paddingWidth), static_cast<int16>(paddingHeight)},
     81             .stride_width = static_cast<int16>(strideWidth),
     82             .stride_height = static_cast<int16>(strideHeight),
     83             .depth_multiplier = static_cast<int16>(depthMultiplier),
     84             .float_activation_min = output_activation_min,
     85             .float_activation_max = output_activation_max,
     86             .dilation_width_factor = static_cast<int16>(dilationWidthFactor),
     87             .dilation_height_factor = static_cast<int16>(dilationHeightFactor),
     88     };
     89     NNTRACE_COMP_SWITCH("optimized_ops::DepthwiseConv");
     90     tflite::optimized_ops::DepthwiseConv(params, convertShapeToTflshape(inputShape), inputData,
     91                                          convertShapeToTflshape(filterShape), filterData,
     92                                          convertShapeToTflshape(biasShape), biasData,
     93                                          convertShapeToTflshape(outputShape), outputData);
     94 
     95     return true;
     96 }
     97 
     98 bool depthwiseConvQuant8(const uint8_t* inputData, const Shape& inputShape,
     99                          const uint8_t* filterData, const Shape& filterShape,
    100                          const int32_t* biasData, const Shape& biasShape, int32_t paddingLeft,
    101                          int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
    102                          int32_t strideWidth, int32_t strideHeight, int32_t dilationWidthFactor,
    103                          int32_t dilationHeightFactor, int32_t depthMultiplier, int32_t activation,
    104                          uint8_t* outputData, const Shape& outputShape) {
    105     NNTRACE_TRANS("depthwiseConvQuant8");
    106 
    107     ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
    108 
    109     double real_multiplier = 0.0;
    110     int32_t output_multiplier = 0;
    111     int32_t output_shift = 0;
    112     int32_t output_activation_min = 0;
    113     int32_t output_activation_max = 0;
    114 
    115     NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape,
    116                                                   &real_multiplier));
    117     int exponent;
    118     NN_RET_CHECK(QuantizeMultiplier(real_multiplier, &output_multiplier, &exponent));
    119     output_shift = -exponent;
    120     CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
    121                                   &output_activation_max);
    122 
    123     tflite::DepthwiseParams params{
    124             .padding_values = {static_cast<int16>(paddingWidth), static_cast<int16>(paddingHeight)},
    125             .stride_width = static_cast<int16>(strideWidth),
    126             .stride_height = static_cast<int16>(strideHeight),
    127             .depth_multiplier = static_cast<int16>(depthMultiplier),
    128             .quantized_activation_min = output_activation_min,
    129             .quantized_activation_max = output_activation_max,
    130             .dilation_width_factor = static_cast<int16>(dilationWidthFactor),
    131             .dilation_height_factor = static_cast<int16>(dilationHeightFactor),
    132             .input_offset = -inputShape.offset,
    133             .weights_offset = -filterShape.offset,
    134             .output_offset = outputShape.offset,
    135             .output_shift = -output_shift,
    136             .output_multiplier = output_multiplier,
    137     };
    138     NNTRACE_COMP_SWITCH("optimized_ops::DepthwiseConv");
    139     tflite::optimized_ops::DepthwiseConv(params, convertShapeToTflshape(inputShape), inputData,
    140                                          convertShapeToTflshape(filterShape), filterData,
    141                                          convertShapeToTflshape(biasShape), biasData,
    142                                          convertShapeToTflshape(outputShape), outputData);
    143     return true;
    144 }
    145 
    146 bool depthwiseConvQuant8PerChannel(const uint8_t* inputData, const Shape& inputShape,
    147                                    const int8_t* filterData, const Shape& filterShape,
    148                                    const float* filterScales, const int32_t* biasData,
    149                                    const Shape& biasShape, int32_t paddingLeft,
    150                                    int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
    151                                    int32_t strideWidth, int32_t strideHeight,
    152                                    int32_t dilationWidthFactor, int32_t dilationHeightFactor,
    153 
    154                                    int32_t depthMultiplier, int32_t activation, uint8_t* outputData,
    155                                    const Shape& outputShape) {
    156     NNTRACE_TRANS("depthwiseConvQuant8");
    157 
    158     uint32_t paddingHeight = (uint32_t)paddingTop;
    159     uint32_t paddingWidth = (uint32_t)paddingLeft;
    160 
    161     uint32_t numBatches = getSizeOfDimension(inputShape, 0);
    162     uint32_t inputHeight = getSizeOfDimension(inputShape, 1);
    163     uint32_t inputWidth = getSizeOfDimension(inputShape, 2);
    164     uint32_t inputDepth = getSizeOfDimension(inputShape, 3);
    165     uint32_t filterHeight = getSizeOfDimension(filterShape, 1);
    166     uint32_t filterWidth = getSizeOfDimension(filterShape, 2);
    167     uint32_t filterDepth = getSizeOfDimension(filterShape, 3);
    168     uint32_t outputHeight = getSizeOfDimension(outputShape, 1);
    169     uint32_t outputWidth = getSizeOfDimension(outputShape, 2);
    170     uint32_t outputDepth = getSizeOfDimension(outputShape, 3);
    171 
    172     int32_t inputOffset = -inputShape.offset;
    173     int32_t outputOffset = outputShape.offset;
    174 
    175     auto realMultiplier = std::vector<double>(outputDepth, .0f);
    176     auto outputMultiplier = std::vector<int32_t>(outputDepth, 0);
    177     auto outputShift = std::vector<int32_t>(outputDepth, .0f);
    178 
    179     for (int i = 0; i < outputDepth; ++i) {
    180         Shape filterChannelShape = filterShape;
    181         filterChannelShape.scale = filterScales[i];
    182         Shape biasChannelShape = biasShape;
    183         biasChannelShape.scale = filterScales[i] * inputShape.scale;
    184         NN_RET_CHECK(GetQuantizedConvolutionMultipler(
    185                 inputShape, filterChannelShape, biasChannelShape, outputShape, &realMultiplier[i]));
    186         int exponent;
    187         NN_RET_CHECK(QuantizeMultiplier(realMultiplier[i], &outputMultiplier[i], &exponent));
    188         outputShift[i] = -exponent;
    189     }
    190 
    191     int32_t output_activation_min = 0, output_activation_max = 0;
    192     CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
    193                                   &output_activation_max);
    194 
    195     const uint8_t* inputBase = inputData;
    196     uint8_t* outPtr = outputData;
    197     for (uint32_t b = 0; b < numBatches; b++) {
    198         for (uint32_t h = 0; h < outputHeight; h++) {
    199             for (uint32_t w = 0; w < outputWidth; w++) {
    200                 for (uint32_t ic = 0; ic < inputDepth; ic++) {
    201                     for (uint32_t m = 0; m < depthMultiplier; m++) {
    202                         int32_t wInputOrigin = static_cast<int32_t>(w) * strideWidth - paddingLeft;
    203                         int32_t hInputOrigin = static_cast<int32_t>(h) * strideHeight - paddingTop;
    204                         const int oc = m + ic * depthMultiplier;
    205 
    206                         int32_t sum = 0.0f;
    207                         for (uint32_t i = 0; i < filterHeight; i++) {
    208                             for (uint32_t j = 0; j < filterWidth; j++) {
    209                                 int32_t hInput = hInputOrigin +
    210                                                  dilationHeightFactor * static_cast<int32_t>(i);
    211                                 int32_t wInput = wInputOrigin +
    212                                                  dilationWidthFactor * static_cast<int32_t>(j);
    213 
    214                                 if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
    215                                     wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
    216                                     uint32_t filterIndex =
    217                                             i * filterWidth * filterDepth + j * filterDepth + oc;
    218                                     uint32_t inputIndex = hInput * inputWidth * inputDepth +
    219                                                           wInput * inputDepth + ic;
    220                                     sum += (static_cast<int32_t>(filterData[filterIndex])) *
    221                                            (static_cast<int32_t>(inputBase[inputIndex]) +
    222                                             inputOffset);
    223                                 }
    224                             }
    225                         }
    226 
    227                         sum += biasData[oc];
    228                         sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier[oc],
    229                                                                     -outputShift[oc]);
    230                         sum += outputOffset;
    231                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
    232                         outPtr[m] = static_cast<uint8_t>(sum);
    233                     }
    234                     outPtr += depthMultiplier;
    235                 }
    236             }
    237         }
    238         inputBase += inputHeight * inputWidth * inputDepth;
    239     }
    240 
    241     return true;
    242 }
    243 
    244 #undef ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
    245 }  // namespace nn
    246 }  // namespace android
    247