Home | History | Annotate | Download | only in operations
      1 /*
      2  * Copyright (C) 2018 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "CpuOperationUtils.h"
     18 #include "Operations.h"
     19 
     20 #include <cfloat>
     21 #include <cmath>
     22 
     23 #include "Tracing.h"
     24 #include "tensorflow/lite/kernels/internal/common.h"
     25 
     26 namespace android {
     27 namespace nn {
     28 
     29 #define ANDROID_NN_GROUPED_CONV_PARAMETERS                      \
     30     uint32_t numBatches = getSizeOfDimension(inputShape, 0);    \
     31     uint32_t inputHeight = getSizeOfDimension(inputShape, 1);   \
     32     uint32_t inputWidth = getSizeOfDimension(inputShape, 2);    \
     33     uint32_t inputDepth = getSizeOfDimension(inputShape, 3);    \
     34     uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
     35     uint32_t filterWidth = getSizeOfDimension(filterShape, 2);  \
     36     uint32_t filterDepth = getSizeOfDimension(filterShape, 3);  \
     37     uint32_t outputHeight = getSizeOfDimension(outputShape, 1); \
     38     uint32_t outputWidth = getSizeOfDimension(outputShape, 2);  \
     39     uint32_t outputDepth = getSizeOfDimension(outputShape, 3);  \
     40     uint32_t outputGroupDepth = outputDepth / numGroups;
     41 
     42 bool groupedConvFloat32(const float* inputData, const Shape& inputShape, const float* filterData,
     43                         const Shape& filterShape, const float* biasData, const Shape& biasShape,
     44                         int32_t padding_left, int32_t padding_right, int32_t padding_top,
     45                         int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
     46                         int32_t numGroups, int32_t activation, float* outputData,
     47                         const Shape& outputShape) {
     48     NNTRACE_TRANS("groupConvFloat32");
     49     ANDROID_NN_GROUPED_CONV_PARAMETERS
     50 
     51     float output_activation_min = 0.0f, output_activation_max = 0.0f;
     52     CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
     53 
     54     const float* inputBase = inputData;
     55     float* outPtr = outputData;
     56     for (uint32_t b = 0; b < numBatches; b++) {
     57         for (uint32_t h = 0; h < outputHeight; h++) {
     58             for (uint32_t w = 0; w < outputWidth; w++) {
     59                 const float* filterBase = filterData;
     60                 for (uint32_t g = 0; g < numGroups; g++) {
     61                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
     62                         int32_t wInputOrigin =
     63                                 static_cast<int32_t>(w) * stride_width - padding_left;
     64                         int32_t hInputOrigin =
     65                                 static_cast<int32_t>(h) * stride_height - padding_top;
     66                         float sum = 0.0f;
     67                         for (uint32_t i = 0; i < filterHeight; i++) {
     68                             for (uint32_t j = 0; j < filterWidth; j++) {
     69                                 for (uint32_t k = 0; k < filterDepth; k++) {
     70                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
     71                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
     72                                     uint32_t dInput = filterDepth * g + k;
     73                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
     74                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
     75                                         uint32_t filterIndex =
     76                                                 i * filterWidth * filterDepth + j * filterDepth + k;
     77                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
     78                                                               wInput * inputDepth + dInput;
     79                                         sum += filterBase[filterIndex] * inputBase[inputIndex];
     80                                     }
     81                                 }
     82                             }
     83                         }
     84                         sum += biasData[g * outputGroupDepth + d];
     85                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
     86                         outPtr[d] = sum;
     87                         filterBase += filterHeight * filterWidth * filterDepth;
     88                     }
     89                     outPtr += outputGroupDepth;
     90                 }
     91             }
     92         }
     93         inputBase += inputHeight * inputWidth * inputDepth;
     94     }
     95 
     96     return true;
     97 }
     98 
     99 bool groupedConvQuant8(const uint8_t* inputData, const Shape& inputShape, const uint8_t* filterData,
    100                        const Shape& filterShape, const int32_t* biasData, const Shape& biasShape,
    101                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
    102                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
    103                        int32_t numGroups, int32_t activation, uint8_t* outputData,
    104                        const Shape& outputShape) {
    105     NNTRACE_TRANS("groupConvQuant8");
    106     ANDROID_NN_GROUPED_CONV_PARAMETERS
    107 
    108     int32_t inputOffset = -inputShape.offset;
    109     int32_t filterOffset = -filterShape.offset;
    110     int32_t outputOffset = outputShape.offset;
    111 
    112     double realMultiplier = 0.0;
    113     int32_t outputMultiplier = 0;
    114     int32_t outputShift = 0;
    115     NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape,
    116                                                   &realMultiplier));
    117     int exponent;
    118     NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &exponent));
    119     outputShift = -exponent;
    120 
    121     int32_t output_activation_min = 0, output_activation_max = 0;
    122     CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
    123                                   &output_activation_max);
    124 
    125     const uint8_t* inputBase = inputData;
    126     uint8_t* outPtr = outputData;
    127     for (uint32_t b = 0; b < numBatches; b++) {
    128         for (uint32_t h = 0; h < outputHeight; h++) {
    129             for (uint32_t w = 0; w < outputWidth; w++) {
    130                 const uint8_t* filterBase = filterData;
    131                 for (uint32_t g = 0; g < numGroups; g++) {
    132                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
    133                         int32_t wInputOrigin =
    134                                 static_cast<int32_t>(w) * stride_width - padding_left;
    135                         int32_t hInputOrigin =
    136                                 static_cast<int32_t>(h) * stride_height - padding_top;
    137                         int32_t sum = 0.0f;
    138                         for (uint32_t i = 0; i < filterHeight; i++) {
    139                             for (uint32_t j = 0; j < filterWidth; j++) {
    140                                 for (uint32_t k = 0; k < filterDepth; k++) {
    141                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
    142                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
    143                                     uint32_t dInput = filterDepth * g + k;
    144                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
    145                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
    146                                         uint32_t filterIndex =
    147                                                 i * filterWidth * filterDepth + j * filterDepth + k;
    148                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
    149                                                               wInput * inputDepth + dInput;
    150                                         sum += (static_cast<int32_t>(filterBase[filterIndex]) +
    151                                                 filterOffset) *
    152                                                (static_cast<int32_t>(inputBase[inputIndex]) +
    153                                                 inputOffset);
    154                                     }
    155                                 }
    156                             }
    157                         }
    158                         sum += biasData[g * outputGroupDepth + d];
    159                         sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier,
    160                                                                     -outputShift);
    161                         sum += outputOffset;
    162                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
    163                         outPtr[d] = static_cast<uint8_t>(sum);
    164                         filterBase += filterHeight * filterWidth * filterDepth;
    165                     }
    166                     outPtr += outputGroupDepth;
    167                 }
    168             }
    169         }
    170         inputBase += inputHeight * inputWidth * inputDepth;
    171     }
    172 
    173     return true;
    174 }
    175 
    176 bool groupedConvQuant8PerChannel(const uint8_t* inputData, const Shape& inputShape,
    177                                  const int8_t* filterData, const Shape& filterShape,
    178                                  const float* filterScales, const int32_t* biasData,
    179                                  const Shape& biasShape, int32_t padding_left,
    180                                  int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
    181                                  int32_t stride_width, int32_t stride_height, int32_t numGroups,
    182                                  int32_t activation, uint8_t* outputData,
    183                                  const Shape& outputShape) {
    184     NNTRACE_TRANS("groupConvQuant8");
    185     ANDROID_NN_GROUPED_CONV_PARAMETERS
    186 
    187     int32_t inputOffset = -inputShape.offset;
    188     int32_t outputOffset = outputShape.offset;
    189 
    190     auto realMultiplier = std::vector<double>(outputDepth, .0f);
    191     auto outputMultiplier = std::vector<int32_t>(outputDepth, 0);
    192     auto outputShift = std::vector<int32_t>(outputDepth, 0);
    193 
    194     for (int i = 0; i < outputDepth; ++i) {
    195         Shape filterChannelShape = filterShape;
    196         filterChannelShape.scale = filterScales[i];
    197         Shape biasChannelShape = biasShape;
    198         biasChannelShape.scale = filterScales[i] * inputShape.scale;
    199 
    200         NN_RET_CHECK(GetQuantizedConvolutionMultipler(
    201                 inputShape, filterChannelShape, biasChannelShape, outputShape, &realMultiplier[i]));
    202         int exponent;
    203         NN_RET_CHECK(QuantizeMultiplier(realMultiplier[i], &outputMultiplier[i], &exponent));
    204         outputShift[i] = -exponent;
    205     }
    206 
    207     int32_t output_activation_min = 0, output_activation_max = 0;
    208     CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
    209                                   &output_activation_max);
    210 
    211     const uint8_t* inputBase = inputData;
    212     uint8_t* outPtr = outputData;
    213     for (uint32_t b = 0; b < numBatches; b++) {
    214         for (uint32_t h = 0; h < outputHeight; h++) {
    215             for (uint32_t w = 0; w < outputWidth; w++) {
    216                 const int8_t* filterBase = filterData;
    217                 for (uint32_t g = 0; g < numGroups; g++) {
    218                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
    219                         int32_t wInputOrigin =
    220                                 static_cast<int32_t>(w) * stride_width - padding_left;
    221                         int32_t hInputOrigin =
    222                                 static_cast<int32_t>(h) * stride_height - padding_top;
    223                         int32_t sum = 0.0f;
    224                         for (uint32_t i = 0; i < filterHeight; i++) {
    225                             for (uint32_t j = 0; j < filterWidth; j++) {
    226                                 for (uint32_t k = 0; k < filterDepth; k++) {
    227                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
    228                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
    229                                     uint32_t dInput = filterDepth * g + k;
    230                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
    231                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
    232                                         uint32_t filterIndex =
    233                                                 i * filterWidth * filterDepth + j * filterDepth + k;
    234                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
    235                                                               wInput * inputDepth + dInput;
    236                                         sum += (static_cast<int32_t>(filterBase[filterIndex])) *
    237                                                (static_cast<int32_t>(inputBase[inputIndex]) +
    238                                                 inputOffset);
    239                                     }
    240                                 }
    241                             }
    242                         }
    243                         int channelIndex = g * outputGroupDepth + d;
    244                         sum += biasData[channelIndex];
    245                         sum = tflite::MultiplyByQuantizedMultiplier(
    246                                 sum, outputMultiplier[channelIndex], -outputShift[channelIndex]);
    247                         sum += outputOffset;
    248                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
    249                         outPtr[d] = static_cast<uint8_t>(sum);
    250                         filterBase += filterHeight * filterWidth * filterDepth;
    251                     }
    252                     outPtr += outputGroupDepth;
    253                 }
    254             }
    255         }
    256         inputBase += inputHeight * inputWidth * inputDepth;
    257     }
    258 
    259     return true;
    260 }
    261 
    262 bool groupedConvFloat16(const _Float16* inputData, const Shape& inputShape,
    263                         const _Float16* filterData, const Shape& filterShape,
    264                         const _Float16* biasData, const Shape& biasShape, int32_t padding_left,
    265                         int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
    266                         int32_t stride_width, int32_t stride_height, int32_t numGroups,
    267                         int32_t activation, _Float16* outputData, const Shape& outputShape) {
    268     NNTRACE_TRANS("groupConvFloat16");
    269 
    270     std::vector<float> inputData_float32(getNumberOfElements(inputShape));
    271     std::vector<float> filterData_float32(getNumberOfElements(filterShape));
    272     std::vector<float> biasData_float32(getNumberOfElements(biasShape));
    273     std::vector<float> outputData_float32(getNumberOfElements(outputShape));
    274 
    275     convertFloat16ToFloat32(inputData, &inputData_float32);
    276     convertFloat16ToFloat32(filterData, &filterData_float32);
    277     convertFloat16ToFloat32(biasData, &biasData_float32);
    278 
    279     groupedConvFloat32(inputData_float32.data(), inputShape, filterData_float32.data(), filterShape,
    280                        biasData_float32.data(), biasShape, padding_left, padding_right, padding_top,
    281                        padding_bottom, stride_width, stride_height, numGroups, activation,
    282                        outputData_float32.data(), outputShape);
    283     convertFloat32ToFloat16(outputData_float32, outputData);
    284 
    285     return true;
    286 }
    287 
    288 #undef ANDROID_NN_GROUPED_CONV_PARAMETERS
    289 }  // namespace nn
    290 }  // namespace android
    291