/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "OperationsUtils"

#include "OperationsUtils.h"
#include "Operations.h"
#include "Utils.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

namespace android {
namespace nn {

bool SameShape(const Shape& in1, const Shape& in2) {
    if (in1.type != in2.type || in1.dimensions.size() != in2.dimensions.size()) {
        return false;
    }
    for (size_t i = 0; i < in1.dimensions.size(); i++) {
        if (in1.dimensions[i] != in2.dimensions[i]) {
            return false;
        }
    }
    return true;
}

bool SetShape(const Shape& in, Shape* out) {
    if (in.type != out->type || in.dimensions.size() != out->dimensions.size()) {
        return false;
    }
    out->dimensions = in.dimensions;
    return true;
}

uint32_t getNumberOfElements(const Shape& shape) {
    uint32_t count = 1;
    for (size_t i = 0; i < shape.dimensions.size(); i++) {
        count *= shape.dimensions[i];
    }
    return count;
}

uint32_t getNumberOfDimensions(const Shape& shape) {
    return shape.dimensions.size();
}

uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) {
    if (dimensionIdx >= shape.dimensions.size()) {
        // TODO: Log the error.
        return 0;
    }
    return shape.dimensions[dimensionIdx];
}

bool QuantizeMultiplierSmallerThanOne(double double_multiplier,
                                      int32_t* quantized_multiplier,
                                      int32_t* right_shift) {
    NN_OPS_CHECK(double_multiplier >= 0.);
    NN_OPS_CHECK(double_multiplier < 1.);
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *right_shift = 0;
        return true;
    }
    NN_OPS_CHECK(double_multiplier > 0.);
    const double q = std::frexp(double_multiplier, right_shift);
    *right_shift *= -1;
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    NN_OPS_CHECK(q_fixed <= (1ll << 31));
    if (q_fixed == (1ll << 31)) {
        q_fixed /= 2;
        --*right_shift;
    }
    NN_OPS_CHECK(*right_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}
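
// Worked example for QuantizeMultiplierSmallerThanOne (a sketch with assumed
// values, not taken from any real model): for double_multiplier = 0.2,
// std::frexp() yields 0.8 * 2^-2, so *right_shift becomes 2 and
// *quantized_multiplier = round(0.8 * 2^31) = 1717986918. A runtime can then
// apply the multiplier as a fixed-point multiply by 2^-31 followed by a right
// shift of 2.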

bool QuantizeMultiplierGreaterThanOne(double double_multiplier,
                                      int32_t* quantized_multiplier,
                                      int* left_shift) {
    NN_OPS_CHECK(double_multiplier > 1.);
    const double q = std::frexp(double_multiplier, left_shift);
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    NN_OPS_CHECK(q_fixed <= (1ll << 31));
    if (q_fixed == (1ll << 31)) {
        q_fixed /= 2;
        ++*left_shift;
    }
    NN_OPS_CHECK(*left_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}
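
// Worked example for QuantizeMultiplierGreaterThanOne (assumed values): for
// double_multiplier = 3.0, std::frexp() yields 0.75 * 2^2, so *left_shift
// becomes 2 and *quantized_multiplier = round(0.75 * 2^31) = 1610612736,
// i.e. 3.0 is represented as (1610612736 * 2^-31) * 2^2.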

bool GetQuantizedConvolutionMultipler(const Shape& inputShape,
                                      const Shape& filterShape,
                                      const Shape& biasShape,
                                      const Shape& outputShape,
                                      float* multiplier) {
    const float input_product_scale = inputShape.scale * filterShape.scale;
    const float bias_scale = biasShape.scale;
    const float output_scale = outputShape.scale;

    // The following conditions must be guaranteed by the training pipeline.
    NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <=
                 1e-6 * std::min(input_product_scale, bias_scale));
    NN_OPS_CHECK(input_product_scale >= 0);
    NN_OPS_CHECK(input_product_scale < output_scale);
    *multiplier = input_product_scale / output_scale;
    return true;
}
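
// Worked example for GetQuantizedConvolutionMultipler (assumed scales): with
// inputShape.scale = 0.5 and filterShape.scale = 0.25, the product scale is
// 0.125, so biasShape.scale must also be (approximately) 0.125; with
// outputShape.scale = 0.5 the resulting *multiplier is 0.125 / 0.5 = 0.25.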

void CalculateActivationRangeUint8(int32_t activation,
                                   const Shape& outputShape,
                                   int32_t* act_min,
                                   int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<uint8_t>::min();
    const int32_t qmax = std::numeric_limits<uint8_t>::max();

    const auto scale = outputShape.scale;
    const auto zero_point = outputShape.offset;

    auto quantize = [scale, zero_point](float f) {
        return zero_point + static_cast<int32_t>(std::round(f / scale));
    };

    if (activation == kActivationRelu) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = qmax;
    } else if (activation == kActivationRelu6) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = std::min(qmax, quantize(6.0));
    } else if (activation == kActivationRelu1) {
        *act_min = std::max(qmin, quantize(-1.0));
        *act_max = std::min(qmax, quantize(1.0));
    } else if (activation == kActivationNone) {
        *act_min = qmin;
        *act_max = qmax;
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}
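
// Worked example for CalculateActivationRangeUint8 (assumed quantization
// parameters): with outputShape.scale = 0.5, outputShape.offset = 10 and
// kActivationRelu6, quantize(0.0) = 10 and quantize(6.0) = 22, so the
// clamped range is [10, 22] instead of the full uint8 range [0, 255].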

void CalculateActivationRangeFloat(int32_t activation,
                                   float* activation_min,
                                   float* activation_max) {
    if (activation == kActivationRelu) {
        *activation_min = 0.f;
        *activation_max = std::numeric_limits<float>::max();
    } else if (activation == kActivationRelu6) {
        *activation_min = 0.f;
        *activation_max = 6.f;
    } else if (activation == kActivationRelu1) {
        *activation_min = -1.f;
        *activation_max = 1.f;
    } else if (activation == kActivationNone) {
        *activation_min = std::numeric_limits<float>::lowest();
        *activation_max = std::numeric_limits<float>::max();
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) {
    const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                      (1ll << (31 - input_integer_bits)) /
                                      (1ll << input_left_shift);
    // Tighten the bound using floor: if we used the exact value, the scaled
    // difference could land exactly at the maximum, so we must return a value
    // of strictly lower magnitude.
    return static_cast<int32_t>(std::floor(max_input_rescaled));
}
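
// Worked example for CalculateInputRadius (assumed parameters): with
// input_integer_bits = 4 and input_left_shift = 20, the bound is
// (2^4 - 1) * 2^(31 - 4) / 2^20 = 15 * 2^7 = 1920, so the returned input
// radius is 1920.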

bool addMulPrepare(const Shape& in1, const Shape& in2, Shape* out) {
    NN_OPS_CHECK(getNumberOfDimensions(in1) <= 4 && getNumberOfDimensions(in2) <= 4);
    NN_OPS_CHECK(in1.type == in2.type);
    if (SameShape(in1, in2)) {
        return SetShape(in1, out);
    } else {
        // Broadcasting is needed: align the dimensions from the right and
        // take the larger of each pair; sizes are compatible only if they
        // match or one of them is 1.
        uint32_t numberOfDims1 = getNumberOfDimensions(in1);
        uint32_t numberOfDims2 = getNumberOfDimensions(in2);
        uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
        out->dimensions = std::vector<uint32_t>(maxDims);
        for (uint32_t i = 1; i <= maxDims; i++) {
            uint32_t dim1 = 1;
            if (i <= numberOfDims1) {
                dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
            }
            uint32_t dim2 = 1;
            if (i <= numberOfDims2) {
                dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
            }
            if (dim1 != dim2 && dim1 != 1 && dim2 != 1) {
                LOG(ERROR) << "Dimensions mismatch for BroadcastAdd";
                return false;
            }
            out->dimensions[maxDims - i] = std::max(dim1, dim2);
        }
    }
    return true;
}
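
// Worked example for addMulPrepare (assumed shapes): broadcasting
// in1 = {2, 1, 4} against in2 = {3, 1} aligns dimensions from the right,
// giving out->dimensions = {2, 3, 4}; a pair like {2, 3} and {4, 3} would
// fail because 2 and 4 differ and neither is 1.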

bool floorPrepare(const Shape& input, Shape* output) {
    return SetShape(input, output);
}

bool dequantizePrepare(const Shape& input, Shape* output) {
    if (input.type != OperandType::TENSOR_QUANT8_ASYMM ||
            output->type != OperandType::TENSOR_FLOAT32) {
        LOG(ERROR) << "bad input / output operand type.";
        return false;
    }
    if (input.dimensions.size() != output->dimensions.size()) {
        LOG(ERROR) << "input and output tensors don't have the same rank.";
        return false;
    }
    output->dimensions = input.dimensions;
    return true;
}

bool convPrepare(const Shape& input,
                 const Shape& filter,
                 const Shape& bias,
                 int32_t padding_left, int32_t padding_right,
                 int32_t padding_top, int32_t padding_bottom,
                 int32_t stride_width, int32_t stride_height,
                 Shape* output) {
    NN_OPS_CHECK(input.type == filter.type);
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
        NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_OPS_CHECK(input.type == bias.type);
    }
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(filter) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(bias) == 1);

    NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0));
    NN_OPS_CHECK(getSizeOfDimension(filter, 3) == getSizeOfDimension(input, 3));

    uint32_t channels_out = getSizeOfDimension(filter, 0);
    uint32_t width        = getSizeOfDimension(input, 2);
    uint32_t height       = getSizeOfDimension(input, 1);
    uint32_t filterWidth  = getSizeOfDimension(filter, 2);
    uint32_t filterHeight = getSizeOfDimension(filter, 1);
    uint32_t batches      = getSizeOfDimension(input, 0);

    uint32_t outWidth = computeOutSize(width, filterWidth, stride_width,
                                       padding_left, padding_right);
    uint32_t outHeight = computeOutSize(height, filterHeight, stride_height,
                                        padding_top, padding_bottom);

    output->type = input.type;
    output->dimensions = {batches, outHeight, outWidth, channels_out};
    return true;
}
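
// Worked example for convPrepare (assumed shapes, and assuming computeOutSize
// implements the usual (size - filterSize + paddingHead + paddingTail) /
// stride + 1 output formula): an NHWC input of {1, 32, 32, 3} with a
// {8, 3, 3, 3} filter, strides of 1 and padding of 1 on every side yields
// outWidth = outHeight = (32 - 3 + 1 + 1) / 1 + 1 = 32, so the output shape
// is {1, 32, 32, 8}.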

bool depthwiseConvPrepare(const Shape& input,
                          const Shape& filter,
                          const Shape& bias,
                          int32_t padding_left, int32_t padding_right,
                          int32_t padding_top, int32_t padding_bottom,
                          int32_t stride_width, int32_t stride_height,
                          Shape* output) {
    NN_OPS_CHECK(input.type == filter.type);
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
        NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_OPS_CHECK(input.type == bias.type);
    }
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(filter) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(bias) == 1);

    NN_OPS_CHECK(getSizeOfDimension(filter, 3) == getSizeOfDimension(bias, 0));

    uint32_t channels_out = getSizeOfDimension(filter, 3);
    uint32_t width        = getSizeOfDimension(input, 2);
    uint32_t height       = getSizeOfDimension(input, 1);
    uint32_t filterWidth  = getSizeOfDimension(filter, 2);
    uint32_t filterHeight = getSizeOfDimension(filter, 1);
    uint32_t batches      = getSizeOfDimension(input, 0);

    uint32_t outWidth = computeOutSize(width, filterWidth, stride_width,
                                       padding_left, padding_right);
    uint32_t outHeight = computeOutSize(height, filterHeight, stride_height,
                                        padding_top, padding_bottom);

    output->type = input.type;
    output->dimensions = {batches, outHeight, outWidth, channels_out};
    return true;
}
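
// Worked example for depthwiseConvPrepare (assumed shapes): unlike convPrepare
// above, the output channel count comes from filter dimension 3, matching the
// {1, filterHeight, filterWidth, channels_out} depthwise filter layout. An
// input of {1, 8, 8, 2} with filter {1, 3, 3, 4} (depth multiplier 2), bias
// {4}, strides of 1 and no padding yields (8 - 3) / 1 + 1 = 6 and an output
// shape of {1, 6, 6, 4}.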

bool genericPoolingPrepare(const Shape& input,
                           int32_t padding_left, int32_t padding_right,
                           int32_t padding_top, int32_t padding_bottom,
                           int32_t stride_width, int32_t stride_height,
                           int32_t filter_width, int32_t filter_height,
                           Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    uint32_t batches      = getSizeOfDimension(input, 0);
    uint32_t width        = getSizeOfDimension(input, 2);
    uint32_t height       = getSizeOfDimension(input, 1);
    uint32_t channels_out = getSizeOfDimension(input, 3);

    uint32_t outWidth = computeOutSize(width, filter_width, stride_width,
                                       padding_left, padding_right);
    uint32_t outHeight = computeOutSize(height, filter_height, stride_height,
                                        padding_top, padding_bottom);

    output->type = input.type;
    output->dimensions = {batches, outHeight, outWidth, channels_out};
    return true;
}

bool genericActivationPrepare(const Shape& input,
                              Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) <= 4);
    return SetShape(input, output);
}

bool fullyConnectedPrepare(const Shape& input,
                           const Shape& weights,
                           const Shape& bias,
                           Shape* output) {
    // Check that all the tensor parameters are consistent with each other and
    // with the input configuration.
    NN_OPS_CHECK(input.type == weights.type);
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
        NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_OPS_CHECK(input.type == bias.type);
    }
    // The TensorFlow fully connected layer specification says that the input
    // should have at least rank 2, so we check that here; TFLite does not.
    NN_OPS_CHECK(getNumberOfDimensions(input) >= 2);
    NN_OPS_CHECK(getNumberOfDimensions(weights) == 2);
    uint32_t input_n_elements = getNumberOfElements(input);
    uint32_t num_units  = getSizeOfDimension(weights, 0);
    uint32_t input_size = getSizeOfDimension(weights, 1);
    uint32_t batch_size = input_n_elements / input_size;

    NN_OPS_CHECK(getSizeOfDimension(bias, 0) == num_units);
    NN_OPS_CHECK(input_size * batch_size == input_n_elements);

    output->type = input.type;
    output->dimensions = {batch_size, num_units};

    return true;
}
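
// Worked example for fullyConnectedPrepare (assumed shapes): an input of
// {4, 5, 4} has 80 elements; with weights {3, 20} (num_units = 3,
// input_size = 20) and bias {3}, batch_size = 80 / 20 = 4 and the output
// shape is {4, 3}.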

bool concatenationPrepare(const std::vector<Shape>& inputShapes,
                          int32_t axis,
                          Shape* output) {
    int num_inputs = inputShapes.size();
    OperandType input_type = inputShapes[0].type;
    uint32_t num_dimensions = getNumberOfDimensions(inputShapes[0]);

    NN_OPS_CHECK(axis >= 0);
    NN_OPS_CHECK(axis < (int32_t)num_dimensions);

    int sumAxis = getSizeOfDimension(inputShapes[0], axis);
    for (int i = 1; i < num_inputs; ++i) {
        NN_OPS_CHECK(getNumberOfDimensions(inputShapes[i]) == num_dimensions);
        NN_OPS_CHECK(inputShapes[i].type == inputShapes[0].type);
        if (input_type == OperandType::TENSOR_QUANT8_ASYMM) {
            NN_OPS_CHECK(inputShapes[0].offset == inputShapes[i].offset);
            NN_OPS_CHECK(inputShapes[0].scale == inputShapes[i].scale);
        }
        for (int d = 0; d < (int32_t)num_dimensions; ++d) {
            if (d == axis) {
                sumAxis += getSizeOfDimension(inputShapes[i], axis);
            } else {
                NN_OPS_CHECK(getSizeOfDimension(inputShapes[0], d) ==
                             getSizeOfDimension(inputShapes[i], d));
            }
        }
    }

    output->type = input_type;
    output->dimensions = inputShapes[0].dimensions;
    output->dimensions[axis] = sumAxis;

    if (input_type == OperandType::TENSOR_QUANT8_ASYMM) {
        NN_OPS_CHECK(inputShapes[0].offset == output->offset);
        NN_OPS_CHECK(inputShapes[0].scale == output->scale);
    }

    return true;
}
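
// Worked example for concatenationPrepare (assumed shapes): concatenating
// {2, 3, 5} and {2, 4, 5} along axis = 1 sums the axis sizes (3 + 4) and
// requires every other dimension to match, producing {2, 7, 5}.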

bool genericNormalizationPrepare(const Shape& input, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    return SetShape(input, output);
}

bool reshapePrepare(const Shape& input,
                    const int32_t* targetDims,
                    const int32_t targetDimsSize,
                    Shape* output) {
    // Reshape allows one of the targetDims components to have the special -1
    // value, meaning that dimension will be calculated automatically based on
    // the input. Here we calculate what that dimension should be so that the
    // number of output elements is the same as the number of input elements.
    int32_t numInputElements = (int32_t) getNumberOfElements(input);

    std::vector<uint32_t> outDims(targetDimsSize);
    int32_t numOutputElements = 1;
    int32_t stretchDim = -1;
    for (int32_t i = 0; i < targetDimsSize; ++i) {
        int32_t value = targetDims[i];
        if (value == -1) {
            NN_OPS_CHECK(stretchDim == -1);
            stretchDim = i;
        } else {
            numOutputElements *= value;
            outDims[i] = (uint32_t)value;
        }
    }
    if (stretchDim != -1) {
        int32_t stretchValue = numInputElements / numOutputElements;
        outDims[stretchDim] = (uint32_t) stretchValue;
        numOutputElements *= stretchValue;
    }

    NN_OPS_CHECK(numInputElements == numOutputElements);

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
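
// Worked example for reshapePrepare (assumed shapes): reshaping a {2, 3, 4}
// input (24 elements) with targetDims = {4, -1} resolves the -1 stretch
// dimension to 24 / 4 = 6, producing an output shape of {4, 6}.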

bool resizeBilinearPrepare(const Shape& input,
                           int32_t width,
                           int32_t height,
                           Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    uint32_t batches  = getSizeOfDimension(input, 0);
    uint32_t channels = getSizeOfDimension(input, 3);

    output->type = input.type;
    output->dimensions = {batches, (uint32_t)height, (uint32_t)width, channels};

    return true;
}

bool depthToSpacePrepare(const Shape& input,
                         int32_t blockSize,
                         Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches  = getSizeOfDimension(input, 0);
    uint32_t height   = getSizeOfDimension(input, 1);
    uint32_t width    = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(channels % (blockSize * blockSize) == 0);
    output->type = input.type;
    output->dimensions = {batches,
                          height * blockSize,
                          width * blockSize,
                          channels / (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
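
// Worked example for depthToSpacePrepare (assumed shapes): an input of
// {1, 2, 2, 12} with blockSize = 2 moves 2x2 = 4 channels into each spatial
// position, producing {1, 4, 4, 3}; spaceToDepthPrepare below is the exact
// inverse.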

bool spaceToDepthPrepare(const Shape& input,
                         int32_t blockSize,
                         Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches  = getSizeOfDimension(input, 0);
    uint32_t height   = getSizeOfDimension(input, 1);
    uint32_t width    = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(height % blockSize == 0);
    NN_OPS_CHECK(width % blockSize == 0);

    output->type = input.type;
    output->dimensions = {batches,
                          height / blockSize,
                          width / blockSize,
                          channels * (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool embeddingLookupPrepare(const Shape& valueShape,
                            const Shape& lookupShape,
                            Shape* outputShape) {
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2);
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);

    const uint32_t rows     = getSizeOfDimension(valueShape, 0);
    const uint32_t columns  = getSizeOfDimension(valueShape, 1);

    const uint32_t lookups  = getSizeOfDimension(lookupShape, 0);

    outputShape->type = valueShape.type;
    outputShape->dimensions = { lookups, columns };
    for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    return true;
}
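
// Worked example for embeddingLookupPrepare (assumed shapes): with values of
// shape {100, 16} and 5 lookup indices, each lookup selects one row, so the
// output shape is {5, 16}.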

bool hashtableLookupPrepare(const Shape& lookupShape,
                            const Shape& keyShape,
                            const Shape& valueShape,
                            Shape* outputShape,
                            Shape* hitShape) {
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1);

    const uint32_t lookups  = getSizeOfDimension(lookupShape, 0);
    const uint32_t keys     = getSizeOfDimension(keyShape, 0);
    const uint32_t rows     = getSizeOfDimension(valueShape, 0);
    outputShape->type = valueShape.type;
    outputShape->dimensions = { lookups };
    for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    hitShape->type = OperandType::TENSOR_QUANT8_ASYMM;
    hitShape->dimensions = { lookups };
    hitShape->offset = 0;
    hitShape->scale = 1.f;

    return true;
}

bool padPrepare(const Shape& input,
                const int32_t* paddingsData,
                const Shape& paddingsShape,
                Shape* output) {
    // Currently only 4D tensors are supported.
    uint32_t numInputDims = getNumberOfDimensions(input);
    NN_OPS_CHECK(numInputDims == 4);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == numInputDims);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    std::vector<uint32_t> outDims(numInputDims);
    for (uint32_t i = 0; i < numInputDims; ++i) {
        int32_t beforePadding = *paddingsData++;
        int32_t afterPadding = *paddingsData++;
        // Padding amounts have to be greater than or equal to 0.
        NN_OPS_CHECK(beforePadding >= 0 && afterPadding >= 0);
        outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding;
    }
    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
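
// Worked example for padPrepare (assumed shapes): for an input of
// {1, 2, 3, 1} and paddingsData = {0, 0, 1, 1, 2, 2, 0, 0} (a before/after
// pair per dimension), the output shape is
// {0 + 1 + 0, 1 + 2 + 1, 2 + 3 + 2, 0 + 1 + 0} = {1, 4, 7, 1}.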

bool batchToSpacePrepare(const Shape& input,
                         const int32_t* blockSizeData,
                         const Shape& blockSizeShape,
                         Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // The block size only applies to the spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    uint32_t batches  = getSizeOfDimension(input, 0);
    uint32_t height   = getSizeOfDimension(input, 1);
    uint32_t width    = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(batches % (blockSizeData[0] * blockSizeData[1]) == 0);
    output->type = input.type;
    output->dimensions = {batches / (blockSizeData[0] * blockSizeData[1]),
                          height * blockSizeData[0],
                          width * blockSizeData[1],
                          channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
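
// Worked example for batchToSpacePrepare (assumed shapes): an input of
// {8, 1, 1, 3} with blockSizeData = {2, 2} folds 2x2 = 4 batches back into
// the spatial dimensions, producing {8 / 4, 1 * 2, 1 * 2, 3} = {2, 2, 2, 3}.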

bool spaceToBatchPrepare(const Shape& input,
                         const int32_t* blockSizeData,
                         const Shape& blockSizeShape,
                         const int32_t* paddingsData,
                         const Shape& paddingsShape,
                         Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // The block size only applies to the spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    uint32_t batches  = getSizeOfDimension(input, 0);
    uint32_t height   = getSizeOfDimension(input, 1);
    uint32_t width    = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    uint32_t paddedHeight = paddingsData[0] + height + paddingsData[1];
    uint32_t paddedWidth = paddingsData[2] + width + paddingsData[3];

    NN_OPS_CHECK(paddedHeight % blockSizeData[0] == 0);
    NN_OPS_CHECK(paddedWidth % blockSizeData[1] == 0);

    output->type = input.type;
    output->dimensions = {batches * (blockSizeData[0] * blockSizeData[1]),
                          paddedHeight / blockSizeData[0],
                          paddedWidth / blockSizeData[1],
                          channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
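
// Worked example for spaceToBatchPrepare (assumed shapes): an input of
// {1, 4, 4, 1} with blockSizeData = {2, 2} and zero paddings produces
// {1 * 4, 4 / 2, 4 / 2, 1} = {4, 2, 2, 1}, the inverse of the batch-to-space
// example above.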

bool squeezePrepare(const Shape& input,
                    const int32_t* squeezeDims,
                    const Shape& squeezeDimsShape,
                    Shape* output) {
    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));

    // squeezeDims needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(squeezeDimsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(squeezeDimsShape) == 1);

    int32_t squeezeDimsSize = static_cast<int32_t>(getSizeOfDimension(squeezeDimsShape, 0));
    std::vector<bool> shouldSqueeze(numInputDims, false);
    int32_t numDimsSqueezed = 0;

    if (squeezeDimsSize == 0) {
        // If squeezeDimsSize is 0, all dims with value 1 will be squeezed.
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            if (getSizeOfDimension(input, idx) == 1) {
                shouldSqueeze[idx] = true;
                ++numDimsSqueezed;
            }
        }
    } else {
        for (int32_t idx = 0; idx < squeezeDimsSize; ++idx) {
            int32_t current = squeezeDims[idx] < 0 ? squeezeDims[idx] + numInputDims
                                                   : squeezeDims[idx];
            NN_OPS_CHECK(current >= 0 && current < numInputDims &&
                         getSizeOfDimension(input, current) == 1);
            if (!shouldSqueeze[current]) ++numDimsSqueezed;
            shouldSqueeze[current] = true;
        }
    }

    // Sets output dimensions.
    std::vector<uint32_t> outDims(numInputDims - numDimsSqueezed);
    for (int32_t inIdx = 0, outIdx = 0; inIdx < numInputDims; ++inIdx) {
        if (!shouldSqueeze[inIdx]) {
            outDims[outIdx++] = getSizeOfDimension(input, inIdx);
        }
    }

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
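
// Worked example for squeezePrepare (assumed shapes): squeezing a
// {1, 3, 1, 5} input with squeezeDims = {0, 2} removes both size-1
// dimensions and produces {3, 5}; an empty squeezeDims would remove every
// size-1 dimension and give the same result here.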

bool transposePrepare(const Shape& input,
                      const int32_t* permData,
                      const Shape& permShape,
                      Shape* output) {
    uint32_t numInputDims = getNumberOfDimensions(input);
    // The Transpose op only supports 1D-4D input arrays.
    NN_OPS_CHECK(numInputDims <= 4);

    // perm needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(permShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(permShape) == 1);
    NN_OPS_CHECK(numInputDims == getSizeOfDimension(permShape, 0));

    std::vector<uint32_t> outDims(numInputDims);
    for (int32_t idx = 0; idx < static_cast<int32_t>(numInputDims); ++idx) {
        NN_OPS_CHECK(permData[idx] >= 0 && permData[idx] < static_cast<int32_t>(numInputDims));
        outDims[idx] = getSizeOfDimension(input, permData[idx]);
    }

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
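
// Worked example for transposePrepare (assumed shapes): permuting a
// {2, 3, 4} input with permData = {2, 0, 1} sets
// outDims[i] = input.dimensions[permData[i]], producing {4, 2, 3}.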

bool meanPrepare(const Shape& input,
                 const int32_t* axisData,
                 const Shape& axisShape,
                 bool keepDims,
                 Shape* output) {
    // axis needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(axisShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(axisShape) == 1);

    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));
    int32_t axisSize = static_cast<int32_t>(getSizeOfDimension(axisShape, 0));

    // Determines the size of the output tensor.
    if (keepDims) {
        std::vector<uint32_t> outDims(numInputDims);
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    isAxis = true;
                    break;
                }
            }
            if (isAxis) {
                outDims[idx] = 1;
            } else {
                outDims[idx] = getSizeOfDimension(input, idx);
            }
        }
        output->dimensions = outDims;
    } else {
        // Calculates the size of the reducing axis.
        int32_t numReduceAxis = axisSize;
        for (int32_t i = 0; i < axisSize; ++i) {
            int32_t current = axisData[i];
            if (current < 0) {
                current += numInputDims;
            }
            NN_OPS_CHECK(current >= 0 && current < numInputDims);
            for (int32_t j = 0; j < i; ++j) {
                int32_t previous = axisData[j];
                if (previous < 0) {
                    previous += numInputDims;
                }
                if (current == previous) {
                    --numReduceAxis;
                    break;
                }
            }
        }
        // Determines the output dimensions.
        std::vector<uint32_t> outDims(numInputDims - numReduceAxis);
        int32_t numSkipAxis = 0;
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    ++numSkipAxis;
                    isAxis = true;
                    break;
                }
            }
            if (!isAxis) {
                outDims[idx - numSkipAxis] = getSizeOfDimension(input, idx);
            }
        }
        output->dimensions = outDims;
    }

    output->type = input.type;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
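
// Worked example for meanPrepare (assumed shapes): reducing a {2, 3, 4}
// input over axisData = {1} produces {2, 1, 4} with keepDims = true and
// {2, 4} with keepDims = false.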

bool stridedSlicePrepare(const Shape& input,
                         const int32_t* beginData, const Shape& beginShape,
                         const int32_t* endData, const Shape& endShape,
                         const int32_t* stridesData, const Shape& stridesShape,
                         int32_t beginMask, int32_t endMask, int32_t shrinkAxisMask,
                         Shape* output) {
    uint32_t numInputDims = getNumberOfDimensions(input);
    // The StridedSlice op only supports 1D-4D input arrays.
    NN_OPS_CHECK(numInputDims <= 4);

    NN_OPS_CHECK(getNumberOfDimensions(beginShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(endShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(stridesShape) == 1);

    NN_OPS_CHECK(getSizeOfDimension(beginShape, 0) == numInputDims);
    NN_OPS_CHECK(getSizeOfDimension(endShape, 0) == numInputDims);
    NN_OPS_CHECK(getSizeOfDimension(stridesShape, 0) == numInputDims);

    NN_OPS_CHECK(beginShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(endShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(stridesShape.type == OperandType::TENSOR_INT32);

    // Determine the size of the output tensor and map indices.
    std::vector<uint32_t> outDims;
    for (int32_t idx = 0; idx < static_cast<int32_t>(numInputDims); idx++) {
        int32_t dim = static_cast<int32_t>(getSizeOfDimension(input, idx));
        int32_t stride = stridesData[idx];
        // The stride value has to be non-zero.
        NN_OPS_CHECK(stride != 0);
        bool positiveStride = stride > 0;

        int32_t begin = beginMask & (1 << idx)
                ? positiveStride ? 0 : dim - 1
                : ClampedIndex(beginData[idx], dim, positiveStride);
        int32_t end = endMask & (1 << idx)
                ? positiveStride ? dim : -1
                : ClampedIndex(endData[idx], dim, positiveStride);

        // This is valid for both positive and negative strides.
        int32_t outDim =
                static_cast<int32_t>(std::ceil((end - begin) / static_cast<float>(stride)));
        outDim = outDim < 0 ? 0 : outDim;
        if (!(shrinkAxisMask & (1 << idx))) {
            outDims.push_back(outDim);
        } else {
            if (outDim != 1) {
                LOG(ERROR) << "Outdim " << idx << " is " << outDim << ", expected 1";
                NN_OPS_CHECK(outDim == 1);
            }
        }
    }

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
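
// Worked example for stridedSlicePrepare (assumed values): for a dimension of
// size 10 with begin = 1, end = 8, stride = 2 and no masks set, the slice
// covers indices {1, 3, 5, 7}, i.e. outDim = ceil((8 - 1) / 2.0) = 4.
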
} // namespace nn
} // namespace android