// (code-viewer navigation header removed: "Home | History | Annotate | Download | only in operations")
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
#include "SVDF.h"

#include <cstring>
#include <vector>

#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "HalInterfaces.h"

#include "Tracing.h"
     24 
     25 namespace android {
     26 namespace nn {
     27 
     28 SVDF::SVDF(const Operation& operation,
     29            std::vector<RunTimeOperandInfo>& operands) {
     30     NNTRACE_TRANS("SVDF::SVDF");
     31     input_ = GetInput(operation, operands, kInputTensor);
     32     weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
     33     weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
     34     bias_ = GetInput(operation, operands, kBiasTensor);
     35     state_in_ = GetInput(operation, operands, kStateInTensor);
     36 
     37     params_.rank_ = getScalarData<int>(*GetInput(operation, operands, kRankParam));
     38     params_.activation_ = static_cast<TfLiteFusedActivation>(getScalarData<int>(
     39         *GetInput(operation, operands, kActivationParam)));
     40 
     41     state_out_ = GetOutput(operation, operands, kStateOutTensor);
     42     output_ = GetOutput(operation, operands, kOutputTensor);
     43 }
     44 
     45 bool SVDF::Prepare(const Operation &operation,
     46                    std::vector<RunTimeOperandInfo> &operands,
     47                    Shape *stateShape,
     48                    Shape *outputShape) {
     49   NNTRACE_TRANS("SVDF::Prepare");
     50   // Check we have all the inputs and outputs we need.
     51   const int num_inputs = NumInputsWithValues(operation, operands);
     52 
     53   NN_CHECK(num_inputs == 6 || num_inputs == 7);
     54   NN_CHECK_EQ(NumOutputs(operation), 2);
     55 
     56   const RunTimeOperandInfo *input =
     57       GetInput(operation, operands, SVDF::kInputTensor);
     58   const RunTimeOperandInfo *weights_feature =
     59       GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
     60   const RunTimeOperandInfo *weights_time =
     61       GetInput(operation, operands, SVDF::kWeightsTimeTensor);
     62 
     63   // Check all the parameters of tensor match within themselves and match the
     64   // input configuration.
     65   const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
     66   const uint32_t batch_size = SizeOfDimension(input, 0);
     67   const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
     68   NN_CHECK_EQ(num_filters % rank, 0);
     69   const uint32_t num_units = num_filters / rank;
     70   const uint32_t memory_size = SizeOfDimension(weights_time, 1);
     71   NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
     72   NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);
     73 
     74   const RunTimeOperandInfo *bias =
     75       GetInput(operation, operands, kBiasTensor);
     76   if (!IsNullInput(bias)) {
     77     NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
     78   }
     79 
     80   // Resize state.
     81   const Shape &inputShape = input->shape();
     82   stateShape->type = inputShape.type;
     83   stateShape->dimensions = { batch_size, memory_size * num_filters };
     84   stateShape->offset = inputShape.offset;
     85   stateShape->scale = inputShape.scale;
     86 
     87   // Resize output.
     88   outputShape->type = inputShape.type;
     89   outputShape->dimensions = { batch_size, num_units };
     90   outputShape->offset = inputShape.offset;
     91   outputShape->scale = inputShape.scale;
     92 
     93   return true;
     94 }
     95 
// Dispatches SVDF evaluation on the input's data type. FLOAT32 runs the
// float kernel directly on the operand buffers; FLOAT16 round-trips every
// tensor through temporary float32 buffers since there is no fp16 kernel.
// Returns false for unsupported operand types.
bool SVDF::Eval() {
    NNTRACE_TRANS("SVDF::Eval");
    switch (input_->type) {
        case OperandType::TENSOR_FLOAT16: {
            // Widen all fp16 inputs to fp32 scratch vectors sized by element
            // count of each operand.
            std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
            std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
                                    &inputStateDataFloat32);
            // Bias is optional: the vector is always allocated (possibly with
            // zero elements) but only filled when a bias operand is present.
            std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
            if (!IsNullInput(bias_)) {
                convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
                                        &biasDataFloat32);
            }
            std::vector<float> weightsFeatureDataFloat32(
                    getNumberOfElements(weights_feature_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
                                    &weightsFeatureDataFloat32);
            std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
                                    &weightsTimeDataFloat32);
            // fp32 staging buffers for the two outputs.
            std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
            std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));

            EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
                        biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
                        weightsTimeDataFloat32.data(), outputDataFloat32.data(),
                        outputStateDataFloat32.data());
            // Narrow the fp32 results back into the fp16 output operands.
            convertFloat32ToFloat16(outputDataFloat32,
                                    reinterpret_cast<_Float16*>(output_->buffer));
            convertFloat32ToFloat16(outputStateDataFloat32,
                                    reinterpret_cast<_Float16*>(state_out_->buffer));
            break;
        }
        case OperandType::TENSOR_FLOAT32: {
            // Native path: operate in place on the operand buffers.
            EvalFloat32(reinterpret_cast<float*>(input_->buffer),
                        reinterpret_cast<float*>(state_in_->buffer),
                        reinterpret_cast<float*>(bias_->buffer),
                        reinterpret_cast<float*>(weights_feature_->buffer),
                        reinterpret_cast<float*>(weights_time_->buffer),
                        reinterpret_cast<float*>(output_->buffer),
                        reinterpret_cast<float*>(state_out_->buffer));
            break;
        }
        default: {
            LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
            return false;
        }
    }
    return true;
}
    147 
    148 void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
    149                        const float* weightsFeatureData, const float* weightsTimeData,
    150                        float* outputData, float* outputStateData) {
    151     NNTRACE_COMP("SVDF::EvalFloat32");
    152 
    153     const int rank = params_.rank_;
    154     const int batch_size = SizeOfDimension(input_, 0);
    155     const int input_size = SizeOfDimension(input_, 1);
    156     const int num_filters = SizeOfDimension(weights_feature_, 0);
    157     const int num_units = num_filters / rank;
    158     const int memory_size = SizeOfDimension(weights_time_, 1);
    159 
    160     memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);
    161     // Compute conv1d(inputs, weights_feature).
    162     for (int b = 0; b < batch_size; b++) {
    163         float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
    164         for (int c = 0; c < num_filters; c++) {
    165             float* state_ptr = state_ptr_batch + c * memory_size;
    166             state_ptr[memory_size - 1] = 0.0;
    167         }
    168     }
    169     // The state left most column is used to save current cycle activation. This
    170     // is achieved by starting at state->data.f[memory_size - 1] and having the
    171     // stride equal to memory_size.
    172     tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
    173             weightsFeatureData, num_filters, input_size, inputData, batch_size,
    174             &outputStateData[memory_size - 1], memory_size);
    175 
    176     // Compute matmul(state, weights_time).
    177     // The right most column is used to save temporary output (with the size of
    178     // num_filters). This is achieved by starting at state->data.f and having the
    179     // stride equal to memory_size.
    180     float scratch[batch_size * num_filters];
    181     for (int b = 0; b < batch_size; b++) {
    182         float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
    183         float* scratch_ptr_batch = scratch + b * num_filters;
    184         tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
    185                 weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch,
    186                 /*result_stride=*/1);
    187     }
    188 
    189     // Initialize output with bias if provided.
    190     if (!IsNullInput(bias_)) {
    191         tflite::tensor_utils::VectorBatchVectorAssign(biasData, num_units, batch_size, outputData);
    192     } else {
    193         tflite::tensor_utils::ZeroVector(outputData, batch_size * num_units);
    194     }
    195 
    196     // Reduction sum
    197     for (int b = 0; b < batch_size; b++) {
    198         float* output_ptr_batch = outputData + b * num_units;
    199         float* scratch_ptr_batch = scratch + b * num_filters;
    200         tflite::tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch, num_units,
    201                                                  rank);
    202     }
    203 
    204     // Apply activation.
    205     for (int b = 0; b < batch_size; b++) {
    206         float* output_ptr_batch = outputData + b * num_units;
    207         tflite::tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units,
    208                                                       params_.activation_, output_ptr_batch);
    209     }
    210 
    211     // Right shift the state.
    212     for (int b = 0; b < batch_size; b++) {
    213         float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
    214         for (int f = 0; f < num_filters; f++) {
    215             tflite::tensor_utils::VectorShiftLeft(state_out_ptr_batch, memory_size,
    216                                                   /*shift_value=*/0.0);
    217             state_out_ptr_batch += memory_size;
    218         }
    219     }
    220 }
    221 
    222 }  // namespace nn
    223 }  // namespace android
    224