1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "SVDF.h" 18 19 #include "CpuExecutor.h" 20 #include "CpuOperationUtils.h" 21 #include "HalInterfaces.h" 22 23 #include "Tracing.h" 24 25 namespace android { 26 namespace nn { 27 28 SVDF::SVDF(const Operation& operation, 29 std::vector<RunTimeOperandInfo>& operands) { 30 NNTRACE_TRANS("SVDF::SVDF"); 31 input_ = GetInput(operation, operands, kInputTensor); 32 weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor); 33 weights_time_ = GetInput(operation, operands, kWeightsTimeTensor); 34 bias_ = GetInput(operation, operands, kBiasTensor); 35 state_in_ = GetInput(operation, operands, kStateInTensor); 36 37 params_.rank_ = getScalarData<int>(*GetInput(operation, operands, kRankParam)); 38 params_.activation_ = static_cast<TfLiteFusedActivation>(getScalarData<int>( 39 *GetInput(operation, operands, kActivationParam))); 40 41 state_out_ = GetOutput(operation, operands, kStateOutTensor); 42 output_ = GetOutput(operation, operands, kOutputTensor); 43 } 44 45 bool SVDF::Prepare(const Operation &operation, 46 std::vector<RunTimeOperandInfo> &operands, 47 Shape *stateShape, 48 Shape *outputShape) { 49 NNTRACE_TRANS("SVDF::Prepare"); 50 // Check we have all the inputs and outputs we need. 51 const int num_inputs = NumInputsWithValues(operation, operands); 52 53 NN_CHECK(num_inputs == 6 || num_inputs == 7); 54 NN_CHECK_EQ(NumOutputs(operation), 2); 55 56 const RunTimeOperandInfo *input = 57 GetInput(operation, operands, SVDF::kInputTensor); 58 const RunTimeOperandInfo *weights_feature = 59 GetInput(operation, operands, SVDF::kWeightsFeatureTensor); 60 const RunTimeOperandInfo *weights_time = 61 GetInput(operation, operands, SVDF::kWeightsTimeTensor); 62 63 // Check all the parameters of tensor match within themselves and match the 64 // input configuration. 65 const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam)); 66 const uint32_t batch_size = SizeOfDimension(input, 0); 67 const uint32_t num_filters = SizeOfDimension(weights_feature, 0); 68 NN_CHECK_EQ(num_filters % rank, 0); 69 const uint32_t num_units = num_filters / rank; 70 const uint32_t memory_size = SizeOfDimension(weights_time, 1); 71 NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1)); 72 NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters); 73 74 const RunTimeOperandInfo *bias = 75 GetInput(operation, operands, kBiasTensor); 76 if (!IsNullInput(bias)) { 77 NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units); 78 } 79 80 // Resize state. 81 const Shape &inputShape = input->shape(); 82 stateShape->type = inputShape.type; 83 stateShape->dimensions = { batch_size, memory_size * num_filters }; 84 stateShape->offset = inputShape.offset; 85 stateShape->scale = inputShape.scale; 86 87 // Resize output. 88 outputShape->type = inputShape.type; 89 outputShape->dimensions = { batch_size, num_units }; 90 outputShape->offset = inputShape.offset; 91 outputShape->scale = inputShape.scale; 92 93 return true; 94 } 95 96 bool SVDF::Eval() { 97 NNTRACE_TRANS("SVDF::Eval"); 98 switch (input_->type) { 99 case OperandType::TENSOR_FLOAT16: { 100 std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape())); 101 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32); 102 std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape())); 103 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer), 104 &inputStateDataFloat32); 105 std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape())); 106 if (!IsNullInput(bias_)) { 107 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer), 108 &biasDataFloat32); 109 } 110 std::vector<float> weightsFeatureDataFloat32( 111 getNumberOfElements(weights_feature_->shape())); 112 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer), 113 &weightsFeatureDataFloat32); 114 std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape())); 115 convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer), 116 &weightsTimeDataFloat32); 117 std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape())); 118 std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape())); 119 120 EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(), 121 biasDataFloat32.data(), weightsFeatureDataFloat32.data(), 122 weightsTimeDataFloat32.data(), outputDataFloat32.data(), 123 outputStateDataFloat32.data()); 124 convertFloat32ToFloat16(outputDataFloat32, 125 reinterpret_cast<_Float16*>(output_->buffer)); 126 convertFloat32ToFloat16(outputStateDataFloat32, 127 reinterpret_cast<_Float16*>(state_out_->buffer)); 128 break; 129 } 130 case OperandType::TENSOR_FLOAT32: { 131 EvalFloat32(reinterpret_cast<float*>(input_->buffer), 132 reinterpret_cast<float*>(state_in_->buffer), 133 reinterpret_cast<float*>(bias_->buffer), 134 reinterpret_cast<float*>(weights_feature_->buffer), 135 reinterpret_cast<float*>(weights_time_->buffer), 136 reinterpret_cast<float*>(output_->buffer), 137 reinterpret_cast<float*>(state_out_->buffer)); 138 break; 139 } 140 default: { 141 LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type); 142 return false; 143 } 144 } 145 return true; 146 } 147 148 void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData, 149 const float* weightsFeatureData, const float* weightsTimeData, 150 float* outputData, float* outputStateData) { 151 NNTRACE_COMP("SVDF::EvalFloat32"); 152 153 const int rank = params_.rank_; 154 const int batch_size = SizeOfDimension(input_, 0); 155 const int input_size = SizeOfDimension(input_, 1); 156 const int num_filters = SizeOfDimension(weights_feature_, 0); 157 const int num_units = num_filters / rank; 158 const int memory_size = SizeOfDimension(weights_time_, 1); 159 160 memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters); 161 // Compute conv1d(inputs, weights_feature). 162 for (int b = 0; b < batch_size; b++) { 163 float* state_ptr_batch = outputStateData + b * memory_size * num_filters; 164 for (int c = 0; c < num_filters; c++) { 165 float* state_ptr = state_ptr_batch + c * memory_size; 166 state_ptr[memory_size - 1] = 0.0; 167 } 168 } 169 // The state left most column is used to save current cycle activation. This 170 // is achieved by starting at state->data.f[memory_size - 1] and having the 171 // stride equal to memory_size. 172 tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( 173 weightsFeatureData, num_filters, input_size, inputData, batch_size, 174 &outputStateData[memory_size - 1], memory_size); 175 176 // Compute matmul(state, weights_time). 177 // The right most column is used to save temporary output (with the size of 178 // num_filters). This is achieved by starting at state->data.f and having the 179 // stride equal to memory_size. 180 float scratch[batch_size * num_filters]; 181 for (int b = 0; b < batch_size; b++) { 182 float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters; 183 float* scratch_ptr_batch = scratch + b * num_filters; 184 tflite::tensor_utils::BatchVectorBatchVectorDotProduct( 185 weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch, 186 /*result_stride=*/1); 187 } 188 189 // Initialize output with bias if provided. 190 if (!IsNullInput(bias_)) { 191 tflite::tensor_utils::VectorBatchVectorAssign(biasData, num_units, batch_size, outputData); 192 } else { 193 tflite::tensor_utils::ZeroVector(outputData, batch_size * num_units); 194 } 195 196 // Reduction sum 197 for (int b = 0; b < batch_size; b++) { 198 float* output_ptr_batch = outputData + b * num_units; 199 float* scratch_ptr_batch = scratch + b * num_filters; 200 tflite::tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch, num_units, 201 rank); 202 } 203 204 // Apply activation. 205 for (int b = 0; b < batch_size; b++) { 206 float* output_ptr_batch = outputData + b * num_units; 207 tflite::tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units, 208 params_.activation_, output_ptr_batch); 209 } 210 211 // Right shift the state. 212 for (int b = 0; b < batch_size; b++) { 213 float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters; 214 for (int f = 0; f < num_filters; f++) { 215 tflite::tensor_utils::VectorShiftLeft(state_out_ptr_batch, memory_size, 216 /*shift_value=*/0.0); 217 state_out_ptr_batch += memory_size; 218 } 219 } 220 } 221 222 } // namespace nn 223 } // namespace android 224