Home | History | Annotate | Download | only in internal
      1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 // compute.h: the central stage of the Gemm computation, operates
     16 // on already-packed LHS and RHS blocks and calls the Gemm kernel
     17 // to compute a block of the product.
     18 
     19 #ifndef GEMMLOWP_INTERNAL_COMPUTE_H_
     20 #define GEMMLOWP_INTERNAL_COMPUTE_H_
     21 
     22 #include "block_params.h"
     23 #include "kernel.h"
     24 #include "pack.h"
     25 
     26 namespace gemmlowp {
     27 
     28 template <typename PackedLhs, typename PackedRhs, typename PackedResult>
     29 class ComputeImpl {
     30   typedef typename PackedLhs::KernelSideFormat KernelLhsFormat;
     31   typedef typename PackedRhs::KernelSideFormat KernelRhsFormat;
     32   typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format;
     33 
     34   const KernelBase& kernel_;
     35   const BlockParams& block_params_;
     36 
     37   PackedResult* const packed_result_;
     38   const PackedLhs& packed_lhs_;
     39   const PackedRhs& packed_rhs_;
     40 
     41  public:
     42   ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params,
     43               PackedResult* _packed_result, const PackedLhs& _packed_lhs,
     44               const PackedRhs& _packed_rhs)
     45       : kernel_(_kernel),
     46         block_params_(_block_params),
     47         packed_result_(_packed_result),
     48         packed_lhs_(_packed_lhs),
     49         packed_rhs_(_packed_rhs) {}
     50 
     51   void Compute(int depth) {
     52     depth = RoundUp<Format::kDepth>(depth);
     53     assert(depth <= block_params_.l2_depth);
     54     for (int d = 0; d < depth; d += block_params_.l1_depth) {
     55       int ds = std::min(block_params_.l1_depth, depth - d);
     56 
     57       for (int r = 0; r < block_params_.l2_rows; r += block_params_.l1_rows) {
     58         int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r);
     59 
     60         ComputeL1(r, rs, 0, block_params_.l2_cols, d, ds);
     61       }
     62     }
     63   }
     64 
     65  private:
     66   void ComputeRun(int start_row, int start_col, int start_depth,
     67                   int depth) GEMMLOWP_NOINLINE {
     68     packed_lhs_.seek_run(start_row, start_depth);
     69     packed_rhs_.seek_run(start_col, start_depth);
     70     auto packed_result_block = packed_result_->Map().block(
     71         start_row, start_col, Format::kRows, Format::kCols);
     72     kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(),
     73                 packed_result_block.cols_stride(), packed_lhs_.current_data(),
     74                 packed_rhs_.current_data(), start_depth, depth);
     75   }
     76 
     77   void ComputeL1(int start_row, int rows, int start_col, int cols,
     78                  int start_depth, int depth) {
     79     assert(rows % Format::kRows == 0);
     80     assert(cols % Format::kCols == 0);
     81     assert(depth % Format::kDepth == 0);
     82 
     83     for (int c = 0; c < cols; c += Format::kCols) {
     84       for (int r = 0; r < rows; r += Format::kRows) {
     85         ComputeRun(start_row + r, start_col + c, start_depth, depth);
     86       }
     87     }
     88   }
     89 };
     90 
     91 template <typename PackedLhs, typename PackedRhs, typename PackedResult>
     92 void Compute(const KernelBase& kernel, const BlockParams& block_params,
     93              PackedResult* packed_result, const PackedLhs& packed_lhs,
     94              const PackedRhs& packed_rhs, int depth) {
     95   ScopedProfilingLabel label("compute");
     96   ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl(
     97       kernel, block_params, packed_result, packed_lhs, packed_rhs);
     98 
     99   impl.Compute(depth);
    100 }
    101 
    102 }  // namespace gemmlowp
    103 
    104 #endif  // GEMMLOWP_INTERNAL_COMPUTE_H_
    105