Home | History | Annotate | Download | only in optimized
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_CBLAS_REFERENCE_H_
     17 #define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_CBLAS_REFERENCE_H_
     18 
     19 #include "tensorflow/contrib/lite/kernels/internal/compatibility.h"
     20 
     21 // The reference implementation for a small subset of CBLAS interface.
     22 // This is only used for testing CBLAS implementation, and should never be used
     23 // in production code.
     24 
     25 namespace tflite {
     26 namespace cblas_ops {
     27 
     28 // The following code follows the original CBLAS specification, and it might
     29 // conflict with the TensorFlow naming convention.
     30 // TODO(ycling): Find another way to test CBLAS with bazel, without writing
     31 // a reference implementation by ourselves.
     32 enum CBLAS_ORDER { CblasRowMajor = 0, CblasColMajor = 1 };
     33 
     34 enum CBLAS_TRANSPOSE { CblasNoTrans = 0, CblasTrans = 1, CblasConjTrans = 2 };
     35 
     36 // A reference implementation for matrix multiplication.
     37 // The following code computes, c = a * transponse(b) matrix multiplication
     38 // with CBLAS, where:
     39 // * `a` is a matrix with dimensions (m, k).
     40 // * `b` is a matrix with dimensions (n, k), so transpose(b) is (k, n).
     41 // * `c` is a matrix with dimensions (m, n).
     42 // The naming of variables is aligned with CBLAS specification here.
     43 void cblas_sgemm(const enum CBLAS_ORDER order,
     44                  const enum CBLAS_TRANSPOSE trans_a,
     45                  const enum CBLAS_TRANSPOSE trans_b, const int m, const int n,
     46                  const int k, const float alpha, const float *a,
     47                  const int stride_a, const float *b, const int stride_b,
     48                  const float beta, float *c, const int stride_c) {
     49   TFLITE_DCHECK(order == CblasRowMajor);
     50   TFLITE_DCHECK(trans_a == CblasNoTrans);
     51   TFLITE_DCHECK(trans_b == CblasTrans);
     52   TFLITE_DCHECK(beta == 0.0f);
     53   for (int row = 0; row < m; ++row) {
     54     for (int col = 0; col < n; ++col) {
     55       // If `beta` non-zero, multiple it with the original values in output.
     56       // Otherwise, ignore the original value in output completely.
     57       float value = 0.0f;
     58       for (int idx = 0; idx < k; ++idx) {
     59         value += alpha * a[stride_a * row + idx] * b[stride_b * col + idx];
     60       }
     61       c[stride_c * row + col] = value;
     62     }
     63   }
     64 }
     65 
     66 }  // namespace cblas_ops
     67 }  // namespace tflite
     68 
     69 #endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_CBLAS_REFERENCE_H_
     70