Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
     17 #define TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
     18 
     19 #include "meta/multi_thread_gemm.h"
     20 #include "meta/multi_thread_transform.h"
     21 #include "meta/quantized_mul_kernels.h"
     22 #include "meta/streams.h"
     23 #include "meta/transform_kernels.h"
     24 
     25 #include "tensorflow/core/framework/numeric_types.h"
     26 
     27 namespace tensorflow {
     28 
     29 class OpKernelContext;
     30 
     31 namespace meta {
     32 
     33 // Gemmlowp/meta is a small library of optimized Arm32/64 kernels for quantized
     34 // matrix multiplication and other quantized computations.
     35 
     36 // Sets the maximum number of computation threads that the internal worker
     37 // pool can use. If num_threads is 0, intra_op_parallelism_threads is used.
     38 void SetNumThreads(int num_threads);
     39 
     40 int GetNumThreads();  // Presumably SetNumThreads' limit; confirm in the .cc.
     41 
     42 // Selects which worker pool the computations run on. When use_local_context
     43 // is false, the computations use the worker pool passed each time in the
     44 // OpKernelContext. When it is true, the OpKernelContext is ignored and the
     45 // internal optimized worker pool is used instead.
     46 //
     47 // The internal worker pool is disabled by default (false).
     48 void SetUseLocalContext(bool use_local_context);
     49 
     50 bool GetUseLocalContext();  // Presumably SetUseLocalContext's flag; confirm.
     51 
     52 // Enables or disables the codepath. On (true) by default where supported.
     53 void SetEnabled(bool enabled);
     54 
     55 // Returns true if the codepath is both supported and enabled. Check this
     56 // before calling any compute function: if the codepath is not supported and a
     57 // compute function is called anyway, the library will log a FATAL error.
     58 bool IsSupportedAndEnabled();
     59 
     60 // Computes the quantized matrix product of an m x k lhs and a k x n rhs:
     61 //
     62 // for (i, j) in [0, m) x [0, n) do
     63 //   c_data[i, j] :=
     64 //     sum((a_data[i, l] + offset_a) * (b_data[l, j] + offset_b)) : l in [0, k)
     65 //
     66 // The lhs operand (a_data) is row major when transpose_a is false and column
     67 // major otherwise; transpose_b likewise describes the layout of the rhs
     68 // operand (b_data). lda, ldb, and ldc are the strides of the lhs operand, the
     69 // rhs operand, and the result array (c_data), respectively.
     70 void QuantizedGemm(OpKernelContext* context, bool transpose_a, bool transpose_b,
     71                    const quint8* a_data, const quint8* b_data, qint32* c_data,
     72                    int m, int n, int k, int offset_a, int offset_b, int lda,
     73                    int ldb, int ldc);
     74 
     75 // Requantizes an array of int32 values to uint8: the input holds numbers from
     76 // the range [input_min, input_max] quantized uniformly to int32. Their float
     77 // values are recovered and then quantized uniformly to the range
     78 // [output_min, output_max] as uint8, saturating the uint8 values.
     79 void Requantize(OpKernelContext* context, const qint32* input, int count,
     80                 float input_min, float input_max, float output_min,
     81                 float output_max, quint8* output);
     82 
     83 // Recovers the float values of an array of numbers from the range
     84 // [range_min, range_max] that were quantized uniformly to uint8 values.
     85 void Dequantize(OpKernelContext* context, const quint8* input, int count,
     86                 float range_min, float range_max, float* output);
     87 
     88 // Quantizes an array of float values uniformly to the range
     89 // [range_min, range_max], expressed as uint8, saturating the uint8 values.
     90 void Quantize(OpKernelContext* context, const float* input, int count,
     91               float range_min, float range_max, quint8* output);
     92 
     93 // Adds a quantized bias to quantized inputs. The input and bias arrays hold
     94 // uint8 values quantized uniformly in the ranges [input_min, input_max] and
     95 // [bias_min, bias_max] respectively. Their float values are recovered and
     96 // added, and the sums are quantized uniformly to the range
     97 // [output_min, output_max] as int32, saturating the int32 values.
     98 void QuantizedBiasAdd(OpKernelContext* context, const quint8* input,
     99                       int input_count, const quint8* bias, int bias_count,
    100                       float input_min, float input_max, float bias_min,
    101                       float bias_max, float output_min, float output_max,
    102                       qint32* output);
    103 
    104 // Clamps each value of an array of uint8 values to the range
    105 // [clamp_min, clamp_max].
    106 void Clamp(OpKernelContext* context, const quint8* input, int input_count,
    107            quint8 clamp_min, quint8 clamp_max, quint8* output);
    108 
    109 }  // namespace meta
    110 }  // namespace tensorflow
    111 
    112 #endif  // TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_
    113