/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_DOT_OP_EMITTER_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_DOT_OP_EMITTER_H_

#include "llvm/IR/IRBuilder.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_options.h"
#include "tensorflow/compiler/xla/service/cpu/target_machine_features.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_module_config.h"
#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/platform/types.h"

namespace xla {
namespace cpu {

// Returns true if the dot operation `hlo` can potentially be implemented as a
// call into an Eigen routine.
bool PotentiallyImplementedAsEigenDot(const HloInstruction& hlo);

// Returns the index for an operand to `hlo` that should ideally be column
// major.  Returns nullopt if there is no such operand or if `hlo` is not a dot
// or a fusion containing a dot.
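// For example, a return value of 0 would mean that making operand 0 of `hlo`
// column major is expected to be profitable.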
tensorflow::gtl::optional<int64> ProfitableToMakeDotOperandColumnMajor(
    const HloInstruction& hlo);

// Returns true if we can generate a tiled LLVM IR implementation for |dot|.
bool ProfitableToImplementDotInTiledLlvmIr(const HloInstruction& dot);

// Helper class for emitting LLVM IR to perform the dot operation.
class DotOpEmitter {
 public:
  // Emits LLVM IR to perform the dot operation on lhs_array and rhs_array and
  // places the result in target_array. IR is emitted at the current insert
  // point of the builder. Upon completion of the method, the insert point is
  // set to the end of all instructions emitted for this operation.
  //
  // If `addend_array` is not nullptr then it must be an array with the same
  // dimensions as the result, and the result is computed as `addend_array` +
  // dot(`lhs_array`, `rhs_array`).  A non-null `addend_array` is only
  // supported for matrix-vector products.
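  //
  // Example (illustrative only; the argument names below are hypothetical
  // caller state, not part of this API):
  //
  //   TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation(
  //       *dot_instruction, /*transpose_lhs=*/false, /*transpose_rhs=*/false,
  //       target_array, lhs_array, rhs_array, /*addend_array=*/nullptr,
  //       executable_run_options_value, &ir_builder, hlo_module_config,
  //       target_machine_features));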
  static tensorflow::Status EmitDotOperation(
      const HloInstruction& dot, bool transpose_lhs, bool transpose_rhs,
      const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& lhs_array,
      const llvm_ir::IrArray& rhs_array, const llvm_ir::IrArray* addend_array,
      llvm::Value* executable_run_options_value, llvm::IRBuilder<>* ir_builder,
      const HloModuleConfig& hlo_module_config,
      const TargetMachineFeatures& target_machine_features);

 private:
  DotOpEmitter(const HloInstruction& dot, bool transpose_lhs,
               bool transpose_rhs, const llvm_ir::IrArray& target_array,
               const llvm_ir::IrArray& lhs_array,
               const llvm_ir::IrArray& rhs_array,
               const llvm_ir::IrArray* addend_array,
               llvm::Value* executable_run_options_value,
               llvm::IRBuilder<>* ir_builder,
               const HloModuleConfig& hlo_module_config,
               const TargetMachineFeatures& target_machine_features);

  // Emits the IR to perform the dot operation.
  tensorflow::Status Emit();

  // Emits instructions to perform a scalar dot product (a multiply of the
  // LHS and RHS) and stores the result in the target.
  tensorflow::Status EmitScalarDot();

  // Emits an LLVM IR implementation of the dot operation if we can.  Returns
  // true if an LLVM IR implementation was emitted.
  bool EmitLlvmIrDotIfProfitable();

  // Emits a call to the CPU runtime to perform the matrix multiply.
  tensorflow::Status EmitCallToRuntime();

  // Emits a series of nested loops for iterating over an operand array in the
  // dot operation. Loops are constructed in major-to-minor dimension layout
  // order. No loop is emitted for the given reduction_dimension. The function
  // returns an IrArray index for the given operand_array containing the
  // induction variables of the loops. All dimensions of the index are filled
  // except for the reduction dimension. name_suffix is the string to append
  // to the names of LLVM constructs (e.g., basic blocks) constructed by this
  // method.
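  //
  // For example (illustrative), for a rank-3 operand_array with
  // reduction_dimension == 1, loops are emitted over dimensions 0 and 2 only,
  // and the returned index contains the induction variables for those two
  // dimensions while the entry for dimension 1 is left unfilled.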
  llvm_ir::IrArray::Index EmitOperandArrayLoopNest(
      llvm_ir::ForLoopNest* loop_nest, const llvm_ir::IrArray& operand_array,
      int64 reduction_dimension, tensorflow::StringPiece name_suffix);

  // Returns true if the shapes of all arrays satisfy the requirements of our
  // runtime dot: the same layout, no padding, and a rank of two.
  bool ShapesAreLegalForRuntimeDot() const;

  // Represents the dimensions of a matrix-matrix multiply operation.
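  // For example (illustrative numbers), a dot of a [16, 64] LHS with a
  // [64, 32] RHS has m = 16, k = 64, and n = 32.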
  struct MatMultDims {
    // The number of rows in the LHS.
    int64 m;

    // The number of columns in the LHS, which must also equal the number of
    // rows in the RHS.
    int64 k;

    // The number of columns in the RHS.
    int64 n;

    // True if the LHS matrix is column major.
    bool lhs_column_major;

    // True if the RHS matrix is column major.
    bool rhs_column_major;
  };

  // Returns the MatMultDims instance for the dot product this DotOpEmitter
  // represents.  Precondition: the dot is of rank 2 (and thus its operands are
  // of rank 2 as well).
  MatMultDims GetMatMultDims() const;

  // When doing a tiled GEMV in LLVM IR, a "tile" consists of this many vector
  // registers.
  int64 GetGemvTilingFactor() const {
    const int64 kDefaultTilingFactor = 8;
    return options::LlvmIrGemvTilingFactor(hlo_module_config_)
        .value_or(kDefaultTilingFactor);
  }

  const HloInstruction& dot_;
  const bool transpose_lhs_;
  const bool transpose_rhs_;
  const llvm_ir::IrArray& target_array_;
  const llvm_ir::IrArray& lhs_array_;
  const llvm_ir::IrArray& rhs_array_;
  const llvm_ir::IrArray* addend_array_;
  llvm::Value* executable_run_options_value_;
  llvm::IRBuilder<>* ir_builder_;
  const HloModuleConfig& hlo_module_config_;
  const TargetMachineFeatures& target_machine_features_;
};

}  // namespace cpu
}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_DOT_OP_EMITTER_H_