Home | History | Annotate | Download | only in cpu
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/compiler/xla/service/cpu/compiler_functor.h"
     17 
     18 #include <algorithm>
     19 #include <iterator>
     20 #include <memory>
     21 #include <string>
     22 #include <utility>
     23 #include <vector>
     24 
     25 #include "absl/memory/memory.h"
     26 #include "llvm/ADT/StringRef.h"
     27 #include "llvm/Analysis/TargetLibraryInfo.h"
     28 #include "llvm/Analysis/TargetTransformInfo.h"
     29 #include "llvm/IR/LegacyPassManager.h"
     30 #include "llvm/IR/Verifier.h"
     31 #include "llvm/MC/MCContext.h"
     32 #include "llvm/Object/ObjectFile.h"
     33 #include "llvm/Support/SmallVectorMemoryBuffer.h"
     34 #include "llvm/Support/raw_ostream.h"
     35 #include "llvm/Target/TargetMachine.h"
     36 #include "llvm/Transforms/IPO.h"
     37 #include "llvm/Transforms/IPO/AlwaysInliner.h"
     38 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
     39 #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h"
     40 #include "tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.h"
     41 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
     42 #include "tensorflow/compiler/xla/statusor.h"
     43 #include "tensorflow/compiler/xla/types.h"
     44 #include "tensorflow/compiler/xla/util.h"
     45 #include "tensorflow/core/platform/logging.h"
     46 
     47 namespace xla {
     48 namespace cpu {
     49 
     50 /* Create filtered versions of the LLVM Pass Managers to filter out some
     51 of the expensive passes.
     52 Profiling:
     53    learning/brain/google/xla/benchmarks:inception_cpu_benchmark
     54    learning/brain/google/xla/benchmarks:cifarnet
     55 pointed to LICM and IndVarSimplify as the hottest passes.
     56 LICM is known to exhibit O(n^2) time in the number of instructions.
     57 IndVarSimplify is slow due to SCEV. If loops are emitted in canonical form,
     58 this pass is not necessary.
     59 Disabling these as a starting point.
     60 */
     61 // TODO(b/64227304) Creating a custom pass pipeline will replace this.
     62 
     63 namespace {
     64 class FilteredPassManager : public llvm::legacy::PassManager {
     65  public:
     66   explicit FilteredPassManager(bool disable_expensive_passes)
     67       : disable_expensive_passes_(disable_expensive_passes) {}
     68   void add(llvm::Pass* p) override {
     69     llvm::StringRef PassName = p->getPassName();
     70     if (PassName.contains("Warn about non-applied transformations")) {
     71       delete p;
     72       return;
     73     }
     74     if (disable_expensive_passes_) {
     75       if (PassName.contains("Unroll loops")) {
     76         delete p;
     77         return;
     78       }
     79     }
     80     llvm::legacy::PassManager::add(p);
     81   }
     82 
     83  private:
     84   bool disable_expensive_passes_;
     85 };
     86 }  // anonymous namespace
     87 
     88 std::unique_ptr<llvm::MemoryBuffer> CompilerFunctor::operator()(
     89     llvm::Module& module) const {
     90   FilteredPassManager module_passes(disable_expensive_passes_);
     91   llvm::legacy::FunctionPassManager function_passes(&module);
     92 
     93   VLOG(2) << "IR before optimizations";
     94   XLA_VLOG_LINES(2, llvm_ir::DumpModuleToString(module));
     95 
     96   if (pre_optimization_hook_) {
     97     pre_optimization_hook_(module);
     98   }
     99 
    100   // Add the appropriate TargetLibraryInfo and TargetTransformInfo.
    101   AddTargetInfoPasses(&module_passes);
    102 
    103   // Build up optimization pipeline.
    104   if (optimize_for_size_) {
    105     // Optimizing for size turns on -O2 level optimizations.
    106     //
    107     // TODO(b/64153864): Although the code generator supports size_level = 2 to
    108     // turn on more aggressive code size optimizations than size_level = 1, we
    109     // pass size_level = 1 because in many cases a size_level of 2 does
    110     // worse. Investigate why.
    111     AddOptimizationPasses(&module_passes, &function_passes, /*opt_level=*/2,
    112                           /*size_level=*/1);
    113   } else {
    114     AddOptimizationPasses(&module_passes, &function_passes,
    115                           /*opt_level=*/opt_level_, /*size_level=*/0);
    116   }
    117 
    118   // Run optimization passes on module.
    119   function_passes.doInitialization();
    120 
    121   CHECK(!llvm::verifyModule(module, &llvm::dbgs()));
    122 
    123   for (auto func = module.begin(); func != module.end(); ++func) {
    124     function_passes.run(*func);
    125   }
    126   function_passes.doFinalization();
    127   module_passes.run(module);
    128 
    129   CHECK(!llvm::verifyModule(module, &llvm::dbgs()));
    130 
    131   runtime::RewriteIRRuntimeFunctions(&module, enable_fast_math_);
    132 
    133   // Buffer for holding machine code prior to constructing the ObjectFile.
    134   llvm::SmallVector<char, 0> stream_buffer;
    135   llvm::raw_svector_ostream ostream(stream_buffer);
    136 
    137   VLOG(2) << "IR after optimizations";
    138   XLA_VLOG_LINES(2, llvm_ir::DumpModuleToString(module));
    139 
    140   if (post_optimization_hook_) {
    141     post_optimization_hook_(module);
    142   }
    143 
    144   // Generate code.
    145   llvm::MCContext* mc_context;
    146   llvm::legacy::PassManager codegen_passes;
    147   target_machine_->addPassesToEmitMC(codegen_passes, mc_context, ostream);
    148   codegen_passes.run(module);
    149 
    150   std::unique_ptr<llvm::MemoryBuffer> memory_buffer(
    151       new llvm::SmallVectorMemoryBuffer(std::move(stream_buffer)));
    152 
    153   if (post_codegen_hook_) {
    154     llvm::Expected<std::unique_ptr<llvm::object::ObjectFile>> obj_file =
    155         llvm::object::ObjectFile::createObjectFile(*memory_buffer);
    156     if (obj_file) {
    157       post_codegen_hook_(*obj_file.get());
    158     } else {
    159       LOG(WARNING) << "Could convert memory buffer to object file!";
    160     }
    161   }
    162 
    163   return memory_buffer;
    164 }
    165 
    166 static std::vector<llvm::VecDesc> VectorFunctionsForTargetLibraryInfoImpl() {
    167   std::vector<llvm::VecDesc> result = {
    168       {"tanhf", runtime::kTanhV4F32SymbolName, 4},
    169       {"llvm.tanh.f32", runtime::kTanhV4F32SymbolName, 4},
    170 
    171       {"tanhf", runtime::kTanhV8F32SymbolName, 8},
    172       {"llvm.tanh.f32", runtime::kTanhV8F32SymbolName, 8},
    173 
    174       {"expf", runtime::kExpV4F32SymbolName, 4},
    175       {"llvm.exp.f32", runtime::kExpV4F32SymbolName, 4},
    176 
    177       {"expf", runtime::kExpV8F32SymbolName, 8},
    178       {"llvm.exp.f32", runtime::kExpV8F32SymbolName, 8},
    179 
    180       {"logf", runtime::kLogV4F32SymbolName, 4},
    181       {"llvm.log.f32", runtime::kLogV4F32SymbolName, 4},
    182 
    183       {"logf", runtime::kLogV8F32SymbolName, 8},
    184       {"llvm.log.f32", runtime::kLogV8F32SymbolName, 8},
    185   };
    186   return result;
    187 }
    188 
    189 void CompilerFunctor::AddTargetInfoPasses(
    190     llvm::legacy::PassManagerBase* passes) const {
    191   llvm::Triple target_triple(target_machine_->getTargetTriple());
    192   auto target_library_info_impl =
    193       absl::make_unique<llvm::TargetLibraryInfoImpl>(target_triple);
    194   target_library_info_impl->addVectorizableFunctions(
    195       VectorFunctionsForTargetLibraryInfoImpl());
    196   passes->add(
    197       new llvm::TargetLibraryInfoWrapperPass(*target_library_info_impl));
    198   passes->add(createTargetTransformInfoWrapperPass(
    199       target_machine_->getTargetIRAnalysis()));
    200 }
    201 
    202 void CompilerFunctor::AddOptimizationPasses(
    203     llvm::legacy::PassManagerBase* module_passes,
    204     llvm::legacy::FunctionPassManager* function_passes, unsigned opt_level,
    205     unsigned size_level) const {
    206   llvm::PassManagerBuilder builder;
    207   builder.OptLevel = opt_level;
    208   builder.SizeLevel = size_level;
    209 
    210   if (opt_level > 1) {
    211     builder.Inliner = llvm::createFunctionInliningPass();
    212   } else {
    213     // Only inline functions marked with "alwaysinline".
    214     builder.Inliner = llvm::createAlwaysInlinerLegacyPass();
    215   }
    216 
    217   builder.DisableUnitAtATime = false;
    218   builder.DisableUnrollLoops = opt_level == 0;
    219   builder.LoopVectorize = opt_level > 0 && size_level == 0;
    220   builder.SLPVectorize = opt_level > 1 && size_level == 0;
    221 
    222   builder.populateFunctionPassManager(*function_passes);
    223   builder.populateModulePassManager(*module_passes);
    224 }
    225 
    226 }  // namespace cpu
    227 }  // namespace xla
    228