Home | History | Annotate | Download | only in cpu
      1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
     17 #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
     18 
     19 #include "llvm/Analysis/TargetTransformInfo.h"
     20 #include "llvm/Target/TargetMachine.h"
     21 #include "tensorflow/compiler/xla/primitive_util.h"
     22 #include "tensorflow/core/lib/gtl/flatmap.h"
     23 
     24 namespace xla {
     25 namespace cpu {
     26 
     27 // Wraps an llvm::TargetMachine and parses out some information that feeds into
     28 // LLVM IR code generation decisions.
     29 class TargetMachineFeatures {
     30  public:
     31   static constexpr int kX86AvxVectorByteSize = 32;
     32 
     33   TargetMachineFeatures(llvm::TargetMachine* target_machine)
     34       : target_machine_(target_machine) {}
     35 
     36   // Return the vectorization factor, which is the number of bytes of data
     37   // explicitly vectorized routines will try to process at once.
     38   int vectorization_factor_in_bytes() const {
     39     // Ideally this should be a function of the cache line size (which we can
     40     // get from llvm::TargetTransformInfo::getCacheLineSize) of the target
     41     // machine.  Guess a value of 128 bytes for now.
     42     return 128;
     43   }
     44 
     45   // Return the size of the largest vector size in bytes.  We need to pass in
     46   // "function" since llvm functions can contain annotations for specializing
     47   // them to specific micro-architectures (though currently XLA does not use
     48   // this functionality).
     49   int vector_register_byte_size(const llvm::Function& function) const {
     50     llvm::TargetTransformInfo* tti = GetTargetTransformInfoFor(function);
     51     return tti->getRegisterBitWidth(/*Vector=*/true) / 8;
     52   }
     53 
     54   // Return the number of elements of type `type` that can fit into the largest
     55   // vector register available.  We need to pass in "function" since llvm
     56   // functions can contain annotations for specializing them to specific
     57   // micro-architectures (though currently XLA does not use this functionality).
     58   int vector_register_num_elements(const llvm::Function& function,
     59                                    PrimitiveType type) const {
     60     return vector_register_byte_size(function) /
     61            (primitive_util::BitWidth(type) / 8);
     62   }
     63 
     64  private:
     65   llvm::TargetTransformInfo* GetTargetTransformInfoFor(
     66       const llvm::Function& function) const;
     67 
     68   // This cache saves us from having to create a llvm::TargetTransformInfo for
     69   // every call to GetTargetTransformInfoFor (creating a TargetTransformInfo
     70   // costs one heap allocation on X86).
     71   //
     72   // This is mutated from within `GetTargetTransformInfoFor` which is
     73   // semantically a getter (and thus `const`); and is therefore declared
     74   // mutable.  Making this mutable is okay because it has cache semantics.
     75   mutable tensorflow::gtl::FlatMap<const llvm::Function*,
     76                                    llvm::TargetTransformInfo>
     77       target_transform_info_cache_;
     78   llvm::TargetMachine* target_machine_;
     79 };
     80 
     81 }  // namespace cpu
     82 }  // namespace xla
     83 
     84 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_TARGET_MACHINE_FEATURES_H_
     85