/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
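// For reference, a sketch of how these encodings follow from the IEEE-754
// double layout (1 sign bit, 11 exponent bits, 52 mantissa bits):
//   2^32 -> exponent field = 1023 + 32 = 1055 = 0x41F, mantissa = 0,
//           giving 0x41F << 52 = 0x41F0000000000000.
//   2^31 -> exponent field = 1023 + 31 = 1054 = 0x41E,
//           giving 0x41E0000000000000.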

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
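// Illustrative sketch only (the class name below is hypothetical, not part of
// this header): an implementation typically hands back a growable byte buffer
// that the finished instruction stream is copied into, e.g.:
//
//   class VectorCodeAllocator : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) override {
//       memory_.resize(size);
//       return memory_.data();
//     }
//     const std::vector<uint8_t>& GetMemory() const { return memory_; }
//
//    private:
//     std::vector<uint8_t> memory_;
//   };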

struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

 private:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
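// Illustrative sketch of a typical slow path (the label accessors and the `__`
// assembler macro below are hypothetical here; they live in the
// per-architecture code generators):
//
//   void EmitNativeCode(CodeGenerator* codegen) override {
//     __ Bind(GetEntryLabel());
//     SaveLiveRegisters(codegen, instruction_->GetLocations());
//     // ... move arguments into place and call the runtime entrypoint ...
//     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
//     RestoreLiveRegisters(codegen, instruction_->GetLocations());
//     __ Jump(GetExitLabel());
//   }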

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
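// How a backend typically uses this visitor (illustrative; the exact policy is
// per-architecture): each call to GetNextLocation() hands out the next free
// argument register, bumping gp_index_ or float_index_, and once the register
// arguments are exhausted it falls back to stack slots derived from
// stack_index_.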

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
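  // For example (illustrative numbers, not taken from any particular method):
  // on a 32-bit target with a 64-byte frame, the parameter at index 1 lives at
  // 64 + 4 (ArtMethod* slot) + 1 * kVRegSize = 72 bytes above the stack pointer.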

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
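  // For example, for registers {0, 3, 5} the mask is
  // (1 << 0) | (1 << 3) | (1 << 5) = 0x29.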

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register on the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildSourceMap(DefaultSrcMap* src_map) const;
  void BuildMappingTable(std::vector<uint8_t>* vector) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);

  bool IsBaseline() const {
    return is_baseline_;
  }

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
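  // Reference stores need a GC write barrier (card marking) so the collector
  // sees the new reference; storing null cannot create such a reference, so
  // the barrier is skipped in that case.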

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is considered
  // "empty", that is, it either actually has a size of zero, or it only
  // contains the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
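  // On x86/x86-64 the call instruction pushes the return PC, so an "empty"
  // frame still has size GetWordSize(); on architectures that keep the return
  // address in a register (e.g. ARM), an empty frame has size 0.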

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
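  // For example, a float constant 1.0f is returned as its raw IEEE-754 bits,
  // 0x3F800000, and a double constant 1.0 as 0x3FF0000000000000.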

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        is_baseline_(false),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }
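  // For example, a core spill mask with three bits set on a target with 4-byte
  // words gives GetCoreSpillSize() == 12; together with GetFpuSpillSize() this
  // determines where the spill area starts (GetFpuSpillStart()).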

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because the allocated core callee-saves
    // always include the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  // Whether we are using baseline.
  bool is_baseline_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }
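  // For example, with a 4-byte method pointer the argument at index 2 lives at
  // stack offset 4 + 2 * kVRegSize = 12.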

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_