Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
     18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
     19 
     20 #include "arch/instruction_set.h"
     21 #include "arch/instruction_set_features.h"
     22 #include "base/arena_containers.h"
     23 #include "base/arena_object.h"
     24 #include "base/bit_field.h"
     25 #include "base/bit_utils.h"
     26 #include "base/enums.h"
     27 #include "globals.h"
     28 #include "graph_visualizer.h"
     29 #include "locations.h"
     30 #include "memory_region.h"
     31 #include "nodes.h"
     32 #include "optimizing_compiler_stats.h"
     33 #include "read_barrier_option.h"
     34 #include "stack_map_stream.h"
     35 #include "string_reference.h"
     36 #include "utils/label.h"
     37 #include "utils/type_reference.h"
     38 
     39 namespace art {
     40 
     41 // Binary encoding of 2^32 for type double.
     42 static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
     43 // Binary encoding of 2^31 for type double.
     44 static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
     45 
     46 // Minimum value for a primitive integer.
     47 static int32_t constexpr kPrimIntMin = 0x80000000;
     48 // Minimum value for a primitive long.
     49 static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);
     50 
     51 // Maximum value for a primitive integer.
     52 static int32_t constexpr kPrimIntMax = 0x7fffffff;
     53 // Maximum value for a primitive long.
     54 static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
     55 
     56 static constexpr ReadBarrierOption kCompilerReadBarrierOption =
     57     kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
     58 
     59 class Assembler;
     60 class CodeGenerator;
     61 class CompilerDriver;
     62 class CompilerOptions;
     63 class LinkerPatch;
     64 class ParallelMoveResolver;
     65 
     66 class CodeAllocator {
     67  public:
     68   CodeAllocator() {}
     69   virtual ~CodeAllocator() {}
     70 
     71   virtual uint8_t* Allocate(size_t size) = 0;
     72 
     73  private:
     74   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
     75 };
     76 
     77 class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
     78  public:
     79   explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
     80     for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
     81       saved_core_stack_offsets_[i] = kRegisterNotSaved;
     82       saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
     83     }
     84   }
     85 
     86   virtual ~SlowPathCode() {}
     87 
     88   virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
     89 
     90   // Save live core and floating-point caller-save registers and
     91   // update the stack mask in `locations` for registers holding object
     92   // references.
     93   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
     94   // Restore live core and floating-point caller-save registers.
     95   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
     96 
     97   bool IsCoreRegisterSaved(int reg) const {
     98     return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
     99   }
    100 
    101   bool IsFpuRegisterSaved(int reg) const {
    102     return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
    103   }
    104 
    105   uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    106     return saved_core_stack_offsets_[reg];
    107   }
    108 
    109   uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    110     return saved_fpu_stack_offsets_[reg];
    111   }
    112 
    113   virtual bool IsFatal() const { return false; }
    114 
    115   virtual const char* GetDescription() const = 0;
    116 
    117   Label* GetEntryLabel() { return &entry_label_; }
    118   Label* GetExitLabel() { return &exit_label_; }
    119 
    120   HInstruction* GetInstruction() const {
    121     return instruction_;
    122   }
    123 
    124   uint32_t GetDexPc() const {
    125     return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
    126   }
    127 
    128  protected:
    129   static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
    130   static constexpr uint32_t kRegisterNotSaved = -1;
    131   // The instruction where this slow path is happening.
    132   HInstruction* instruction_;
    133   uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
    134   uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
    135 
    136  private:
    137   Label entry_label_;
    138   Label exit_label_;
    139 
    140   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
    141 };
    142 
    143 class InvokeDexCallingConventionVisitor {
    144  public:
    145   virtual Location GetNextLocation(Primitive::Type type) = 0;
    146   virtual Location GetReturnLocation(Primitive::Type type) const = 0;
    147   virtual Location GetMethodLocation() const = 0;
    148 
    149  protected:
    150   InvokeDexCallingConventionVisitor() {}
    151   virtual ~InvokeDexCallingConventionVisitor() {}
    152 
    153   // The current index for core registers.
    154   uint32_t gp_index_ = 0u;
    155   // The current index for floating-point registers.
    156   uint32_t float_index_ = 0u;
    157   // The current stack index.
    158   uint32_t stack_index_ = 0u;
    159 
    160  private:
    161   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
    162 };
    163 
    164 class FieldAccessCallingConvention {
    165  public:
    166   virtual Location GetObjectLocation() const = 0;
    167   virtual Location GetFieldIndexLocation() const = 0;
    168   virtual Location GetReturnLocation(Primitive::Type type) const = 0;
    169   virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
    170   virtual Location GetFpuLocation(Primitive::Type type) const = 0;
    171   virtual ~FieldAccessCallingConvention() {}
    172 
    173  protected:
    174   FieldAccessCallingConvention() {}
    175 
    176  private:
    177   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
    178 };
    179 
    180 class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
    181  public:
    182   // Compiles the graph to executable instructions.
    183   void Compile(CodeAllocator* allocator);
    184   static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
    185                                                InstructionSet instruction_set,
    186                                                const InstructionSetFeatures& isa_features,
    187                                                const CompilerOptions& compiler_options,
    188                                                OptimizingCompilerStats* stats = nullptr);
    189   virtual ~CodeGenerator() {}
    190 
    191   // Get the graph. This is the outermost graph, never the graph of a method being inlined.
    192   HGraph* GetGraph() const { return graph_; }
    193 
    194   HBasicBlock* GetNextBlockToEmit() const;
    195   HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
    196   bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
    197 
    198   size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    199     // Note that this follows the current calling convention.
    200     return GetFrameSize()
    201         + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
    202         + parameter->GetIndex() * kVRegSize;
    203   }
    204 
    205   virtual void Initialize() = 0;
    206   virtual void Finalize(CodeAllocator* allocator);
    207   virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
    208   virtual void GenerateFrameEntry() = 0;
    209   virtual void GenerateFrameExit() = 0;
    210   virtual void Bind(HBasicBlock* block) = 0;
    211   virtual void MoveConstant(Location destination, int32_t value) = 0;
    212   virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
    213   virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
    214 
    215   virtual Assembler* GetAssembler() = 0;
    216   virtual const Assembler& GetAssembler() const = 0;
    217   virtual size_t GetWordSize() const = 0;
    218   virtual size_t GetFloatingPointSpillSlotSize() const = 0;
    219   virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
    220   void InitializeCodeGeneration(size_t number_of_spill_slots,
    221                                 size_t maximum_safepoint_spill_size,
    222                                 size_t number_of_out_slots,
    223                                 const ArenaVector<HBasicBlock*>& block_order);
    224   // Backends can override this as necessary. For most, no special alignment is required.
    225   virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
    226 
    227   uint32_t GetFrameSize() const { return frame_size_; }
    228   void SetFrameSize(uint32_t size) { frame_size_ = size; }
    229   uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
    230   uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
    231 
    232   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
    233   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
    234   virtual void SetupBlockedRegisters() const = 0;
    235 
    236   virtual void ComputeSpillMask() {
    237     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    238     DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    239     fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
    240   }
    241 
    242   static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    243     uint32_t mask = 0;
    244     for (size_t i = 0, e = length; i < e; ++i) {
    245       mask |= (1 << registers[i]);
    246     }
    247     return mask;
    248   }
    249 
    250   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
    251   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
    252   virtual InstructionSet GetInstructionSet() const = 0;
    253 
    254   const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
    255 
    256   void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
    257 
    258   // Saves the register in the stack. Returns the size taken on stack.
    259   virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
    260   // Restores the register from the stack. Returns the size taken on stack.
    261   virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
    262 
    263   virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
    264   virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
    265 
    266   virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
    267   // Returns whether we should split long moves in parallel moves.
    268   virtual bool ShouldSplitLongMoves() const { return false; }
    269 
    270   size_t GetNumberOfCoreCalleeSaveRegisters() const {
    271     return POPCOUNT(core_callee_save_mask_);
    272   }
    273 
    274   size_t GetNumberOfCoreCallerSaveRegisters() const {
    275     DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    276     return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
    277   }
    278 
    279   bool IsCoreCalleeSaveRegister(int reg) const {
    280     return (core_callee_save_mask_ & (1 << reg)) != 0;
    281   }
    282 
    283   bool IsFloatingPointCalleeSaveRegister(int reg) const {
    284     return (fpu_callee_save_mask_ & (1 << reg)) != 0;
    285   }
    286 
    287   uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    288     DCHECK(locations->OnlyCallsOnSlowPath() ||
    289            (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
    290                !locations->HasCustomSlowPathCallingConvention()));
    291     uint32_t live_registers = core_registers
    292         ? locations->GetLiveRegisters()->GetCoreRegisters()
    293         : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    294     if (locations->HasCustomSlowPathCallingConvention()) {
    295       // Save only the live registers that the custom calling convention wants us to save.
    296       uint32_t caller_saves = core_registers
    297           ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
    298           : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
    299       return live_registers & caller_saves;
    300     } else {
    301       // Default ABI, we need to spill non-callee-save live registers.
    302       uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
    303       return live_registers & ~callee_saves;
    304     }
    305   }
    306 
    307   size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    308     return POPCOUNT(GetSlowPathSpills(locations, core_registers));
    309   }
    310 
    311   size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    312     DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    313     DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    314     return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
    315   }
    316 
    317   // Record native to dex mapping for a suspend point.  Required by runtime.
    318   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
    319   // Check whether we have already recorded mapping at this PC.
    320   bool HasStackMapAtCurrentPc();
    321   // Record extra stack maps if we support native debugging.
    322   void MaybeRecordNativeDebugInfo(HInstruction* instruction,
    323                                   uint32_t dex_pc,
    324                                   SlowPathCode* slow_path = nullptr);
    325 
    326   bool CanMoveNullCheckToUser(HNullCheck* null_check);
    327   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
    328   LocationSummary* CreateThrowingSlowPathLocations(
    329       HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
    330   void GenerateNullCheck(HNullCheck* null_check);
    331   virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
    332   virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
    333 
    334   // Records a stack map which the runtime might use to set catch phi values
    335   // during exception delivery.
    336   // TODO: Replace with a catch-entering instruction that records the environment.
    337   void RecordCatchBlockInfo();
    338 
    339   // TODO: Avoid creating the `std::unique_ptr` here.
    340   void AddSlowPath(SlowPathCode* slow_path) {
    341     slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
    342   }
    343 
    344   void BuildStackMaps(MemoryRegion stack_map_region,
    345                       MemoryRegion method_info_region,
    346                       const DexFile::CodeItem& code_item);
    347   void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
    348   size_t GetNumberOfJitRoots() const {
    349     return jit_string_roots_.size() + jit_class_roots_.size();
    350   }
    351 
    352   // Fills the `literals` array with literals collected during code generation.
    353   // Also emits literal patches.
    354   void EmitJitRoots(uint8_t* code,
    355                     Handle<mirror::ObjectArray<mirror::Object>> roots,
    356                     const uint8_t* roots_data)
    357       REQUIRES_SHARED(Locks::mutator_lock_);
    358 
    359   bool IsLeafMethod() const {
    360     return is_leaf_;
    361   }
    362 
    363   void MarkNotLeaf() {
    364     is_leaf_ = false;
    365     requires_current_method_ = true;
    366   }
    367 
    368   void SetRequiresCurrentMethod() {
    369     requires_current_method_ = true;
    370   }
    371 
    372   bool RequiresCurrentMethod() const {
    373     return requires_current_method_;
    374   }
    375 
    376   // Clears the spill slots taken by loop phis in the `LocationSummary` of the
    377   // suspend check. This is called when the code generator generates code
    378   // for the suspend check at the back edge (instead of where the suspend check
    379   // is, which is the loop entry). At this point, the spill slots for the phis
    380   // have not been written to.
    381   void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
    382 
    383   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
    384   bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
    385 
    386   bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
    387   bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
    388 
    389   // Helper that returns the pointer offset of an index in an object array.
    390   // Note: this method assumes we always have the same pointer size, regardless
    391   // of the architecture.
    392   static size_t GetCacheOffset(uint32_t index);
    393   // Pointer variant for ArtMethod and ArtField arrays.
    394   size_t GetCachePointerOffset(uint32_t index);
    395 
    396   // Helper that returns the offset of the array's length field.
    397   // Note: Besides the normal arrays, we also use the HArrayLength for
    398   // accessing the String's `count` field in String intrinsics.
    399   static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
    400 
    401   // Helper that returns the offset of the array's data.
    402   // Note: Besides the normal arrays, we also use the HArrayGet for
    403   // accessing the String's `value` field in String intrinsics.
    404   static uint32_t GetArrayDataOffset(HArrayGet* array_get);
    405 
    406   // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
    407   template <PointerSize pointer_size>
    408   static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
    409     // The entry point list defines 30 ReadBarrierMarkRegX entry points.
    410     DCHECK_LT(reg, 30u);
    411     // The ReadBarrierMarkRegX entry points are ordered by increasing
    412     // register number in Thread::tls_Ptr_.quick_entrypoints.
    413     return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
    414         + static_cast<size_t>(pointer_size) * reg;
    415   }
    416 
    417   void EmitParallelMoves(Location from1,
    418                          Location to1,
    419                          Primitive::Type type1,
    420                          Location from2,
    421                          Location to2,
    422                          Primitive::Type type2);
    423 
    424   static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    425     // Check that null value is not represented as an integer constant.
    426     DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    427     return type == Primitive::kPrimNot && !value->IsNullConstant();
    428   }
    429 
    430 
    431   // Performs checks pertaining to an InvokeRuntime call.
    432   void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
    433                              HInstruction* instruction,
    434                              SlowPathCode* slow_path);
    435 
    436   // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
    437   static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
    438                                                           SlowPathCode* slow_path);
    439 
    440   void AddAllocatedRegister(Location location) {
    441     allocated_registers_.Add(location);
    442   }
    443 
    444   bool HasAllocatedRegister(bool is_core, int reg) const {
    445     return is_core
    446         ? allocated_registers_.ContainsCoreRegister(reg)
    447         : allocated_registers_.ContainsFloatingPointRegister(reg);
    448   }
    449 
    450   void AllocateLocations(HInstruction* instruction);
    451 
    452   // Tells whether the stack frame of the compiled method is
    453   // considered "empty", that is either actually having a size of zero,
    454   // or just containing the saved return address register.
    455   bool HasEmptyFrame() const {
    456     return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
    457   }
    458 
    459   static int32_t GetInt32ValueOf(HConstant* constant) {
    460     if (constant->IsIntConstant()) {
    461       return constant->AsIntConstant()->GetValue();
    462     } else if (constant->IsNullConstant()) {
    463       return 0;
    464     } else {
    465       DCHECK(constant->IsFloatConstant());
    466       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    467     }
    468   }
    469 
    470   static int64_t GetInt64ValueOf(HConstant* constant) {
    471     if (constant->IsIntConstant()) {
    472       return constant->AsIntConstant()->GetValue();
    473     } else if (constant->IsNullConstant()) {
    474       return 0;
    475     } else if (constant->IsFloatConstant()) {
    476       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    477     } else if (constant->IsLongConstant()) {
    478       return constant->AsLongConstant()->GetValue();
    479     } else {
    480       DCHECK(constant->IsDoubleConstant());
    481       return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    482     }
    483   }
    484 
    485   size_t GetFirstRegisterSlotInSlowPath() const {
    486     return first_register_slot_in_slow_path_;
    487   }
    488 
    489   uint32_t FrameEntrySpillSize() const {
    490     return GetFpuSpillSize() + GetCoreSpillSize();
    491   }
    492 
    493   virtual ParallelMoveResolver* GetMoveResolver() = 0;
    494 
    495   static void CreateCommonInvokeLocationSummary(
    496       HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
    497 
    498   void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
    499 
    500   void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);
    501 
    502   void CreateUnresolvedFieldLocationSummary(
    503       HInstruction* field_access,
    504       Primitive::Type field_type,
    505       const FieldAccessCallingConvention& calling_convention);
    506 
    507   void GenerateUnresolvedFieldAccess(
    508       HInstruction* field_access,
    509       Primitive::Type field_type,
    510       uint32_t field_index,
    511       uint32_t dex_pc,
    512       const FieldAccessCallingConvention& calling_convention);
    513 
    514   static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
    515                                                         Location runtime_type_index_location,
    516                                                         Location runtime_return_location);
    517   void GenerateLoadClassRuntimeCall(HLoadClass* cls);
    518 
    519   static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
    520 
    521   void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
    522   DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
    523 
    524   virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
    525                              HInstruction* instruction,
    526                              uint32_t dex_pc,
    527                              SlowPathCode* slow_path = nullptr) = 0;
    528 
    529   // Check if the desired_string_load_kind is supported. If it is, return it,
    530   // otherwise return a fall-back kind that should be used instead.
    531   virtual HLoadString::LoadKind GetSupportedLoadStringKind(
    532       HLoadString::LoadKind desired_string_load_kind) = 0;
    533 
    534   // Check if the desired_class_load_kind is supported. If it is, return it,
    535   // otherwise return a fall-back kind that should be used instead.
    536   virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
    537       HLoadClass::LoadKind desired_class_load_kind) = 0;
    538 
    539   static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    540     switch (load->GetLoadKind()) {
    541       case HLoadString::LoadKind::kBssEntry:
    542         DCHECK(load->NeedsEnvironment());
    543         return LocationSummary::kCallOnSlowPath;
    544       case HLoadString::LoadKind::kDexCacheViaMethod:
    545         DCHECK(load->NeedsEnvironment());
    546         return LocationSummary::kCallOnMainOnly;
    547       case HLoadString::LoadKind::kJitTableAddress:
    548         DCHECK(!load->NeedsEnvironment());
    549         return kEmitCompilerReadBarrier
    550             ? LocationSummary::kCallOnSlowPath
    551             : LocationSummary::kNoCall;
    552         break;
    553       default:
    554         DCHECK(!load->NeedsEnvironment());
    555         return LocationSummary::kNoCall;
    556     }
    557   }
    558 
    559   // Check if the desired_dispatch_info is supported. If it is, return it,
    560   // otherwise return a fall-back info that should be used instead.
    561   virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
    562       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    563       HInvokeStaticOrDirect* invoke) = 0;
    564 
    565   // Generate a call to a static or direct method.
    566   virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
    567   // Generate a call to a virtual method.
    568   virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;
    569 
    570   // Copy the result of a call into the given target.
    571   virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
    572 
    573   virtual void GenerateNop() = 0;
    574 
    575   uint32_t GetReferenceSlowFlagOffset() const;
    576   uint32_t GetReferenceDisableFlagOffset() const;
    577 
    578   static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
    579 
    580  protected:
    581   // Patch info used for recording locations of required linker patches and their targets,
    582   // i.e. target method, string, type or code identified by their dex file and index.
    583   template <typename LabelType>
    584   struct PatchInfo {
    585     PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
    586         : dex_file(target_dex_file), index(target_index) { }
    587 
    588     const DexFile& dex_file;
    589     uint32_t index;
    590     LabelType label;
    591   };
    592 
    593   CodeGenerator(HGraph* graph,
    594                 size_t number_of_core_registers,
    595                 size_t number_of_fpu_registers,
    596                 size_t number_of_register_pairs,
    597                 uint32_t core_callee_save_mask,
    598                 uint32_t fpu_callee_save_mask,
    599                 const CompilerOptions& compiler_options,
    600                 OptimizingCompilerStats* stats)
    601       : frame_size_(0),
    602         core_spill_mask_(0),
    603         fpu_spill_mask_(0),
    604         first_register_slot_in_slow_path_(0),
    605         allocated_registers_(RegisterSet::Empty()),
    606         blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
    607                                                                     kArenaAllocCodeGenerator)),
    608         blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
    609                                                                    kArenaAllocCodeGenerator)),
    610         number_of_core_registers_(number_of_core_registers),
    611         number_of_fpu_registers_(number_of_fpu_registers),
    612         number_of_register_pairs_(number_of_register_pairs),
    613         core_callee_save_mask_(core_callee_save_mask),
    614         fpu_callee_save_mask_(fpu_callee_save_mask),
    615         stack_map_stream_(graph->GetArena(), graph->GetInstructionSet()),
    616         block_order_(nullptr),
    617         jit_string_roots_(StringReferenceValueComparator(),
    618                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
    619         jit_class_roots_(TypeReferenceValueComparator(),
    620                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
    621         disasm_info_(nullptr),
    622         stats_(stats),
    623         graph_(graph),
    624         compiler_options_(compiler_options),
    625         slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
    626         current_slow_path_(nullptr),
    627         current_block_index_(0),
    628         is_leaf_(true),
    629         requires_current_method_(false) {
    630     slow_paths_.reserve(8);
    631   }
    632 
    633   virtual HGraphVisitor* GetLocationBuilder() = 0;
    634   virtual HGraphVisitor* GetInstructionVisitor() = 0;
    635 
    636   // Returns the location of the first spilled entry for floating point registers,
    637   // relative to the stack pointer.
    638   uint32_t GetFpuSpillStart() const {
    639     return GetFrameSize() - FrameEntrySpillSize();
    640   }
    641 
    642   uint32_t GetFpuSpillSize() const {
    643     return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
    644   }
    645 
    646   uint32_t GetCoreSpillSize() const {
    647     return POPCOUNT(core_spill_mask_) * GetWordSize();
    648   }
    649 
    650   virtual bool HasAllocatedCalleeSaveRegisters() const {
    651     // We check the core registers against 1 because it always comprises the return PC.
    652     return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
    653       || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
    654   }
    655 
    656   bool CallPushesPC() const {
    657     InstructionSet instruction_set = GetInstructionSet();
    658     return instruction_set == kX86 || instruction_set == kX86_64;
    659   }
    660 
    661   // Arm64 has its own type for a label, so we need to templatize these methods
    662   // to share the logic.
    663 
    664   template <typename LabelType>
    665   LabelType* CommonInitializeLabels() {
    666     // We use raw array allocations instead of ArenaVector<> because Labels are
    667     // non-constructible and non-movable and as such cannot be held in a vector.
    668     size_t size = GetGraph()->GetBlocks().size();
    669     LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
    670                                                                       kArenaAllocCodeGenerator);
    671     for (size_t i = 0; i != size; ++i) {
    672       new(labels + i) LabelType();
    673     }
    674     return labels;
    675   }
    676 
    677   template <typename LabelType>
    678   LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    679     block = FirstNonEmptyBlock(block);
    680     return raw_pointer_to_labels_array + block->GetBlockId();
    681   }
    682 
    683   SlowPathCode* GetCurrentSlowPath() {
    684     return current_slow_path_;
    685   }
    686 
    687   // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
    688   virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
    689                                   const uint8_t* roots_data ATTRIBUTE_UNUSED) {
    690     DCHECK_EQ(jit_string_roots_.size(), 0u);
    691     DCHECK_EQ(jit_class_roots_.size(), 0u);
    692   }
    693 
    694   // Frame size required for this method.
    695   uint32_t frame_size_;
    696   uint32_t core_spill_mask_;
    697   uint32_t fpu_spill_mask_;
    698   uint32_t first_register_slot_in_slow_path_;
    699 
    700   // Registers that were allocated during linear scan.
    701   RegisterSet allocated_registers_;
    702 
    703   // Arrays used when doing register allocation to know which
    704   // registers we can allocate. `SetupBlockedRegisters` updates the
    705   // arrays.
    706   bool* const blocked_core_registers_;
    707   bool* const blocked_fpu_registers_;
    708   size_t number_of_core_registers_;
    709   size_t number_of_fpu_registers_;
    710   size_t number_of_register_pairs_;
    711   const uint32_t core_callee_save_mask_;
    712   const uint32_t fpu_callee_save_mask_;
    713 
    714   StackMapStream stack_map_stream_;
    715 
    716   // The order to use for code generation.
    717   const ArenaVector<HBasicBlock*>* block_order_;
    718 
    719   // Maps a StringReference (dex_file, string_index) to the index in the literal table.
    720   // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
    721   // will compute all the indices.
    722   ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
    723 
    724   // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
    725   // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
    726   // will compute all the indices.
    727   ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
    728 
    729   DisassemblyInformation* disasm_info_;
    730 
    731  private:
    732   size_t GetStackOffsetOfSavedRegister(size_t index);
    733   void GenerateSlowPaths();
    734   void BlockIfInRegister(Location location, bool is_out = false) const;
    735   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
    736 
    737   OptimizingCompilerStats* stats_;
    738 
    739   HGraph* const graph_;
    740   const CompilerOptions& compiler_options_;
    741 
    742   ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
    743 
    744   // The current slow-path that we're generating code for.
    745   SlowPathCode* current_slow_path_;
    746 
    747   // The current block index in `block_order_` of the block
    748   // we are generating code for.
    749   size_t current_block_index_;
    750 
    751   // Whether the method is a leaf method.
    752   bool is_leaf_;
    753 
    754   // Whether an instruction in the graph accesses the current method.
    755   // TODO: Rename: this actually indicates that some instruction in the method
    756   // needs the environment including a valid stack frame.
    757   bool requires_current_method_;
    758 
    759   friend class OptimizingCFITest;
    760 
    761   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
    762 };
    763 
    764 template <typename C, typename F>
    765 class CallingConvention {
    766  public:
    767   CallingConvention(const C* registers,
    768                     size_t number_of_registers,
    769                     const F* fpu_registers,
    770                     size_t number_of_fpu_registers,
    771                     PointerSize pointer_size)
    772       : registers_(registers),
    773         number_of_registers_(number_of_registers),
    774         fpu_registers_(fpu_registers),
    775         number_of_fpu_registers_(number_of_fpu_registers),
    776         pointer_size_(pointer_size) {}
    777 
    778   size_t GetNumberOfRegisters() const { return number_of_registers_; }
    779   size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
    780 
    781   C GetRegisterAt(size_t index) const {
    782     DCHECK_LT(index, number_of_registers_);
    783     return registers_[index];
    784   }
    785 
    786   F GetFpuRegisterAt(size_t index) const {
    787     DCHECK_LT(index, number_of_fpu_registers_);
    788     return fpu_registers_[index];
    789   }
    790 
    791   size_t GetStackOffsetOf(size_t index) const {
    792     // We still reserve the space for parameters passed by registers.
    793     // Add space for the method pointer.
    794     return static_cast<size_t>(pointer_size_) + index * kVRegSize;
    795   }
    796 
    797  private:
    798   const C* registers_;
    799   const size_t number_of_registers_;
    800   const F* fpu_registers_;
    801   const size_t number_of_fpu_registers_;
    802   const PointerSize pointer_size_;
    803 
    804   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
    805 };
    806 
    807 /**
    808  * A templated class SlowPathGenerator with a templated method NewSlowPath()
    809  * that can be used by any code generator to share equivalent slow-paths with
    810  * the objective of reducing generated code size.
    811  *
    812  * InstructionType:  instruction that requires SlowPathCodeType
    813  * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
    814  */
    815 template <typename InstructionType>
    816 class SlowPathGenerator {
    817   static_assert(std::is_base_of<HInstruction, InstructionType>::value,
    818                 "InstructionType is not a subclass of art::HInstruction");
    819 
    820  public:
    821   SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
    822       : graph_(graph),
    823         codegen_(codegen),
    824         slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
    825 
    826   // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
    827   // Templating the method (rather than the whole class) on the slow-path type enables
    828   // keeping this code at a generic, non architecture-specific place.
    829   //
    830   // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
    831   //       To relax this requirement, we would need some RTTI on the stored slow-paths,
    832   //       or template the class as a whole on SlowPathType.
    833   template <typename SlowPathCodeType>
    834   SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    835     static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
    836                   "SlowPathCodeType is not a subclass of art::SlowPathCode");
    837     static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
    838                   "SlowPathCodeType is not constructible from InstructionType*");
    839     // Iterate over potential candidates for sharing. Currently, only same-typed
    840     // slow-paths with exactly the same dex-pc are viable candidates.
    841     // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    842     const uint32_t dex_pc = instruction->GetDexPc();
    843     auto iter = slow_path_map_.find(dex_pc);
    844     if (iter != slow_path_map_.end()) {
    845       auto candidates = iter->second;
    846       for (const auto& it : candidates) {
    847         InstructionType* other_instruction = it.first;
    848         SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
    849         // Determine if the instructions allow for slow-path sharing.
    850         if (HaveSameLiveRegisters(instruction, other_instruction) &&
    851             HaveSameStackMap(instruction, other_instruction)) {
    852           // Can share: reuse existing one.
    853           return other_slow_path;
    854         }
    855       }
    856     } else {
    857       // First time this dex-pc is seen.
    858       iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    859     }
    860     // Cannot share: create and add new slow-path for this particular dex-pc.
    861     SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    862     iter->second.emplace_back(std::make_pair(instruction, slow_path));
    863     codegen_->AddSlowPath(slow_path);
    864     return slow_path;
    865   }
    866 
    867  private:
    868   // Tests if both instructions have same set of live physical registers. This ensures
    869   // the slow-path has exactly the same preamble on saving these registers to stack.
    870   bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    871     const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    872     const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    873     RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    874     RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    875     return (((live1->GetCoreRegisters() & core_spill) ==
    876              (live2->GetCoreRegisters() & core_spill)) &&
    877             ((live1->GetFloatingPointRegisters() & fpu_spill) ==
    878              (live2->GetFloatingPointRegisters() & fpu_spill)));
    879   }
    880 
    881   // Tests if both instructions have the same stack map. This ensures the interpreter
    882   // will find exactly the same dex-registers at the same entries.
    883   bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    884     DCHECK(i1->HasEnvironment());
    885     DCHECK(i2->HasEnvironment());
    886     // We conservatively test if the two instructions find exactly the same instructions
    887     // and location in each dex-register. This guarantees they will have the same stack map.
    888     HEnvironment* e1 = i1->GetEnvironment();
    889     HEnvironment* e2 = i2->GetEnvironment();
    890     if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
    891       return false;
    892     }
    893     for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
    894       if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
    895           !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
    896         return false;
    897       }
    898     }
    899     return true;
    900   }
    901 
    902   HGraph* const graph_;
    903   CodeGenerator* const codegen_;
    904 
    905   // Map from dex-pc to vector of already existing instruction/slow-path pairs.
    906   ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
    907 
    908   DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
    909 };
    910 
    911 class InstructionCodeGenerator : public HGraphVisitor {
    912  public:
    913   InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
    914       : HGraphVisitor(graph),
    915         deopt_slow_paths_(graph, codegen) {}
    916 
    917  protected:
    918   // Add slow-path generator for each instruction/slow-path combination that desires sharing.
    919   // TODO: under current regime, only deopt sharing make sense; extend later.
    920   SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
    921 };
    922 
    923 }  // namespace art
    924 
    925 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
    926