Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
     18 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
     19 
     20 #include "arch/instruction_set.h"
     21 #include "arch/instruction_set_features.h"
     22 #include "base/arena_containers.h"
     23 #include "base/arena_object.h"
     24 #include "base/bit_field.h"
     25 #include "base/bit_utils.h"
     26 #include "base/enums.h"
     27 #include "globals.h"
     28 #include "graph_visualizer.h"
     29 #include "locations.h"
     30 #include "memory_region.h"
     31 #include "nodes.h"
     32 #include "optimizing_compiler_stats.h"
     33 #include "read_barrier_option.h"
     34 #include "stack.h"
     35 #include "stack_map_stream.h"
     36 #include "string_reference.h"
     37 #include "type_reference.h"
     38 #include "utils/label.h"
     39 
     40 namespace art {
     41 
     42 // Binary encoding of 2^32 for type double.
     43 static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
     44 // Binary encoding of 2^31 for type double.
     45 static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
     46 
     47 // Minimum value for a primitive integer.
     48 static int32_t constexpr kPrimIntMin = 0x80000000;
     49 // Minimum value for a primitive long.
     50 static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);
     51 
     52 // Maximum value for a primitive integer.
     53 static int32_t constexpr kPrimIntMax = 0x7fffffff;
     54 // Maximum value for a primitive long.
     55 static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
     56 
     57 static constexpr ReadBarrierOption kCompilerReadBarrierOption =
     58     kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
     59 
     60 class Assembler;
     61 class CodeGenerator;
     62 class CompilerDriver;
     63 class CompilerOptions;
     64 class LinkerPatch;
     65 class ParallelMoveResolver;
     66 
     67 class CodeAllocator {
     68  public:
     69   CodeAllocator() {}
     70   virtual ~CodeAllocator() {}
     71 
     72   virtual uint8_t* Allocate(size_t size) = 0;
     73 
     74  private:
     75   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
     76 };
     77 
     78 class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
     79  public:
     80   explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
     81     for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
     82       saved_core_stack_offsets_[i] = kRegisterNotSaved;
     83       saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
     84     }
     85   }
     86 
     87   virtual ~SlowPathCode() {}
     88 
     89   virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
     90 
     91   // Save live core and floating-point caller-save registers and
     92   // update the stack mask in `locations` for registers holding object
     93   // references.
     94   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
     95   // Restore live core and floating-point caller-save registers.
     96   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
     97 
     98   bool IsCoreRegisterSaved(int reg) const {
     99     return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
    100   }
    101 
    102   bool IsFpuRegisterSaved(int reg) const {
    103     return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
    104   }
    105 
    106   uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    107     return saved_core_stack_offsets_[reg];
    108   }
    109 
    110   uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    111     return saved_fpu_stack_offsets_[reg];
    112   }
    113 
    114   virtual bool IsFatal() const { return false; }
    115 
    116   virtual const char* GetDescription() const = 0;
    117 
    118   Label* GetEntryLabel() { return &entry_label_; }
    119   Label* GetExitLabel() { return &exit_label_; }
    120 
    121   HInstruction* GetInstruction() const {
    122     return instruction_;
    123   }
    124 
    125   uint32_t GetDexPc() const {
    126     return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
    127   }
    128 
    129  protected:
    130   static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
    131   static constexpr uint32_t kRegisterNotSaved = -1;
    132   // The instruction where this slow path is happening.
    133   HInstruction* instruction_;
    134   uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
    135   uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
    136 
    137  private:
    138   Label entry_label_;
    139   Label exit_label_;
    140 
    141   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
    142 };
    143 
    144 class InvokeDexCallingConventionVisitor {
    145  public:
    146   virtual Location GetNextLocation(Primitive::Type type) = 0;
    147   virtual Location GetReturnLocation(Primitive::Type type) const = 0;
    148   virtual Location GetMethodLocation() const = 0;
    149 
    150  protected:
    151   InvokeDexCallingConventionVisitor() {}
    152   virtual ~InvokeDexCallingConventionVisitor() {}
    153 
    154   // The current index for core registers.
    155   uint32_t gp_index_ = 0u;
    156   // The current index for floating-point registers.
    157   uint32_t float_index_ = 0u;
    158   // The current stack index.
    159   uint32_t stack_index_ = 0u;
    160 
    161  private:
    162   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
    163 };
    164 
    165 class FieldAccessCallingConvention {
    166  public:
    167   virtual Location GetObjectLocation() const = 0;
    168   virtual Location GetFieldIndexLocation() const = 0;
    169   virtual Location GetReturnLocation(Primitive::Type type) const = 0;
    170   virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
    171   virtual Location GetFpuLocation(Primitive::Type type) const = 0;
    172   virtual ~FieldAccessCallingConvention() {}
    173 
    174  protected:
    175   FieldAccessCallingConvention() {}
    176 
    177  private:
    178   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
    179 };
    180 
    181 class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
    182  public:
    183   // Compiles the graph to executable instructions.
    184   void Compile(CodeAllocator* allocator);
    185   static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
    186                                                InstructionSet instruction_set,
    187                                                const InstructionSetFeatures& isa_features,
    188                                                const CompilerOptions& compiler_options,
    189                                                OptimizingCompilerStats* stats = nullptr);
    190   virtual ~CodeGenerator() {}
    191 
    192   // Get the graph. This is the outermost graph, never the graph of a method being inlined.
    193   HGraph* GetGraph() const { return graph_; }
    194 
    195   HBasicBlock* GetNextBlockToEmit() const;
    196   HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
    197   bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
    198 
    199   size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    200     // Note that this follows the current calling convention.
    201     return GetFrameSize()
    202         + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
    203         + parameter->GetIndex() * kVRegSize;
    204   }
    205 
    206   virtual void Initialize() = 0;
    207   virtual void Finalize(CodeAllocator* allocator);
    208   virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
    209   virtual void GenerateFrameEntry() = 0;
    210   virtual void GenerateFrameExit() = 0;
    211   virtual void Bind(HBasicBlock* block) = 0;
    212   virtual void MoveConstant(Location destination, int32_t value) = 0;
    213   virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
    214   virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
    215 
    216   virtual Assembler* GetAssembler() = 0;
    217   virtual const Assembler& GetAssembler() const = 0;
    218   virtual size_t GetWordSize() const = 0;
    219   virtual size_t GetFloatingPointSpillSlotSize() const = 0;
    220   virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
    221   void InitializeCodeGeneration(size_t number_of_spill_slots,
    222                                 size_t maximum_safepoint_spill_size,
    223                                 size_t number_of_out_slots,
    224                                 const ArenaVector<HBasicBlock*>& block_order);
    225   // Backends can override this as necessary. For most, no special alignment is required.
    226   virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
    227 
    228   uint32_t GetFrameSize() const { return frame_size_; }
    229   void SetFrameSize(uint32_t size) { frame_size_ = size; }
    230   uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
    231   uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
    232 
    233   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
    234   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
    235   virtual void SetupBlockedRegisters() const = 0;
    236 
    237   virtual void ComputeSpillMask() {
    238     core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    239     DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    240     fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
    241   }
    242 
    243   static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    244     uint32_t mask = 0;
    245     for (size_t i = 0, e = length; i < e; ++i) {
    246       mask |= (1 << registers[i]);
    247     }
    248     return mask;
    249   }
    250 
    251   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
    252   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
    253   virtual InstructionSet GetInstructionSet() const = 0;
    254 
    255   const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
    256 
    257   void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
    258 
    259   // Saves the register in the stack. Returns the size taken on stack.
    260   virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
    261   // Restores the register from the stack. Returns the size taken on stack.
    262   virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
    263 
    264   virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
    265   virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
    266 
    267   virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
    268   // Returns whether we should split long moves in parallel moves.
    269   virtual bool ShouldSplitLongMoves() const { return false; }
    270 
    271   size_t GetNumberOfCoreCalleeSaveRegisters() const {
    272     return POPCOUNT(core_callee_save_mask_);
    273   }
    274 
    275   size_t GetNumberOfCoreCallerSaveRegisters() const {
    276     DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    277     return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
    278   }
    279 
    280   bool IsCoreCalleeSaveRegister(int reg) const {
    281     return (core_callee_save_mask_ & (1 << reg)) != 0;
    282   }
    283 
    284   bool IsFloatingPointCalleeSaveRegister(int reg) const {
    285     return (fpu_callee_save_mask_ & (1 << reg)) != 0;
    286   }
    287 
    288   uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    289     DCHECK(locations->OnlyCallsOnSlowPath() ||
    290            (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
    291                !locations->HasCustomSlowPathCallingConvention()));
    292     uint32_t live_registers = core_registers
    293         ? locations->GetLiveRegisters()->GetCoreRegisters()
    294         : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    295     if (locations->HasCustomSlowPathCallingConvention()) {
    296       // Save only the live registers that the custom calling convention wants us to save.
    297       uint32_t caller_saves = core_registers
    298           ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
    299           : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
    300       return live_registers & caller_saves;
    301     } else {
    302       // Default ABI, we need to spill non-callee-save live registers.
    303       uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
    304       return live_registers & ~callee_saves;
    305     }
    306   }
    307 
    308   size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    309     return POPCOUNT(GetSlowPathSpills(locations, core_registers));
    310   }
    311 
    312   size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    313     DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    314     DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    315     return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
    316   }
    317 
    318   // Record native to dex mapping for a suspend point.  Required by runtime.
    319   void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
    320   // Check whether we have already recorded mapping at this PC.
    321   bool HasStackMapAtCurrentPc();
    322   // Record extra stack maps if we support native debugging.
    323   void MaybeRecordNativeDebugInfo(HInstruction* instruction,
    324                                   uint32_t dex_pc,
    325                                   SlowPathCode* slow_path = nullptr);
    326 
    327   bool CanMoveNullCheckToUser(HNullCheck* null_check);
    328   void MaybeRecordImplicitNullCheck(HInstruction* instruction);
    329   LocationSummary* CreateThrowingSlowPathLocations(
    330       HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
    331   void GenerateNullCheck(HNullCheck* null_check);
    332   virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
    333   virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
    334 
    335   // Records a stack map which the runtime might use to set catch phi values
    336   // during exception delivery.
    337   // TODO: Replace with a catch-entering instruction that records the environment.
    338   void RecordCatchBlockInfo();
    339 
    340   // TODO: Avoid creating the `std::unique_ptr` here.
    341   void AddSlowPath(SlowPathCode* slow_path) {
    342     slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
    343   }
    344 
    345   void BuildStackMaps(MemoryRegion stack_map_region,
    346                       MemoryRegion method_info_region,
    347                       const DexFile::CodeItem& code_item);
    348   void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
    349   size_t GetNumberOfJitRoots() const {
    350     return jit_string_roots_.size() + jit_class_roots_.size();
    351   }
    352 
    353   // Fills the `literals` array with literals collected during code generation.
    354   // Also emits literal patches.
    355   void EmitJitRoots(uint8_t* code,
    356                     Handle<mirror::ObjectArray<mirror::Object>> roots,
    357                     const uint8_t* roots_data)
    358       REQUIRES_SHARED(Locks::mutator_lock_);
    359 
    360   bool IsLeafMethod() const {
    361     return is_leaf_;
    362   }
    363 
    364   void MarkNotLeaf() {
    365     is_leaf_ = false;
    366     requires_current_method_ = true;
    367   }
    368 
    369   void SetRequiresCurrentMethod() {
    370     requires_current_method_ = true;
    371   }
    372 
    373   bool RequiresCurrentMethod() const {
    374     return requires_current_method_;
    375   }
    376 
    377   // Clears the spill slots taken by loop phis in the `LocationSummary` of the
    378   // suspend check. This is called when the code generator generates code
    379   // for the suspend check at the back edge (instead of where the suspend check
    380   // is, which is the loop entry). At this point, the spill slots for the phis
    381   // have not been written to.
    382   void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
    383 
    384   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
    385   bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
    386 
    387   bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
    388   bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
    389 
    390   // Helper that returns the pointer offset of an index in an object array.
    391   // Note: this method assumes we always have the same pointer size, regardless
    392   // of the architecture.
    393   static size_t GetCacheOffset(uint32_t index);
    394   // Pointer variant for ArtMethod and ArtField arrays.
    395   size_t GetCachePointerOffset(uint32_t index);
    396 
    397   // Helper that returns the offset of the array's length field.
    398   // Note: Besides the normal arrays, we also use the HArrayLength for
    399   // accessing the String's `count` field in String intrinsics.
    400   static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
    401 
    402   // Helper that returns the offset of the array's data.
    403   // Note: Besides the normal arrays, we also use the HArrayGet for
    404   // accessing the String's `value` field in String intrinsics.
    405   static uint32_t GetArrayDataOffset(HArrayGet* array_get);
    406 
    407   void EmitParallelMoves(Location from1,
    408                          Location to1,
    409                          Primitive::Type type1,
    410                          Location from2,
    411                          Location to2,
    412                          Primitive::Type type2);
    413 
    414   static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    415     // Check that null value is not represented as an integer constant.
    416     DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    417     return type == Primitive::kPrimNot && !value->IsNullConstant();
    418   }
    419 
    420 
    421   // Performs checks pertaining to an InvokeRuntime call.
    422   void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
    423                              HInstruction* instruction,
    424                              SlowPathCode* slow_path);
    425 
    426   // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
    427   static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
    428                                                           SlowPathCode* slow_path);
    429 
    430   void AddAllocatedRegister(Location location) {
    431     allocated_registers_.Add(location);
    432   }
    433 
    434   bool HasAllocatedRegister(bool is_core, int reg) const {
    435     return is_core
    436         ? allocated_registers_.ContainsCoreRegister(reg)
    437         : allocated_registers_.ContainsFloatingPointRegister(reg);
    438   }
    439 
    440   void AllocateLocations(HInstruction* instruction);
    441 
    442   // Tells whether the stack frame of the compiled method is
    443   // considered "empty", that is either actually having a size of zero,
    444   // or just containing the saved return address register.
    445   bool HasEmptyFrame() const {
    446     return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
    447   }
    448 
    449   static int32_t GetInt32ValueOf(HConstant* constant) {
    450     if (constant->IsIntConstant()) {
    451       return constant->AsIntConstant()->GetValue();
    452     } else if (constant->IsNullConstant()) {
    453       return 0;
    454     } else {
    455       DCHECK(constant->IsFloatConstant());
    456       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    457     }
    458   }
    459 
    460   static int64_t GetInt64ValueOf(HConstant* constant) {
    461     if (constant->IsIntConstant()) {
    462       return constant->AsIntConstant()->GetValue();
    463     } else if (constant->IsNullConstant()) {
    464       return 0;
    465     } else if (constant->IsFloatConstant()) {
    466       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    467     } else if (constant->IsLongConstant()) {
    468       return constant->AsLongConstant()->GetValue();
    469     } else {
    470       DCHECK(constant->IsDoubleConstant());
    471       return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    472     }
    473   }
    474 
    475   size_t GetFirstRegisterSlotInSlowPath() const {
    476     return first_register_slot_in_slow_path_;
    477   }
    478 
    479   uint32_t FrameEntrySpillSize() const {
    480     return GetFpuSpillSize() + GetCoreSpillSize();
    481   }
    482 
    483   virtual ParallelMoveResolver* GetMoveResolver() = 0;
    484 
    485   static void CreateCommonInvokeLocationSummary(
    486       HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
    487 
    488   void GenerateInvokeStaticOrDirectRuntimeCall(
    489       HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
    490   void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
    491 
    492   void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);
    493 
    494   void CreateUnresolvedFieldLocationSummary(
    495       HInstruction* field_access,
    496       Primitive::Type field_type,
    497       const FieldAccessCallingConvention& calling_convention);
    498 
    499   void GenerateUnresolvedFieldAccess(
    500       HInstruction* field_access,
    501       Primitive::Type field_type,
    502       uint32_t field_index,
    503       uint32_t dex_pc,
    504       const FieldAccessCallingConvention& calling_convention);
    505 
    506   static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
    507                                                         Location runtime_type_index_location,
    508                                                         Location runtime_return_location);
    509   void GenerateLoadClassRuntimeCall(HLoadClass* cls);
    510 
    511   static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
    512 
    513   void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
    514   DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
    515 
    516   virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
    517                              HInstruction* instruction,
    518                              uint32_t dex_pc,
    519                              SlowPathCode* slow_path = nullptr) = 0;
    520 
    521   // Check if the desired_string_load_kind is supported. If it is, return it,
    522   // otherwise return a fall-back kind that should be used instead.
    523   virtual HLoadString::LoadKind GetSupportedLoadStringKind(
    524       HLoadString::LoadKind desired_string_load_kind) = 0;
    525 
    526   // Check if the desired_class_load_kind is supported. If it is, return it,
    527   // otherwise return a fall-back kind that should be used instead.
    528   virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
    529       HLoadClass::LoadKind desired_class_load_kind) = 0;
    530 
    531   static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    532     switch (load->GetLoadKind()) {
    533       case HLoadString::LoadKind::kBssEntry:
    534         DCHECK(load->NeedsEnvironment());
    535         return LocationSummary::kCallOnSlowPath;
    536       case HLoadString::LoadKind::kRuntimeCall:
    537         DCHECK(load->NeedsEnvironment());
    538         return LocationSummary::kCallOnMainOnly;
    539       case HLoadString::LoadKind::kJitTableAddress:
    540         DCHECK(!load->NeedsEnvironment());
    541         return kEmitCompilerReadBarrier
    542             ? LocationSummary::kCallOnSlowPath
    543             : LocationSummary::kNoCall;
    544         break;
    545       default:
    546         DCHECK(!load->NeedsEnvironment());
    547         return LocationSummary::kNoCall;
    548     }
    549   }
    550 
    551   // Check if the desired_dispatch_info is supported. If it is, return it,
    552   // otherwise return a fall-back info that should be used instead.
    553   virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
    554       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    555       HInvokeStaticOrDirect* invoke) = 0;
    556 
    557   // Generate a call to a static or direct method.
    558   virtual void GenerateStaticOrDirectCall(
    559       HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
    560   // Generate a call to a virtual method.
    561   virtual void GenerateVirtualCall(
    562       HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
    563 
    564   // Copy the result of a call into the given target.
    565   virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
    566 
    567   virtual void GenerateNop() = 0;
    568 
    569   static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
    570 
    571  protected:
    572   // Patch info used for recording locations of required linker patches and their targets,
    573   // i.e. target method, string, type or code identified by their dex file and index.
    574   template <typename LabelType>
    575   struct PatchInfo {
    576     PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
    577         : dex_file(target_dex_file), index(target_index) { }
    578 
    579     const DexFile& dex_file;
    580     uint32_t index;
    581     LabelType label;
    582   };
    583 
    584   CodeGenerator(HGraph* graph,
    585                 size_t number_of_core_registers,
    586                 size_t number_of_fpu_registers,
    587                 size_t number_of_register_pairs,
    588                 uint32_t core_callee_save_mask,
    589                 uint32_t fpu_callee_save_mask,
    590                 const CompilerOptions& compiler_options,
    591                 OptimizingCompilerStats* stats)
    592       : frame_size_(0),
    593         core_spill_mask_(0),
    594         fpu_spill_mask_(0),
    595         first_register_slot_in_slow_path_(0),
    596         allocated_registers_(RegisterSet::Empty()),
    597         blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
    598                                                                     kArenaAllocCodeGenerator)),
    599         blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
    600                                                                    kArenaAllocCodeGenerator)),
    601         number_of_core_registers_(number_of_core_registers),
    602         number_of_fpu_registers_(number_of_fpu_registers),
    603         number_of_register_pairs_(number_of_register_pairs),
    604         core_callee_save_mask_(core_callee_save_mask),
    605         fpu_callee_save_mask_(fpu_callee_save_mask),
    606         stack_map_stream_(graph->GetArena(), graph->GetInstructionSet()),
    607         block_order_(nullptr),
    608         jit_string_roots_(StringReferenceValueComparator(),
    609                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
    610         jit_class_roots_(TypeReferenceValueComparator(),
    611                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
    612         disasm_info_(nullptr),
    613         stats_(stats),
    614         graph_(graph),
    615         compiler_options_(compiler_options),
    616         slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
    617         current_slow_path_(nullptr),
    618         current_block_index_(0),
    619         is_leaf_(true),
    620         requires_current_method_(false) {
    621     slow_paths_.reserve(8);
    622   }
    623 
    624   virtual HGraphVisitor* GetLocationBuilder() = 0;
    625   virtual HGraphVisitor* GetInstructionVisitor() = 0;
    626 
    627   // Returns the location of the first spilled entry for floating point registers,
    628   // relative to the stack pointer.
    629   uint32_t GetFpuSpillStart() const {
    630     return GetFrameSize() - FrameEntrySpillSize();
    631   }
    632 
    633   uint32_t GetFpuSpillSize() const {
    634     return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
    635   }
    636 
    637   uint32_t GetCoreSpillSize() const {
    638     return POPCOUNT(core_spill_mask_) * GetWordSize();
    639   }
    640 
    641   virtual bool HasAllocatedCalleeSaveRegisters() const {
    642     // We check the core registers against 1 because it always comprises the return PC.
    643     return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
    644       || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
    645   }
    646 
    647   bool CallPushesPC() const {
    648     InstructionSet instruction_set = GetInstructionSet();
    649     return instruction_set == kX86 || instruction_set == kX86_64;
    650   }
    651 
    652   // Arm64 has its own type for a label, so we need to templatize these methods
    653   // to share the logic.
    654 
    655   template <typename LabelType>
    656   LabelType* CommonInitializeLabels() {
    657     // We use raw array allocations instead of ArenaVector<> because Labels are
    658     // non-constructible and non-movable and as such cannot be held in a vector.
    659     size_t size = GetGraph()->GetBlocks().size();
    660     LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
    661                                                                       kArenaAllocCodeGenerator);
    662     for (size_t i = 0; i != size; ++i) {
    663       new(labels + i) LabelType();
    664     }
    665     return labels;
    666   }
    667 
    668   template <typename LabelType>
    669   LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    670     block = FirstNonEmptyBlock(block);
    671     return raw_pointer_to_labels_array + block->GetBlockId();
    672   }
    673 
    674   SlowPathCode* GetCurrentSlowPath() {
    675     return current_slow_path_;
    676   }
    677 
    678   // Emit the patches assocatied with JIT roots. Only applies to JIT compiled code.
    679   virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
    680                                   const uint8_t* roots_data ATTRIBUTE_UNUSED) {
    681     DCHECK_EQ(jit_string_roots_.size(), 0u);
    682     DCHECK_EQ(jit_class_roots_.size(), 0u);
    683   }
    684 
    685   // Frame size required for this method.
    686   uint32_t frame_size_;
    687   uint32_t core_spill_mask_;
    688   uint32_t fpu_spill_mask_;
    689   uint32_t first_register_slot_in_slow_path_;
    690 
    691   // Registers that were allocated during linear scan.
    692   RegisterSet allocated_registers_;
    693 
    694   // Arrays used when doing register allocation to know which
    695   // registers we can allocate. `SetupBlockedRegisters` updates the
    696   // arrays.
    697   bool* const blocked_core_registers_;
    698   bool* const blocked_fpu_registers_;
    699   size_t number_of_core_registers_;
    700   size_t number_of_fpu_registers_;
    701   size_t number_of_register_pairs_;
    702   const uint32_t core_callee_save_mask_;
    703   const uint32_t fpu_callee_save_mask_;
    704 
    705   StackMapStream stack_map_stream_;
    706 
    707   // The order to use for code generation.
    708   const ArenaVector<HBasicBlock*>* block_order_;
    709 
    710   // Maps a StringReference (dex_file, string_index) to the index in the literal table.
    711   // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
    712   // will compute all the indices.
    713   ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
    714 
    715   // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
    716   // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
    717   // will compute all the indices.
    718   ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
    719 
    720   DisassemblyInformation* disasm_info_;
    721 
    722  private:
    723   size_t GetStackOffsetOfSavedRegister(size_t index);
    724   void GenerateSlowPaths();
    725   void BlockIfInRegister(Location location, bool is_out = false) const;
    726   void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
    727 
    728   OptimizingCompilerStats* stats_;
    729 
    730   HGraph* const graph_;
    731   const CompilerOptions& compiler_options_;
    732 
    733   ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
    734 
    735   // The current slow-path that we're generating code for.
    736   SlowPathCode* current_slow_path_;
    737 
    738   // The current block index in `block_order_` of the block
    739   // we are generating code for.
    740   size_t current_block_index_;
    741 
    742   // Whether the method is a leaf method.
    743   bool is_leaf_;
    744 
    745   // Whether an instruction in the graph accesses the current method.
    746   // TODO: Rename: this actually indicates that some instruction in the method
    747   // needs the environment including a valid stack frame.
    748   bool requires_current_method_;
    749 
    750   friend class OptimizingCFITest;
    751 
    752   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
    753 };
    754 
    755 template <typename C, typename F>
    756 class CallingConvention {
    757  public:
    758   CallingConvention(const C* registers,
    759                     size_t number_of_registers,
    760                     const F* fpu_registers,
    761                     size_t number_of_fpu_registers,
    762                     PointerSize pointer_size)
    763       : registers_(registers),
    764         number_of_registers_(number_of_registers),
    765         fpu_registers_(fpu_registers),
    766         number_of_fpu_registers_(number_of_fpu_registers),
    767         pointer_size_(pointer_size) {}
    768 
    769   size_t GetNumberOfRegisters() const { return number_of_registers_; }
    770   size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
    771 
    772   C GetRegisterAt(size_t index) const {
    773     DCHECK_LT(index, number_of_registers_);
    774     return registers_[index];
    775   }
    776 
    777   F GetFpuRegisterAt(size_t index) const {
    778     DCHECK_LT(index, number_of_fpu_registers_);
    779     return fpu_registers_[index];
    780   }
    781 
    782   size_t GetStackOffsetOf(size_t index) const {
    783     // We still reserve the space for parameters passed by registers.
    784     // Add space for the method pointer.
    785     return static_cast<size_t>(pointer_size_) + index * kVRegSize;
    786   }
    787 
    788  private:
    789   const C* registers_;
    790   const size_t number_of_registers_;
    791   const F* fpu_registers_;
    792   const size_t number_of_fpu_registers_;
    793   const PointerSize pointer_size_;
    794 
    795   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
    796 };
    797 
    798 /**
    799  * A templated class SlowPathGenerator with a templated method NewSlowPath()
    800  * that can be used by any code generator to share equivalent slow-paths with
    801  * the objective of reducing generated code size.
    802  *
    803  * InstructionType:  instruction that requires SlowPathCodeType
    804  * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
    805  */
    806 template <typename InstructionType>
    807 class SlowPathGenerator {
    808   static_assert(std::is_base_of<HInstruction, InstructionType>::value,
    809                 "InstructionType is not a subclass of art::HInstruction");
    810 
    811  public:
    812   SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
    813       : graph_(graph),
    814         codegen_(codegen),
    815         slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
    816 
    817   // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
    818   // Templating the method (rather than the whole class) on the slow-path type enables
    819   // keeping this code at a generic, non architecture-specific place.
    820   //
    821   // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
    822   //       To relax this requirement, we would need some RTTI on the stored slow-paths,
    823   //       or template the class as a whole on SlowPathType.
    824   template <typename SlowPathCodeType>
    825   SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    826     static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
    827                   "SlowPathCodeType is not a subclass of art::SlowPathCode");
    828     static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
    829                   "SlowPathCodeType is not constructible from InstructionType*");
    830     // Iterate over potential candidates for sharing. Currently, only same-typed
    831     // slow-paths with exactly the same dex-pc are viable candidates.
    832     // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    833     const uint32_t dex_pc = instruction->GetDexPc();
    834     auto iter = slow_path_map_.find(dex_pc);
    835     if (iter != slow_path_map_.end()) {
    836       const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
    837       for (const auto& it : candidates) {
    838         InstructionType* other_instruction = it.first;
    839         SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
    840         // Determine if the instructions allow for slow-path sharing.
    841         if (HaveSameLiveRegisters(instruction, other_instruction) &&
    842             HaveSameStackMap(instruction, other_instruction)) {
    843           // Can share: reuse existing one.
    844           return other_slow_path;
    845         }
    846       }
    847     } else {
    848       // First time this dex-pc is seen.
    849       iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    850     }
    851     // Cannot share: create and add new slow-path for this particular dex-pc.
    852     SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    853     iter->second.emplace_back(std::make_pair(instruction, slow_path));
    854     codegen_->AddSlowPath(slow_path);
    855     return slow_path;
    856   }
    857 
    858  private:
    859   // Tests if both instructions have same set of live physical registers. This ensures
    860   // the slow-path has exactly the same preamble on saving these registers to stack.
    861   bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    862     const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    863     const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    864     RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    865     RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    866     return (((live1->GetCoreRegisters() & core_spill) ==
    867              (live2->GetCoreRegisters() & core_spill)) &&
    868             ((live1->GetFloatingPointRegisters() & fpu_spill) ==
    869              (live2->GetFloatingPointRegisters() & fpu_spill)));
    870   }
    871 
    872   // Tests if both instructions have the same stack map. This ensures the interpreter
    873   // will find exactly the same dex-registers at the same entries.
    874   bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    875     DCHECK(i1->HasEnvironment());
    876     DCHECK(i2->HasEnvironment());
    877     // We conservatively test if the two instructions find exactly the same instructions
    878     // and location in each dex-register. This guarantees they will have the same stack map.
    879     HEnvironment* e1 = i1->GetEnvironment();
    880     HEnvironment* e2 = i2->GetEnvironment();
    881     if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
    882       return false;
    883     }
    884     for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
    885       if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
    886           !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
    887         return false;
    888       }
    889     }
    890     return true;
    891   }
    892 
    893   HGraph* const graph_;
    894   CodeGenerator* const codegen_;
    895 
    896   // Map from dex-pc to vector of already existing instruction/slow-path pairs.
    897   ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
    898 
    899   DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
    900 };
    901 
    902 class InstructionCodeGenerator : public HGraphVisitor {
    903  public:
    904   InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
    905       : HGraphVisitor(graph),
    906         deopt_slow_paths_(graph, codegen) {}
    907 
    908  protected:
    909   // Add slow-path generator for each instruction/slow-path combination that desires sharing.
    910   // TODO: under current regime, only deopt sharing make sense; extend later.
    911   SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
    912 };
    913 
    914 }  // namespace art
    915 
    916 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
    917