/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "stack_map_stream.h"
#include "utils/label.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
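// (These encodings follow from the IEEE-754 double layout: sign 0, zero
// mantissa, and a biased exponent of 0x41F = 1023 + 32 (respectively
// 0x41E = 1023 + 31), i.e. 1.0 * 2^32 and 1.0 * 2^31.)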

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class CompilerDriver;
class LinkerPatch;
class ParallelMoveResolver;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
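
// A minimal sketch of a concrete allocator (hypothetical, for illustration
// only; the allocators actually used by the compiler driver live outside this
// header). It assumes the caller keeps the vector alive for as long as the
// emitted code is needed:
//
//   class VectorCodeAllocator : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) OVERRIDE {
//       memory_.resize(size);
//       return memory_.data();
//     }
//     const std::vector<uint8_t>& GetMemory() const { return memory_; }
//
//    private:
//     std::vector<uint8_t> memory_;
//   };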

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
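
// The concrete slow paths live in the per-architecture code generators
// (code_generator_<arch>.cc). A rough, illustrative sketch of what a subclass
// looks like (the name and the emitted runtime call are assumptions, not the
// exact implementation):
//
//   class NullCheckSlowPathARM : public SlowPathCode {
//    public:
//     explicit NullCheckSlowPathARM(HNullCheck* instruction) : SlowPathCode(instruction) {}
//
//     void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//       __ Bind(GetEntryLabel());
//       // ... call the runtime entrypoint that throws NullPointerException ...
//     }
//
//     bool IsFatal() const OVERRIDE { return true; }
//     const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM"; }
//   };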

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
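  // (Example: on arm64 the ArtMethod* pointer is 8 bytes and kVRegSize is 4,
  // so with a 64-byte frame the parameter at index 2 is at 64 + 8 + 2 * 4 = 80.)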

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fpu_registers,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
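  // (Example: for registers {0, 2, 5} this yields
  // (1 << 0) | (1 << 2) | (1 << 5) = 0x25.)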

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register to the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  // Record the native-to-dex mapping for a suspend point. Required by the runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Check whether we have already recorded a mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Record extra stack maps if we support native debugging.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // Returns true if implicit null checks are allowed in the compiler options
  // and if the null check is not inside a try block. We currently cannot do
  // implicit null checks in that case because we need the NullCheckSlowPath to
  // save live registers, which may be needed by the runtime to set catch phis.
  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;

  // TODO: Avoid creating the `std::unique_ptr` here.
  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
  }

  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
  size_t ComputeStackMapsSize();

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that a null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }

  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is, it either actually has a size of zero
  // or just contains the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
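  // (On x86/x86-64 `call` pushes the return PC, so an "empty" frame is still
  // GetWordSize() bytes; on the other architectures it is literally zero.)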

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
  static void CreateLoadClassLocationSummary(HLoadClass* cls,
                                             Location runtime_type_index_location,
                                             Location runtime_return_location,
                                             bool code_generator_supports_read_barrier = false);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it;
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_dispatch_info is supported. If it is, return it;
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      MethodReference target_method) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

 protected:
  // Method patch info used for recording locations of required linker patches and
  // target methods. The target method can be used for various purposes, whether for
  // patching the address of the method or the code pointer or a PC-relative call.
  template <typename LabelType>
  struct MethodPatchInfo {
    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }

    MethodReference target_method;
    LabelType label;
  };

  // String patch info used for recording locations of required linker patches and
  // target strings. The actual string address can be absolute or PC-relative.
  template <typename LabelType>
  struct StringPatchInfo {
    StringPatchInfo(const DexFile& df, uint32_t index)
        : dex_file(df), string_index(index), label() { }

    const DexFile& dex_file;
    uint32_t string_index;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
                                                                    kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-copyable and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }
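  // (Example: with a 4-byte method pointer, as on x86, the argument at vreg
  // index 2 is at stack offset 4 + 2 * kVRegSize = 12.)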

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns an existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non-architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      auto candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have the same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble for saving these registers to the stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instruction
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under the current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};
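
// How a per-architecture instruction visitor is expected to use the shared
// generator above (an illustrative sketch; the concrete slow-path type is
// architecture-specific):
//
//   void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
//     SlowPathCode* slow_path =
//         deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
//     // ... emit the condition test and branch to slow_path->GetEntryLabel() ...
//   }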

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_