/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_

#include "arch/x86/instruction_set_features_x86.h"
#include "base/enums.h"
#include "code_generator.h"
#include "dex/dex_file_types.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86/assembler_x86.h"

namespace art {
namespace x86 {

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86WordSize = static_cast<size_t>(kX86PointerSize);

class CodeGeneratorX86;

static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);
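// Illustrative note, not part of the original header: a hedged sketch of how
// the managed-code convention arrays above map arguments, assuming the usual
// ART x86 setup where EAX carries the ArtMethod* and a hypothetical method
// `int foo(int a, long b, float f)` is compiled. Roughly:
//
//   a (int32)  -> ECX       (kParameterCoreRegisters[0])
//   b (int64)  -> EDX:EBX   (kParameterCorePairRegisters[1], via
//                            InvokeDexCallingConvention::GetRegisterPairAt())
//   f (float)  -> XMM0      (kParameterFpuRegisters[0])
//
// Arguments beyond the registers listed above go on the stack; the exact
// mapping is computed by InvokeDexCallingConventionVisitorX86 below.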

class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFpuRegisters,
      kParameterFpuRegistersLength,
      kX86PointerSize) {}

  RegisterPair GetRegisterPairAt(size_t argument_index) {
    DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
    return kParameterCorePairRegisters[argument_index];
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};

class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86() {}
  virtual ~InvokeDexCallingConventionVisitorX86() {}

  Location GetNextLocation(DataType::Type type) override;
  Location GetReturnLocation(DataType::Type type) const override;
  Location GetMethodLocation() const override;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
};

class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86() {}

  Location GetObjectLocation() const override {
    return Location::RegisterLocation(ECX);
  }
  Location GetFieldIndexLocation() const override {
    return Location::RegisterLocation(EAX);
  }
  Location GetReturnLocation(DataType::Type type) const override {
    return DataType::Is64BitType(type)
        ? Location::RegisterPairLocation(EAX, EDX)
        : Location::RegisterLocation(EAX);
  }
  Location GetSetValueLocation(DataType::Type type, bool is_instance) const override {
    return DataType::Is64BitType(type)
        ? (is_instance
            ? Location::RegisterPairLocation(EDX, EBX)
            : Location::RegisterPairLocation(ECX, EDX))
        : (is_instance
            ? Location::RegisterLocation(EDX)
            : Location::RegisterLocation(ECX));
  }
  Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86);
};
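// Clarifying note, not part of the original header: spelled out, the nested
// conditionals in GetSetValueLocation() above mean that a 64-bit instance
// field store passes the value in the EDX:EBX pair (the object reference
// being in ECX per GetObjectLocation()), a 64-bit static store uses ECX:EDX,
// and 32-bit stores use a single register: EDX for instance stores, ECX for
// static ones.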

class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) override;
  void EmitSwap(size_t index) override;
  void SpillScratch(int reg) override;
  void RestoreScratch(int reg) override;

  X86Assembler* GetAssembler() const;

 private:
  void Exchange(Register reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory(int mem1, int mem2, int number_of_words);
  void MoveMemoryToMemory(int dst, int src, int number_of_words);

  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86);
};

class LocationsBuilderX86 : public HGraphVisitor {
 public:
  LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void HandleInvoke(HInvoke* invoke);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  CodeGeneratorX86* const codegen_;
  InvokeDexCallingConventionVisitorX86 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
};

class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) override;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) override {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86Assembler* GetAssembler() const { return assembler_; }

  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
  // table version generates 7 instructions and num_entries literals. The compare/jump
  // sequence generates less code/data when num_entries is small.
  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
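  // Illustrative sketch, not part of the original header: the two lowerings
  // in rough AT&T-style pseudo-assembly, for a switch value in %val, cases
  // starting at lower_bound, and %base holding the method base address.
  //
  //   Compare/jump sequence (num_entries below the threshold):
  //     cmpl $lower_bound,   %val ; je case_0
  //     cmpl $lower_bound+1, %val ; je case_1
  //     ...
  //     jmp default
  //
  //   Jump table (num_entries at or above the threshold):
  //     subl $lower_bound, %val            // Normalize to 0..num_entries-1.
  //     cmpl $num_entries-1, %val          // Bounds check.
  //     ja   default
  //     movl table(%base, %val, 4), %tmp   // Load the 32-bit offset literal.
  //     addl %base, %tmp                   // Turn the offset into an address.
  //     jmp  *%tmp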

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
  void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void RemByPowerOfTwo(HRem* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateRemFP(HRem* rem);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void GenerateShlLong(const Location& loc, Register shifter);
  void GenerateShrLong(const Location& loc, Register shifter);
  void GenerateUShrLong(const Location& loc, Register shifter);
  void GenerateShlLong(const Location& loc, int shift);
  void GenerateShrLong(const Location& loc, int shift);
  void GenerateUShrLong(const Location& loc, int shift);
  void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
  void GenerateMinMax(HBinaryOperation* minmax, bool is_min);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);
  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);

  // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
  // `is_wide` specifies whether it is long/double or not.
  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_fp, bool is_wide);

  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
  template<class LabelType>
  void GenerateLongComparesAndJumps(HCondition* cond,
                                    LabelType* true_label,
                                    LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);
  void GenPackedSwitchWithCompares(Register value_reg,
                                   int32_t lower_bound,
                                   uint32_t num_entries,
                                   HBasicBlock* switch_block,
                                   HBasicBlock* default_block);

  void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);

  X86Assembler* const assembler_;
  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
};

class JumpTableRIPFixup;

class CodeGeneratorX86 : public CodeGenerator {
 public:
  CodeGeneratorX86(HGraph* graph,
                   const CompilerOptions& compiler_options,
                   OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86() {}

  void GenerateFrameEntry() override;
  void GenerateFrameExit() override;
  void Bind(HBasicBlock* block) override;
  void MoveConstant(Location destination, int32_t value) override;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
  void AddLocationAsTemp(Location location, LocationSummary* locations) override;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) override;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const override {
    return kX86WordSize;
  }

  size_t GetFloatingPointSpillSlotSize() const override {
    return GetGraph()->HasSIMD()
        ? 4 * kX86WordSize   // 16 bytes == 4 words for each spill
        : 2 * kX86WordSize;  //  8 bytes == 2 words for each spill
  }

  HGraphVisitor* GetLocationBuilder() override {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() override {
    return &instruction_visitor_;
  }

  X86Assembler* GetAssembler() override {
    return &assembler_;
  }

  const X86Assembler& GetAssembler() const override {
    return assembler_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) override {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const override;

  void DumpCoreRegister(std::ostream& stream, int reg) const override;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;

  ParallelMoveResolverX86* GetMoveResolver() override {
    return &move_resolver_;
  }

  InstructionSet GetInstructionSet() const override {
    return InstructionSet::kX86;
  }

  const X86InstructionSetFeatures& GetInstructionSetFeatures() const;

  // Helper method to move a 32-bit value between two locations.
  void Move32(Location destination, Location source);
  // Helper method to move a 64-bit value between two locations.
  void Move64(Location destination, Location source);

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) override;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) override;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) override;

  // Generate a call to a static or direct method.
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
  // Generate a call to a virtual method.
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;

  void RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
                                     uint32_t intrinsic_data);
  void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
                                 uint32_t boot_image_offset);
  void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke);
  void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke);
  void RecordBootImageTypePatch(HLoadClass* load_class);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);

  void LoadBootImageAddress(Register reg,
                            uint32_t boot_image_reference,
                            HInvokeStaticOrDirect* invoke);
  void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);

  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);

  void MoveFromReturnRegister(Location trg, DataType::Type type) override;

  // Emit linker patches.
  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;
  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;

  // Emit a write barrier.
  void MarkGCCard(Register temp,
                  Register card,
                  Register object,
                  Register value,
                  bool value_can_be_null);

  void GenerateMemoryBarrier(MemBarrierKind kind);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() override {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters(DataType::Type type) const override {
    return type == DataType::Type::kInt64;
  }

  bool ShouldSplitLongMoves() const override { return true; }
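  // Clarifying note, not part of the original header: a kInt64 value occupies
  // a register pair on x86 (for example EAX:EDX), so NeedsTwoRegisters() above
  // makes the register allocator reserve two registers per long, and
  // ShouldSplitLongMoves() makes it split each 64-bit move into two
  // independent 32-bit moves, one per half.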

  Label* GetFrameEntryLabel() { return &frame_entry_label_; }

  void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) {
    method_address_offset_.Put(method_base->GetId(), offset);
  }

  int32_t GetMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base) const {
    return method_address_offset_.Get(method_base->GetId());
  }

  int32_t ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralFloatAddress(float v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralInt32Address(int32_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralInt64Address(int64_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);
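  // Illustrative sketch, not part of the original header: 32-bit x86 has no
  // PC-relative addressing for data, so FP and wide constants live in a
  // per-method constant area reached through HX86ComputeBaseMethodAddress.
  // Loading a double constant might look roughly like:
  //
  //   // `base_reg` is assumed to hold the method base address.
  //   __ movsd(XMM0, LiteralDoubleAddress(3.14, method_base, base_reg));
  //
  // The returned Address initially uses a placeholder displacement (see
  // kDummy32BitOffset below) that is corrected once constant_area_start_
  // is known during Finalize().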

  // Load a 32-bit value into a register in the most efficient manner.
  void Load32BitValue(Register dest, int32_t value);

  // Compare a register with a 32-bit value in the most efficient manner.
  void Compare32BitValue(Register dest, int32_t value);

  // Compare int values. Supports only register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(Register lhs, Location rhs);

  // Construct address for array access.
  static Address ArrayAddress(Register obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);

  void Finalize(CodeAllocator* allocator) override;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed.  The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`).  This operation
  // requires a temporary register, which must be provided as a
  // non-null pointer (`temp`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 Register* temp = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e. for array accesses), the offset
  // value passed to artReadBarrierSlow is adjusted to take `index`
  // into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
  void MemoryFence(bool non_temporal = false) {
    if (!non_temporal) {
      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }
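  // Clarifying note, not part of the original header: the default path emits
  // `lock addl $0, (%esp)`, which acts as a full barrier on x86 and is
  // generally cheaper than mfence; mfence is still required to order
  // non-temporal (streaming) stores, hence the `non_temporal` parameter.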

  void GenerateNop() override;
  void GenerateImplicitNullCheck(HNullCheck* instruction) override;
  void GenerateExplicitNullCheck(HNullCheck* instruction) override;

  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
  // The correct value will be inserted when processing Assembler fixups.
  static constexpr int32_t kDummy32BitOffset = 256;

 private:
  struct X86PcRelativePatchInfo : PatchInfo<Label> {
    X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address,
                           const DexFile* target_dex_file,
                           uint32_t target_index)
        : PatchInfo(target_dex_file, target_index),
          method_address(address) {}
    HX86ComputeBaseMethodAddress* method_address;
  };

  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  void EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo>& infos,
                                   ArenaVector<linker::LinkerPatch>* linker_patches);

  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86 location_builder_;
  InstructionCodeGeneratorX86 instruction_visitor_;
  ParallelMoveResolverX86 move_resolver_;
  X86Assembler assembler_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
  // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
  ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
  // PC-relative type patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
  // PC-relative String patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_;
  // PC-relative patch info for IntrinsicObjects.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_intrinsic_patches_;

  // Patches for string root accesses in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class root accesses in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int32_t constant_area_start_;

  // Fixups for jump tables that need to be patched after the constant table is generated.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  // Maps an HX86ComputeBaseMethodAddress instruction id to its offset in the
  // compiled code.
  ArenaSafeMap<uint32_t, int32_t> method_address_offset_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};

}  // namespace x86
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_