/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_

#include "arch/x86/instruction_set_features_x86.h"
#include "base/enums.h"
#include "code_generator.h"
#include "dex/dex_file_types.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86/assembler_x86.h"

namespace art {
namespace x86 {

// Use a local definition to prevent copying mistakes.
static constexpr size_t kX86WordSize = static_cast<size_t>(kX86PointerSize);

class CodeGeneratorX86;

static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr XmmRegister kParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);

static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
    arraysize(kRuntimeParameterCoreRegisters);
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
    arraysize(kRuntimeParameterFpuRegisters);

class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeRuntimeCallingConvention()
      : CallingConvention(kRuntimeParameterCoreRegisters,
                          kRuntimeParameterCoreRegistersLength,
                          kRuntimeParameterFpuRegisters,
                          kRuntimeParameterFpuRegistersLength,
                          kX86PointerSize) {}

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
};

class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
 public:
  InvokeDexCallingConvention() : CallingConvention(
      kParameterCoreRegisters,
      kParameterCoreRegistersLength,
      kParameterFpuRegisters,
      kParameterFpuRegistersLength,
      kX86PointerSize) {}

  RegisterPair GetRegisterPairAt(size_t argument_index) {
    DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
    return kParameterCorePairRegisters[argument_index];
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
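
// For example (an illustrative reading of the arrays above): a managed method
// taking (int, long) gets the int in ECX (core register 0) and the long in the
// EDX:EBX register pair, i.e. GetRegisterPairAt(1) == EDX_EBX from
// kParameterCorePairRegisters; arguments beyond the register arrays are passed
// on the stack.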

class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
 public:
  InvokeDexCallingConventionVisitorX86() {}
  virtual ~InvokeDexCallingConventionVisitorX86() {}

  Location GetNextLocation(DataType::Type type) OVERRIDE;
  Location GetReturnLocation(DataType::Type type) const OVERRIDE;
  Location GetMethodLocation() const OVERRIDE;

 private:
  InvokeDexCallingConvention calling_convention;

  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
};

class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
 public:
  FieldAccessCallingConventionX86() {}

  Location GetObjectLocation() const OVERRIDE {
    return Location::RegisterLocation(ECX);
  }
  Location GetFieldIndexLocation() const OVERRIDE {
    return Location::RegisterLocation(EAX);
  }
  Location GetReturnLocation(DataType::Type type) const OVERRIDE {
    return DataType::Is64BitType(type)
        ? Location::RegisterPairLocation(EAX, EDX)
        : Location::RegisterLocation(EAX);
  }
  Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE {
    return DataType::Is64BitType(type)
        ? (is_instance
            ? Location::RegisterPairLocation(EDX, EBX)
            : Location::RegisterPairLocation(ECX, EDX))
        : (is_instance
            ? Location::RegisterLocation(EDX)
            : Location::RegisterLocation(ECX));
  }
  Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
    return Location::FpuRegisterLocation(XMM0);
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86);
};
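
// A worked example of the convention above (illustrative only):
//
//   FieldAccessCallingConventionX86 conv;
//   conv.GetSetValueLocation(DataType::Type::kInt64, /* is_instance= */ true);
//   // -> pair (EDX, EBX): ECX is taken by the object, see GetObjectLocation().
//   conv.GetSetValueLocation(DataType::Type::kInt64, /* is_instance= */ false);
//   // -> pair (ECX, EDX): no object register is needed for a static field.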

class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
 public:
  ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}

  void EmitMove(size_t index) OVERRIDE;
  void EmitSwap(size_t index) OVERRIDE;
  void SpillScratch(int reg) OVERRIDE;
  void RestoreScratch(int reg) OVERRIDE;

  X86Assembler* GetAssembler() const;

 private:
  void Exchange(Register reg, int mem);
  void Exchange32(XmmRegister reg, int mem);
  void Exchange128(XmmRegister reg, int mem);
  void ExchangeMemory(int mem1, int mem2, int number_of_words);
  void MoveMemoryToMemory(int dst, int src, int number_of_words);

  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86);
};

class LocationsBuilderX86 : public HGraphVisitor {
 public:
  LocationsBuilderX86(HGraph* graph, CodeGeneratorX86* codegen)
      : HGraphVisitor(graph), codegen_(codegen) {}

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) OVERRIDE;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) OVERRIDE {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

 private:
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void HandleInvoke(HInvoke* invoke);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  CodeGeneratorX86* const codegen_;
  InvokeDexCallingConventionVisitorX86 parameter_visitor_;

  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
};

class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
 public:
  InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);

#define DECLARE_VISIT_INSTRUCTION(name, super)     \
  void Visit##name(H##name* instr) OVERRIDE;

  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

  void VisitInstruction(HInstruction* instruction) OVERRIDE {
    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
               << " (id " << instruction->GetId() << ")";
  }

  X86Assembler* GetAssembler() const { return assembler_; }
  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
  // table version generates 7 instructions and num_entries literals. The compare/jump
  // sequence generates less code/data for a small num_entries.
  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
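  // For example, at num_entries == 5 the compare/jump sequence costs roughly
  // 1.5 * 5 ~= 8 instructions, already on par with the jump table's 7
  // instructions plus 5 table literals; below the threshold, compares win.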

 private:
  // Generate code for the given suspend check. If not null, `successor`
  // is the block to branch to if the suspend check is not needed, and after
  // the suspend call.
  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
  void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
  void HandleBitwiseOperation(HBinaryOperation* instruction);
  void GenerateDivRemIntegral(HBinaryOperation* instruction);
  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
  void DivByPowerOfTwo(HDiv* instruction);
  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
  void GenerateRemFP(HRem* rem);
  void HandleCondition(HCondition* condition);
  void HandleShift(HBinaryOperation* instruction);
  void GenerateShlLong(const Location& loc, Register shifter);
  void GenerateShrLong(const Location& loc, Register shifter);
  void GenerateUShrLong(const Location& loc, Register shifter);
  void GenerateShlLong(const Location& loc, int shift);
  void GenerateShrLong(const Location& loc, int shift);
  void GenerateUShrLong(const Location& loc, int shift);

  void HandleFieldSet(HInstruction* instruction,
                      const FieldInfo& field_info,
                      bool value_can_be_null);
  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);

  // Generate a heap reference load using one register `out`:
  //
  //   out <- *(out + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a read barrier and
  // shall be a register in that case; it may be an invalid location
  // otherwise.
  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                        Location out,
                                        uint32_t offset,
                                        Location maybe_temp,
                                        ReadBarrierOption read_barrier_option);
  // Generate a heap reference load using two different registers
  // `out` and `obj`:
  //
  //   out <- *(obj + offset)
  //
  // while honoring heap poisoning and/or read barriers (if any).
  //
  // Location `maybe_temp` is used when generating a Baker's (fast
  // path) read barrier and shall be a register in that case; it may
  // be an invalid location otherwise.
  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                         Location out,
                                         Location obj,
                                         uint32_t offset,
                                         ReadBarrierOption read_barrier_option);
  // Generate a GC root reference load:
  //
  //   root <- *address
  //
  // while honoring read barriers based on read_barrier_option.
  void GenerateGcRootFieldLoad(HInstruction* instruction,
                               Location root,
                               const Address& address,
                               Label* fixup_label,
                               ReadBarrierOption read_barrier_option);

  // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
  // `is_wide` specifies whether it is long/double or not.
  void PushOntoFPStack(Location source, uint32_t temp_offset,
                       uint32_t stack_adjustment, bool is_fp, bool is_wide);

  template<class LabelType>
  void GenerateTestAndBranch(HInstruction* instruction,
                             size_t condition_input_index,
                             LabelType* true_target,
                             LabelType* false_target);
  template<class LabelType>
  void GenerateCompareTestAndBranch(HCondition* condition,
                                    LabelType* true_target,
                                    LabelType* false_target);
  template<class LabelType>
  void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label);
  template<class LabelType>
  void GenerateLongComparesAndJumps(HCondition* cond,
                                    LabelType* true_label,
                                    LabelType* false_label);

  void HandleGoto(HInstruction* got, HBasicBlock* successor);
  void GenPackedSwitchWithCompares(Register value_reg,
                                   int32_t lower_bound,
                                   uint32_t num_entries,
                                   HBasicBlock* switch_block,
                                   HBasicBlock* default_block);

  void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);

  X86Assembler* const assembler_;
  CodeGeneratorX86* const codegen_;

  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
};

class JumpTableRIPFixup;

class CodeGeneratorX86 : public CodeGenerator {
 public:
  CodeGeneratorX86(HGraph* graph,
                   const X86InstructionSetFeatures& isa_features,
                   const CompilerOptions& compiler_options,
                   OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGeneratorX86() {}

  void GenerateFrameEntry() OVERRIDE;
  void GenerateFrameExit() OVERRIDE;
  void Bind(HBasicBlock* block) OVERRIDE;
  void MoveConstant(Location destination, int32_t value) OVERRIDE;
  void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE;
  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;

  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;

  // Generate code to invoke a runtime entry point.
  void InvokeRuntime(QuickEntrypointEnum entrypoint,
                     HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr) OVERRIDE;

  // Generate code to invoke a runtime entry point, but do not record
  // PC-related information in a stack map.
  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                           HInstruction* instruction,
                                           SlowPathCode* slow_path);

  void GenerateInvokeRuntime(int32_t entry_point_offset);

  size_t GetWordSize() const OVERRIDE {
    return kX86WordSize;
  }

  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
    return GetGraph()->HasSIMD()
        ? 4 * kX86WordSize   // 16 bytes == 4 words for each spill
        : 2 * kX86WordSize;  //  8 bytes == 2 words for each spill
  }
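
  // (With kX86WordSize == 4, the slot above is 16 bytes when the graph has
  // SIMD, wide enough for a full XMM register, and 8 bytes otherwise, enough
  // for a double.)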

  HGraphVisitor* GetLocationBuilder() OVERRIDE {
    return &location_builder_;
  }

  HGraphVisitor* GetInstructionVisitor() OVERRIDE {
    return &instruction_visitor_;
  }

  X86Assembler* GetAssembler() OVERRIDE {
    return &assembler_;
  }

  const X86Assembler& GetAssembler() const OVERRIDE {
    return assembler_;
  }

  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
    return GetLabelOf(block)->Position();
  }

  void SetupBlockedRegisters() const OVERRIDE;

  void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
  void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;

  ParallelMoveResolverX86* GetMoveResolver() OVERRIDE {
    return &move_resolver_;
  }

  InstructionSet GetInstructionSet() const OVERRIDE {
    return InstructionSet::kX86;
  }

  // Helper method to move a 32-bit value between two locations.
  void Move32(Location destination, Location source);
  // Helper method to move a 64-bit value between two locations.
  void Move64(Location destination, Location source);

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) OVERRIDE;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      HInvokeStaticOrDirect* invoke) OVERRIDE;

  // Generate a call to a static or direct method.
  void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
  // Generate a call to a virtual method.
  void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;

  void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke);
  void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke);
  void RecordBootImageTypePatch(HLoadClass* load_class);
  Label* NewTypeBssEntryPatch(HLoadClass* load_class);
  void RecordBootImageStringPatch(HLoadString* load_string);
  Label* NewStringBssEntryPatch(HLoadString* load_string);
  Label* NewJitRootStringPatch(const DexFile& dex_file,
                               dex::StringIndex string_index,
                               Handle<mirror::String> handle);
  Label* NewJitRootClassPatch(const DexFile& dex_file,
                              dex::TypeIndex type_index,
                              Handle<mirror::Class> handle);

  void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE;

  // Emit linker patches.
  void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;

  void PatchJitRootUse(uint8_t* code,
                       const uint8_t* roots_data,
                       const PatchInfo<Label>& info,
                       uint64_t index_in_table) const;
  void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;

  // Emit a write barrier.
  void MarkGCCard(Register temp,
                  Register card,
                  Register object,
                  Register value,
                  bool value_can_be_null);
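  // (Sketch of the usual card-marking scheme, not the exact emitted code: when
  // `value_can_be_null` is true, a null test on `value` is emitted so null
  // stores skip the marking; the barrier then dirties the card covering
  // `object`, roughly card_table[object >> kCardShift] = dirty.)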

  void GenerateMemoryBarrier(MemBarrierKind kind);

  Label* GetLabelOf(HBasicBlock* block) const {
    return CommonGetLabelOf<Label>(block_labels_, block);
  }

  void Initialize() OVERRIDE {
    block_labels_ = CommonInitializeLabels<Label>();
  }

  bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE {
    return type == DataType::Type::kInt64;
  }

  bool ShouldSplitLongMoves() const OVERRIDE { return true; }

  Label* GetFrameEntryLabel() { return &frame_entry_label_; }

  const X86InstructionSetFeatures& GetInstructionSetFeatures() const {
    return isa_features_;
  }

  void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) {
    method_address_offset_.Put(method_base->GetId(), offset);
  }

  int32_t GetMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base) const {
    return method_address_offset_.Get(method_base->GetId());
  }

  int32_t ConstantAreaStart() const {
    return constant_area_start_;
  }

  Address LiteralDoubleAddress(double v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralFloatAddress(float v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralInt32Address(int32_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);
  Address LiteralInt64Address(int64_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);

  // Load a 32-bit value into a register in the most efficient manner.
  void Load32BitValue(Register dest, int32_t value);

  // Compare a register with a 32-bit value in the most efficient manner.
  void Compare32BitValue(Register dest, int32_t value);

  // Compare int values. Supports only register locations for `lhs`.
  void GenerateIntCompare(Location lhs, Location rhs);
  void GenerateIntCompare(Register lhs, Location rhs);

  // Construct address for array access.
  static Address ArrayAddress(Register obj,
                              Location index,
                              ScaleFactor scale,
                              uint32_t data_offset);

  Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);

  void Finalize(CodeAllocator* allocator) OVERRIDE;

  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference field load when Baker's read barriers are used.
  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t offset,
                                             bool needs_null_check);
  // Fast path implementation of ReadBarrier::Barrier for a heap
  // reference array load when Baker's read barriers are used.
  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                             Location ref,
                                             Register obj,
                                             uint32_t data_offset,
                                             Location index,
                                             bool needs_null_check);
  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
  //
  // Load the object reference located at address `src`, held by
  // object `obj`, into `ref`, and mark it if needed.  The base of
  // address `src` must be `obj`.
  //
  // If `always_update_field` is true, the value of the reference is
  // atomically updated in the holder (`obj`).  This operation
  // requires a temporary register, which must be provided as a
  // non-null pointer (`temp`).
  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                 Location ref,
                                                 Register obj,
                                                 const Address& src,
                                                 bool needs_null_check,
                                                 bool always_update_field = false,
                                                 Register* temp = nullptr);

  // Generate a read barrier for a heap reference within `instruction`
  // using a slow path.
  //
  // A read barrier for an object reference read from the heap is
  // implemented as a call to the artReadBarrierSlow runtime entry
  // point, which is passed the values in locations `ref`, `obj`, and
  // `offset`:
  //
  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
  //                                      mirror::Object* obj,
  //                                      uint32_t offset);
  //
  // The `out` location contains the value returned by
  // artReadBarrierSlow.
  //
  // When `index` is provided (i.e. for array accesses), the offset
  // value passed to artReadBarrierSlow is adjusted to take `index`
  // into account.
  void GenerateReadBarrierSlow(HInstruction* instruction,
                               Location out,
                               Location ref,
                               Location obj,
                               uint32_t offset,
                               Location index = Location::NoLocation());

  // If read barriers are enabled, generate a read barrier for a heap
  // reference using a slow path. If heap poisoning is enabled, also
  // unpoison the reference in `out`.
  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                    Location out,
                                    Location ref,
                                    Location obj,
                                    uint32_t offset,
                                    Location index = Location::NoLocation());

  // Generate a read barrier for a GC root within `instruction` using
  // a slow path.
  //
  // A read barrier for an object reference GC root is implemented as
  // a call to the artReadBarrierForRootSlow runtime entry point,
  // which is passed the value in location `root`:
  //
  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
  //
  // The `out` location contains the value returned by
  // artReadBarrierForRootSlow.
  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);

  // Ensure that prior stores complete to memory before subsequent loads.
  // The locked add implementation will avoid serializing device memory, but will
  // touch (but not change) the top of the stack.
  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
  void MemoryFence(bool non_temporal = false) {
    if (!non_temporal) {
      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
    } else {
      assembler_.mfence();
    }
  }
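
  // Background on the idiom above: any locked read-modify-write on x86 acts as
  // a full barrier, so `lock addl $0, (%esp)` orders earlier stores before
  // later loads without the cost of mfence; mfence is still needed to order
  // non-temporal stores, hence the `non_temporal` flag.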

  void GenerateNop() OVERRIDE;
  void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
  void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;

  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
  // The correct value will be inserted when processing Assembler fixups.
  static constexpr int32_t kDummy32BitOffset = 256;
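  // (256 does not fit in a signed 8-bit displacement, which forces the
  // assembler to emit a full 32-bit displacement that the fixup can patch
  // in place.)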

 private:
  struct X86PcRelativePatchInfo : PatchInfo<Label> {
    X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address,
                           const DexFile* target_dex_file,
                           uint32_t target_index)
        : PatchInfo(target_dex_file, target_index),
          method_address(address) {}
    HX86ComputeBaseMethodAddress* method_address;
  };

  template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
  void EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo>& infos,
                                   ArenaVector<linker::LinkerPatch>* linker_patches);

  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);

  // Labels for each block that will be compiled.
  Label* block_labels_;  // Indexed by block id.
  Label frame_entry_label_;
  LocationsBuilderX86 location_builder_;
  InstructionCodeGeneratorX86 instruction_visitor_;
  ParallelMoveResolverX86 move_resolver_;
  X86Assembler assembler_;
  const X86InstructionSetFeatures& isa_features_;

  // PC-relative method patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
  // PC-relative method patch info for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
  // PC-relative type patch info for kBootImageLinkTimePcRelative.
  ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
  // Type patch locations for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
  // String patch locations; type depends on configuration (intern table or boot image PIC).
  ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_;
  // String patch locations for kBssEntry.
  ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_;

  // Patches for string root accesses in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_string_patches_;
  // Patches for class root accesses in JIT compiled code.
  ArenaDeque<PatchInfo<Label>> jit_class_patches_;

  // Offset to the start of the constant area in the assembled code.
  // Used for fixups to the constant area.
  int32_t constant_area_start_;

  // Fixups for jump tables that need to be patched after the constant table is generated.
  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

  // Maps an HX86ComputeBaseMethodAddress instruction id to its offset in the
  // compiled code.
  ArenaSafeMap<uint32_t, int32_t> method_address_offset_;

  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};

}  // namespace x86
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_