1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 19 20 #include "code_generator_arm_vixl.h" 21 #include "scheduler.h" 22 23 namespace art { 24 namespace arm { 25 // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? 26 typedef CodeGeneratorARMVIXL CodeGeneratorARMType; 27 28 // AArch32 instruction latencies. 29 // We currently assume that all ARM CPUs share the same instruction latency list. 30 // The following latencies were tuned based on performance experiments and 31 // automatic tuning using differential evolution approach on various benchmarks. 32 static constexpr uint32_t kArmIntegerOpLatency = 2; 33 static constexpr uint32_t kArmFloatingPointOpLatency = 11; 34 static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4; 35 static constexpr uint32_t kArmMulIntegerLatency = 6; 36 static constexpr uint32_t kArmMulFloatingPointLatency = 11; 37 static constexpr uint32_t kArmDivIntegerLatency = 10; 38 static constexpr uint32_t kArmDivFloatLatency = 20; 39 static constexpr uint32_t kArmDivDoubleLatency = 25; 40 static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11; 41 static constexpr uint32_t kArmMemoryLoadLatency = 9; 42 static constexpr uint32_t kArmMemoryStoreLatency = 9; 43 static constexpr uint32_t kArmMemoryBarrierLatency = 6; 44 static constexpr uint32_t kArmBranchLatency = 4; 45 static constexpr uint32_t kArmCallLatency = 5; 46 static constexpr uint32_t kArmCallInternalLatency = 29; 47 static constexpr uint32_t kArmLoadStringInternalLatency = 10; 48 static constexpr uint32_t kArmNopLatency = 2; 49 static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; 50 static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; 51 52 class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { 53 public: 54 explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) 55 : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} 56 57 // Default visitor for instructions not handled specifically below. 58 void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { 59 last_visited_latency_ = kArmIntegerOpLatency; 60 } 61 62 // We add a second unused parameter to be able to use this macro like the others 63 // defined in `nodes.h`. 64 #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ 65 M(ArrayGet , unused) \ 66 M(ArrayLength , unused) \ 67 M(ArraySet , unused) \ 68 M(Add , unused) \ 69 M(Sub , unused) \ 70 M(And , unused) \ 71 M(Or , unused) \ 72 M(Ror , unused) \ 73 M(Xor , unused) \ 74 M(Shl , unused) \ 75 M(Shr , unused) \ 76 M(UShr , unused) \ 77 M(Mul , unused) \ 78 M(Div , unused) \ 79 M(Condition , unused) \ 80 M(Compare , unused) \ 81 M(BoundsCheck , unused) \ 82 M(InstanceFieldGet , unused) \ 83 M(InstanceFieldSet , unused) \ 84 M(InstanceOf , unused) \ 85 M(Invoke , unused) \ 86 M(LoadString , unused) \ 87 M(NewArray , unused) \ 88 M(NewInstance , unused) \ 89 M(Rem , unused) \ 90 M(StaticFieldGet , unused) \ 91 M(StaticFieldSet , unused) \ 92 M(SuspendCheck , unused) \ 93 M(TypeConversion , unused) 94 95 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ 96 M(BitwiseNegatedRight, unused) \ 97 M(MultiplyAccumulate, unused) \ 98 M(IntermediateAddress, unused) \ 99 M(IntermediateAddressIndex, unused) \ 100 M(DataProcWithShifterOp, unused) 101 102 #define DECLARE_VISIT_INSTRUCTION(type, unused) \ 103 void Visit##type(H##type* instruction) OVERRIDE; 104 105 FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) 106 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) 107 FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) 108 109 #undef DECLARE_VISIT_INSTRUCTION 110 111 private: 112 void HandleBinaryOperationLantencies(HBinaryOperation* instr); 113 void HandleBitwiseOperationLantencies(HBinaryOperation* instr); 114 void HandleShiftLatencies(HBinaryOperation* instr); 115 void HandleDivRemConstantIntegralLatencies(int32_t imm); 116 void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); 117 void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); 118 void HandleGenerateDataProcInstruction(bool internal_latency = false); 119 void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); 120 void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); 121 122 // The latency setting for each HInstruction depends on how CodeGenerator may generate code, 123 // latency visitors may query CodeGenerator for such information for accurate latency settings. 124 CodeGeneratorARMType* codegen_; 125 }; 126 127 class HSchedulerARM : public HScheduler { 128 public: 129 HSchedulerARM(ArenaAllocator* arena, 130 SchedulingNodeSelector* selector, 131 SchedulingLatencyVisitorARM* arm_latency_visitor) 132 : HScheduler(arena, arm_latency_visitor, selector) {} 133 ~HSchedulerARM() OVERRIDE {} 134 135 bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { 136 #define CASE_INSTRUCTION_KIND(type, unused) case \ 137 HInstruction::InstructionKind::k##type: 138 switch (instruction->GetKind()) { 139 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) 140 return true; 141 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) 142 return true; 143 default: 144 return HScheduler::IsSchedulable(instruction); 145 } 146 #undef CASE_INSTRUCTION_KIND 147 } 148 149 private: 150 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); 151 }; 152 153 } // namespace arm 154 } // namespace art 155 156 #endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 157