Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
     18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
     19 
     20 #include "code_generator_arm_vixl.h"
     21 #include "scheduler.h"
     22 
     23 namespace art {
     24 namespace arm {
     25 // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
     26 typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
     27 
     28 // AArch32 instruction latencies.
     29 // We currently assume that all ARM CPUs share the same instruction latency list.
     30 // The following latencies were tuned based on performance experiments and
     31 // automatic tuning using differential evolution approach on various benchmarks.
     32 static constexpr uint32_t kArmIntegerOpLatency = 2;
     33 static constexpr uint32_t kArmFloatingPointOpLatency = 11;
     34 static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
     35 static constexpr uint32_t kArmMulIntegerLatency = 6;
     36 static constexpr uint32_t kArmMulFloatingPointLatency = 11;
     37 static constexpr uint32_t kArmDivIntegerLatency = 10;
     38 static constexpr uint32_t kArmDivFloatLatency = 20;
     39 static constexpr uint32_t kArmDivDoubleLatency = 25;
     40 static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
     41 static constexpr uint32_t kArmMemoryLoadLatency = 9;
     42 static constexpr uint32_t kArmMemoryStoreLatency = 9;
     43 static constexpr uint32_t kArmMemoryBarrierLatency = 6;
     44 static constexpr uint32_t kArmBranchLatency = 4;
     45 static constexpr uint32_t kArmCallLatency = 5;
     46 static constexpr uint32_t kArmCallInternalLatency = 29;
     47 static constexpr uint32_t kArmLoadStringInternalLatency = 10;
     48 static constexpr uint32_t kArmNopLatency = 2;
     49 static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
     50 static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
     51 
     52 class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
     53  public:
     54   explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
     55       : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
     56 
     57   // Default visitor for instructions not handled specifically below.
     58   void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
     59     last_visited_latency_ = kArmIntegerOpLatency;
     60   }
     61 
     62 // We add a second unused parameter to be able to use this macro like the others
     63 // defined in `nodes.h`.
     64 #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M)    \
     65   M(ArrayGet         , unused)                   \
     66   M(ArrayLength      , unused)                   \
     67   M(ArraySet         , unused)                   \
     68   M(Add              , unused)                   \
     69   M(Sub              , unused)                   \
     70   M(And              , unused)                   \
     71   M(Or               , unused)                   \
     72   M(Ror              , unused)                   \
     73   M(Xor              , unused)                   \
     74   M(Shl              , unused)                   \
     75   M(Shr              , unused)                   \
     76   M(UShr             , unused)                   \
     77   M(Mul              , unused)                   \
     78   M(Div              , unused)                   \
     79   M(Condition        , unused)                   \
     80   M(Compare          , unused)                   \
     81   M(BoundsCheck      , unused)                   \
     82   M(InstanceFieldGet , unused)                   \
     83   M(InstanceFieldSet , unused)                   \
     84   M(InstanceOf       , unused)                   \
     85   M(Invoke           , unused)                   \
     86   M(LoadString       , unused)                   \
     87   M(NewArray         , unused)                   \
     88   M(NewInstance      , unused)                   \
     89   M(Rem              , unused)                   \
     90   M(StaticFieldGet   , unused)                   \
     91   M(StaticFieldSet   , unused)                   \
     92   M(SuspendCheck     , unused)                   \
     93   M(TypeConversion   , unused)
     94 
     95 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
     96   M(BitwiseNegatedRight, unused)                 \
     97   M(MultiplyAccumulate, unused)                  \
     98   M(IntermediateAddress, unused)                 \
     99   M(IntermediateAddressIndex, unused)            \
    100   M(DataProcWithShifterOp, unused)
    101 
    102 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
    103   void Visit##type(H##type* instruction) OVERRIDE;
    104 
    105   FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
    106   FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
    107   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
    108 
    109 #undef DECLARE_VISIT_INSTRUCTION
    110 
    111  private:
    112   bool CanGenerateTest(HCondition* cond);
    113   void HandleGenerateConditionWithZero(IfCondition cond);
    114   void HandleGenerateLongTestConstant(HCondition* cond);
    115   void HandleGenerateLongTest(HCondition* cond);
    116   void HandleGenerateLongComparesAndJumps();
    117   void HandleGenerateTest(HCondition* cond);
    118   void HandleGenerateConditionGeneric(HCondition* cond);
    119   void HandleGenerateEqualLong(HCondition* cond);
    120   void HandleGenerateConditionLong(HCondition* cond);
    121   void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
    122   void HandleCondition(HCondition* instr);
    123   void HandleBinaryOperationLantencies(HBinaryOperation* instr);
    124   void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
    125   void HandleShiftLatencies(HBinaryOperation* instr);
    126   void HandleDivRemConstantIntegralLatencies(int32_t imm);
    127   void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
    128   void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
    129   void HandleGenerateDataProcInstruction(bool internal_latency = false);
    130   void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
    131   void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
    132 
    133   // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
    134   // latency visitors may query CodeGenerator for such information for accurate latency settings.
    135   CodeGeneratorARMType* codegen_;
    136 };
    137 
    138 class HSchedulerARM : public HScheduler {
    139  public:
    140   HSchedulerARM(ScopedArenaAllocator* allocator,
    141                 SchedulingNodeSelector* selector,
    142                 SchedulingLatencyVisitorARM* arm_latency_visitor)
    143       : HScheduler(allocator, arm_latency_visitor, selector) {}
    144   ~HSchedulerARM() OVERRIDE {}
    145 
    146   bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
    147 #define CASE_INSTRUCTION_KIND(type, unused) case \
    148   HInstruction::InstructionKind::k##type:
    149     switch (instruction->GetKind()) {
    150       FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
    151         return true;
    152       FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
    153         return true;
    154       default:
    155         return HScheduler::IsSchedulable(instruction);
    156     }
    157 #undef CASE_INSTRUCTION_KIND
    158   }
    159 
    160  private:
    161   DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
    162 };
    163 
    164 }  // namespace arm
    165 }  // namespace art
    166 
    167 #endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
    168