// art/compiler/optimizing/scheduler_arm.h
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
     18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
     19 
     20 #include "code_generator_arm_vixl.h"
     21 #include "scheduler.h"
     22 
     23 namespace art {
     24 namespace arm {
     25 // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
     26 typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
     27 
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using differential evolution approach on various benchmarks.
// NOTE: these are abstract scheduling weights consumed by the latency visitor
// below, not exact hardware cycle counts.

// Integer and floating-point ALU operations.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;

// Multiply and divide.
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;

// Type conversions.
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;

// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;

// Control flow, calls, and runtime interactions.
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
     51 
     52 class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
     53  public:
     54   explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
     55       : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
     56 
     57   // Default visitor for instructions not handled specifically below.
     58   void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
     59     last_visited_latency_ = kArmIntegerOpLatency;
     60   }
     61 
     62 // We add a second unused parameter to be able to use this macro like the others
     63 // defined in `nodes.h`.
     64 #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M)    \
     65   M(ArrayGet         , unused)                   \
     66   M(ArrayLength      , unused)                   \
     67   M(ArraySet         , unused)                   \
     68   M(Add              , unused)                   \
     69   M(Sub              , unused)                   \
     70   M(And              , unused)                   \
     71   M(Or               , unused)                   \
     72   M(Ror              , unused)                   \
     73   M(Xor              , unused)                   \
     74   M(Shl              , unused)                   \
     75   M(Shr              , unused)                   \
     76   M(UShr             , unused)                   \
     77   M(Mul              , unused)                   \
     78   M(Div              , unused)                   \
     79   M(Condition        , unused)                   \
     80   M(Compare          , unused)                   \
     81   M(BoundsCheck      , unused)                   \
     82   M(InstanceFieldGet , unused)                   \
     83   M(InstanceFieldSet , unused)                   \
     84   M(InstanceOf       , unused)                   \
     85   M(Invoke           , unused)                   \
     86   M(LoadString       , unused)                   \
     87   M(NewArray         , unused)                   \
     88   M(NewInstance      , unused)                   \
     89   M(Rem              , unused)                   \
     90   M(StaticFieldGet   , unused)                   \
     91   M(StaticFieldSet   , unused)                   \
     92   M(SuspendCheck     , unused)                   \
     93   M(TypeConversion   , unused)
     94 
     95 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
     96   M(BitwiseNegatedRight, unused)                 \
     97   M(MultiplyAccumulate, unused)                  \
     98   M(IntermediateAddress, unused)                 \
     99   M(IntermediateAddressIndex, unused)            \
    100   M(DataProcWithShifterOp, unused)
    101 
    102 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
    103   void Visit##type(H##type* instruction) OVERRIDE;
    104 
    105   FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
    106   FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
    107   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
    108 
    109 #undef DECLARE_VISIT_INSTRUCTION
    110 
    111  private:
    112   void HandleBinaryOperationLantencies(HBinaryOperation* instr);
    113   void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
    114   void HandleShiftLatencies(HBinaryOperation* instr);
    115   void HandleDivRemConstantIntegralLatencies(int32_t imm);
    116   void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
    117   void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
    118   void HandleGenerateDataProcInstruction(bool internal_latency = false);
    119   void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
    120   void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
    121 
    122   // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
    123   // latency visitors may query CodeGenerator for such information for accurate latency settings.
    124   CodeGeneratorARMType* codegen_;
    125 };
    126 
// ARM-specific instruction scheduler. Wires the generic HScheduler machinery
// to the ARM latency visitor and widens the set of schedulable instructions
// with the ARM-only and shared HIR nodes declared above.
class HSchedulerARM : public HScheduler {
 public:
  // `arm_latency_visitor` supplies per-instruction latencies; ownership stays
  // with the caller (the base class only keeps the pointer).
  HSchedulerARM(ArenaAllocator* arena,
                SchedulingNodeSelector* selector,
                SchedulingLatencyVisitorARM* arm_latency_visitor)
      : HScheduler(arena, arm_latency_visitor, selector) {}
  ~HSchedulerARM() OVERRIDE {}

  // Returns true for instructions this backend knows how to schedule.
  // The two FOR_EACH_* macro expansions below generate runs of `case` labels
  // that fall through to the `return true;` that follows each expansion;
  // anything else defers to the generic HScheduler policy.
  bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
#define CASE_INSTRUCTION_KIND(type, unused) case \
  HInstruction::InstructionKind::k##type:
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
};
    152 
    153 }  // namespace arm
    154 }  // namespace art
    155 
    156 #endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
    157