Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
     18 #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
     19 
     20 #include "induction_var_range.h"
     21 #include "nodes.h"
     22 #include "optimization.h"
     23 
     24 namespace art {
     25 
     26 class CompilerDriver;
     27 
     28 /**
     29  * Loop optimizations. Builds a loop hierarchy and applies optimizations to
     30  * the detected nested loops, such as removal of dead induction and empty loops
     31  * and inner loop vectorization.
     32  */
     33 class HLoopOptimization : public HOptimization {
     34  public:
     35   HLoopOptimization(HGraph* graph,
     36                     CompilerDriver* compiler_driver,
     37                     HInductionVarAnalysis* induction_analysis);
     38 
     39   void Run() OVERRIDE;
     40 
     41   static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
     42 
     43  private:
     44   /**
     45    * A single loop inside the loop hierarchy representation.
     46    */
     47   struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
     48     explicit LoopNode(HLoopInformation* lp_info)
     49         : loop_info(lp_info),
     50           outer(nullptr),
     51           inner(nullptr),
     52           previous(nullptr),
     53           next(nullptr) {}
     54     HLoopInformation* loop_info;
     55     LoopNode* outer;
     56     LoopNode* inner;
     57     LoopNode* previous;
     58     LoopNode* next;
     59   };
     60 
     61   /*
     62    * Vectorization restrictions (bit mask).
     63    */
     64   enum VectorRestrictions {
     65     kNone            = 0,    // no restrictions
     66     kNoMul           = 1,    // no multiplication
     67     kNoDiv           = 2,    // no division
     68     kNoShift         = 4,    // no shift
     69     kNoShr           = 8,    // no arithmetic shift right
     70     kNoHiBits        = 16,   // "wider" operations cannot bring in higher order bits
     71     kNoSignedHAdd    = 32,   // no signed halving add
     72     kNoUnroundedHAdd = 64,   // no unrounded halving add
     73     kNoAbs           = 128,  // no absolute value
     74   };
     75 
     76   /*
     77    * Vectorization mode during synthesis
     78    * (sequential peeling/cleanup loop or vector loop).
     79    */
     80   enum VectorMode {
     81     kSequential,
     82     kVector
     83   };
     84 
     85   /*
     86    * Representation of a unit-stride array reference.
     87    */
     88   struct ArrayReference {
     89     ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l)
     90         : base(b), offset(o), type(t), lhs(l) { }
     91     bool operator<(const ArrayReference& other) const {
     92       return
     93           (base < other.base) ||
     94           (base == other.base &&
     95            (offset < other.offset || (offset == other.offset &&
     96                                       (type < other.type ||
     97                                        (type == other.type && lhs < other.lhs)))));
     98     }
     99     HInstruction* base;    // base address
    100     HInstruction* offset;  // offset + i
    101     Primitive::Type type;  // component type
    102     bool lhs;              // def/use
    103   };
    104 
    105   // Loop setup and traversal.
    106   void LocalRun();
    107   void AddLoop(HLoopInformation* loop_info);
    108   void RemoveLoop(LoopNode* node);
    109   void TraverseLoopsInnerToOuter(LoopNode* node);
    110 
    111   // Optimization.
    112   void SimplifyInduction(LoopNode* node);
    113   void SimplifyBlocks(LoopNode* node);
    114   void OptimizeInnerLoop(LoopNode* node);
    115 
    116   // Vectorization analysis and synthesis.
    117   bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
    118   void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
    119   void GenerateNewLoop(LoopNode* node,
    120                        HBasicBlock* block,
    121                        HBasicBlock* new_preheader,
    122                        HInstruction* lo,
    123                        HInstruction* hi,
    124                        HInstruction* step);
    125   bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
    126   bool VectorizeUse(LoopNode* node,
    127                     HInstruction* instruction,
    128                     bool generate_code,
    129                     Primitive::Type type,
    130                     uint64_t restrictions);
    131   bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
    132   bool TrySetVectorLength(uint32_t length);
    133   void GenerateVecInv(HInstruction* org, Primitive::Type type);
    134   void GenerateVecSub(HInstruction* org, HInstruction* off);
    135   void GenerateVecMem(HInstruction* org,
    136                       HInstruction* opa,
    137                       HInstruction* opb,
    138                       Primitive::Type type);
    139   void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
    140 
    141   // Vectorization idioms.
    142   bool VectorizeHalvingAddIdiom(LoopNode* node,
    143                                 HInstruction* instruction,
    144                                 bool generate_code,
    145                                 Primitive::Type type,
    146                                 uint64_t restrictions);
    147 
    148   // Helpers.
    149   bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
    150   bool TrySetSimpleLoopHeader(HBasicBlock* block);
    151   bool IsEmptyBody(HBasicBlock* block);
    152   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
    153                            HInstruction* instruction,
    154                            bool collect_loop_uses,
    155                            /*out*/ int32_t* use_count);
    156   bool IsUsedOutsideLoop(HLoopInformation* loop_info,
    157                          HInstruction* instruction);
    158   bool TryReplaceWithLastValue(HLoopInformation* loop_info,
    159                                HInstruction* instruction,
    160                                HBasicBlock* block);
    161   bool TryAssignLastValue(HLoopInformation* loop_info,
    162                           HInstruction* instruction,
    163                           HBasicBlock* block,
    164                           bool collect_loop_uses);
    165   void RemoveDeadInstructions(const HInstructionList& list);
    166   bool CanRemoveCycle();  // Whether the current 'iset_' is removable.
    167 
    168   // Compiler driver (to query ISA features).
    169   const CompilerDriver* compiler_driver_;
    170 
    171   // Range information based on prior induction variable analysis.
    172   InductionVarRange induction_range_;
    173 
    174   // Phase-local heap memory allocator for the loop optimizer. Storage obtained
    175   // through this allocator is immediately released when the loop optimizer is done.
    176   ArenaAllocator* loop_allocator_;
    177 
    178   // Global heap memory allocator. Used to build HIR.
    179   ArenaAllocator* global_allocator_;
    180 
    181   // Entries into the loop hierarchy representation. The hierarchy resides
    182   // in phase-local heap memory.
    183   LoopNode* top_loop_;
    184   LoopNode* last_loop_;
    185 
    186   // Temporary bookkeeping of a set of instructions.
    187   // Contents reside in phase-local heap memory.
    188   ArenaSet<HInstruction*>* iset_;
    189 
    190   // Counter that tracks how many induction cycles have been simplified. Useful
    191   // to trigger incremental updates of induction variable analysis of outer loops
    192   // when the induction of inner loops has changed.
    193   uint32_t induction_simplication_count_;
    194 
    195   // Flag that tracks if any simplifications have occurred.
    196   bool simplified_;
    197 
    198   // Number of "lanes" for selected packed type.
    199   uint32_t vector_length_;
    200 
    201   // Set of array references in the vector loop.
    202   // Contents reside in phase-local heap memory.
    203   ArenaSet<ArrayReference>* vector_refs_;
    204 
    205   // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
    206   // loop (simd_ is false) and the actual vector loop (simd_ is true). The data
    207   // structure maps original instructions into the new instructions.
    208   // Contents reside in phase-local heap memory.
    209   ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
    210 
    211   // Temporary vectorization bookkeeping.
    212   HBasicBlock* vector_preheader_;  // preheader of the new loop
    213   HBasicBlock* vector_header_;  // header of the new loop
    214   HBasicBlock* vector_body_;  // body of the new loop
    215   HInstruction* vector_runtime_test_a_;
    216   HInstruction* vector_runtime_test_b_;  // defines a != b runtime test
    217   HPhi* vector_phi_;  // the Phi representing the normalized loop index
    218   VectorMode vector_mode_;  // selects synthesis mode
    219 
    220   friend class LoopOptimizationTest;
    221 
    222   DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
    223 };
    224 
    225 }  // namespace art
    226 
    227 #endif  // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
    228