Home | History | Annotate | Download | only in optimizing
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
     18 #define ART_COMPILER_OPTIMIZING_INLINER_H_
     19 
     20 #include "dex/dex_file_types.h"
     21 #include "dex/invoke_type.h"
     22 #include "optimization.h"
     23 #include "profile/profile_compilation_info.h"
     24 
     25 namespace art {
     26 
     27 class CodeGenerator;
     28 class DexCompilationUnit;
     29 class HGraph;
     30 class HInvoke;
     31 class OptimizingCompilerStats;
     32 
     33 class HInliner : public HOptimization {
     34  public:
     35   HInliner(HGraph* outer_graph,
     36            HGraph* outermost_graph,
     37            CodeGenerator* codegen,
     38            const DexCompilationUnit& outer_compilation_unit,
     39            const DexCompilationUnit& caller_compilation_unit,
     40            VariableSizedHandleScope* handles,
     41            OptimizingCompilerStats* stats,
     42            size_t total_number_of_dex_registers,
     43            size_t total_number_of_instructions,
     44            HInliner* parent,
     45            size_t depth = 0,
     46            const char* name = kInlinerPassName)
     47       : HOptimization(outer_graph, name, stats),
     48         outermost_graph_(outermost_graph),
     49         outer_compilation_unit_(outer_compilation_unit),
     50         caller_compilation_unit_(caller_compilation_unit),
     51         codegen_(codegen),
     52         total_number_of_dex_registers_(total_number_of_dex_registers),
     53         total_number_of_instructions_(total_number_of_instructions),
     54         parent_(parent),
     55         depth_(depth),
     56         inlining_budget_(0),
     57         handles_(handles),
     58         inline_stats_(nullptr) {}
     59 
     60   bool Run() override;
     61 
     62   static constexpr const char* kInlinerPassName = "inliner";
     63 
     64  private:
     65   enum InlineCacheType {
     66     kInlineCacheNoData = 0,
     67     kInlineCacheUninitialized = 1,
     68     kInlineCacheMonomorphic = 2,
     69     kInlineCachePolymorphic = 3,
     70     kInlineCacheMegamorphic = 4,
     71     kInlineCacheMissingTypes = 5
     72   };
     73 
     74   bool TryInline(HInvoke* invoke_instruction);
     75 
     76   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
     77   // reference type propagation can run after the inlining. If the inlining is successful, this
     78   // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
     79   // a CHA guard needs to be added for the inlining.
     80   bool TryInlineAndReplace(HInvoke* invoke_instruction,
     81                            ArtMethod* resolved_method,
     82                            ReferenceTypeInfo receiver_type,
     83                            bool do_rtp,
     84                            bool cha_devirtualize)
     85     REQUIRES_SHARED(Locks::mutator_lock_);
     86 
     87   bool TryBuildAndInline(HInvoke* invoke_instruction,
     88                          ArtMethod* resolved_method,
     89                          ReferenceTypeInfo receiver_type,
     90                          HInstruction** return_replacement)
     91     REQUIRES_SHARED(Locks::mutator_lock_);
     92 
     93   bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
     94                                ArtMethod* resolved_method,
     95                                ReferenceTypeInfo receiver_type,
     96                                bool same_dex_file,
     97                                HInstruction** return_replacement);
     98 
     99   // Run simple optimizations on `callee_graph`.
    100   void RunOptimizations(HGraph* callee_graph,
    101                         const dex::CodeItem* code_item,
    102                         const DexCompilationUnit& dex_compilation_unit)
    103     REQUIRES_SHARED(Locks::mutator_lock_);
    104 
    105   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
    106   bool TryPatternSubstitution(HInvoke* invoke_instruction,
    107                               ArtMethod* resolved_method,
    108                               HInstruction** return_replacement)
    109     REQUIRES_SHARED(Locks::mutator_lock_);
    110 
    111   // Create a new HInstanceFieldGet.
    112   HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
    113                                             ArtMethod* referrer,
    114                                             HInstruction* obj);
    115   // Create a new HInstanceFieldSet.
    116   HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
    117                                             ArtMethod* referrer,
    118                                             HInstruction* obj,
    119                                             HInstruction* value,
    120                                             bool* is_final = nullptr);
    121 
    122   // Try inlining the invoke instruction using inline caches.
    123   bool TryInlineFromInlineCache(
    124       const DexFile& caller_dex_file,
    125       HInvoke* invoke_instruction,
    126       ArtMethod* resolved_method)
    127     REQUIRES_SHARED(Locks::mutator_lock_);
    128 
    129   // Try getting the inline cache from JIT code cache.
    130   // Return true if the inline cache was successfully allocated and the
    131   // invoke info was found in the profile info.
    132   InlineCacheType GetInlineCacheJIT(
    133       HInvoke* invoke_instruction,
    134       StackHandleScope<1>* hs,
    135       /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
    136     REQUIRES_SHARED(Locks::mutator_lock_);
    137 
    138   // Try getting the inline cache from AOT offline profile.
    139   // Return true if the inline cache was successfully allocated and the
    140   // invoke info was found in the profile info.
    141   InlineCacheType GetInlineCacheAOT(const DexFile& caller_dex_file,
    142       HInvoke* invoke_instruction,
    143       StackHandleScope<1>* hs,
    144       /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
    145     REQUIRES_SHARED(Locks::mutator_lock_);
    146 
    147   // Extract the mirror classes from the offline profile and add them to the `inline_cache`.
    148   // Note that even if we have profile data for the invoke the inline_cache might contain
    149   // only null entries if the types cannot be resolved.
    150   InlineCacheType ExtractClassesFromOfflineProfile(
    151       const HInvoke* invoke_instruction,
    152       const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
    153       /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
    154     REQUIRES_SHARED(Locks::mutator_lock_);
    155 
    156   // Compute the inline cache type.
    157   InlineCacheType GetInlineCacheType(
    158       const Handle<mirror::ObjectArray<mirror::Class>>& classes)
    159     REQUIRES_SHARED(Locks::mutator_lock_);
    160 
    161   // Try to inline the target of a monomorphic call. If successful, the code
    162   // in the graph will look like:
    163   // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
    164   // ... // inlined code
    165   bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
    166                                 ArtMethod* resolved_method,
    167                                 Handle<mirror::ObjectArray<mirror::Class>> classes)
    168     REQUIRES_SHARED(Locks::mutator_lock_);
    169 
    170   // Try to inline targets of a polymorphic call.
    171   bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
    172                                 ArtMethod* resolved_method,
    173                                 Handle<mirror::ObjectArray<mirror::Class>> classes)
    174     REQUIRES_SHARED(Locks::mutator_lock_);
    175 
    176   bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
    177                                             ArtMethod* resolved_method,
    178                                             Handle<mirror::ObjectArray<mirror::Class>> classes)
    179     REQUIRES_SHARED(Locks::mutator_lock_);
    180 
    181   // Returns whether or not we should use only polymorphic inlining with no deoptimizations.
    182   bool UseOnlyPolymorphicInliningWithNoDeopt();
    183 
    184   // Try CHA-based devirtualization to change virtual method calls into
    185   // direct calls.
    186   // Returns the actual method that resolved_method can be devirtualized to.
    187   ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method)
    188     REQUIRES_SHARED(Locks::mutator_lock_);
    189 
    190   // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
    191   // should_deoptimize flag and if it's true, does deoptimization.
    192   void AddCHAGuard(HInstruction* invoke_instruction,
    193                    uint32_t dex_pc,
    194                    HInstruction* cursor,
    195                    HBasicBlock* bb_cursor);
    196 
    197   HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
    198                                            HInstruction* receiver,
    199                                            uint32_t dex_pc) const
    200     REQUIRES_SHARED(Locks::mutator_lock_);
    201 
    202   void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement)
    203     REQUIRES_SHARED(Locks::mutator_lock_);
    204 
    205   // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is
    206   // admissible (see ReferenceTypePropagation::IsAdmissible for details).
    207   // Otherwise returns inexact Object RTI.
    208   ReferenceTypeInfo GetClassRTI(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
    209 
    210   bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method)
    211     REQUIRES_SHARED(Locks::mutator_lock_);
    212 
    213   bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement)
    214     REQUIRES_SHARED(Locks::mutator_lock_);
    215 
    216   // Add a type guard on the given `receiver`. This will add to the graph:
    217   // i0 = HFieldGet(receiver, klass)
    218   // i1 = HLoadClass(class_index, is_referrer)
    219   // i2 = HNotEqual(i0, i1)
    220   //
    221   // And if `with_deoptimization` is true:
    222   // HDeoptimize(i2)
    223   //
    224   // The method returns the `HNotEqual`, that will be used for polymorphic inlining.
    225   HInstruction* AddTypeGuard(HInstruction* receiver,
    226                              HInstruction* cursor,
    227                              HBasicBlock* bb_cursor,
    228                              dex::TypeIndex class_index,
    229                              Handle<mirror::Class> klass,
    230                              HInstruction* invoke_instruction,
    231                              bool with_deoptimization)
    232     REQUIRES_SHARED(Locks::mutator_lock_);
    233 
    234   /*
    235    * Ad-hoc implementation for implementing a diamond pattern in the graph for
    236    * polymorphic inlining:
    237    * 1) `compare` becomes the input of the new `HIf`.
    238    * 2) Everything up until `invoke_instruction` is in the then branch (could
    239    *    contain multiple blocks).
    240    * 3) `invoke_instruction` is moved to the otherwise block.
    241    * 4) If `return_replacement` is not null, the merge block will have
    242    *    a phi whose inputs are `return_replacement` and `invoke_instruction`.
    243    *
    244    * Before:
    245    *             Block1
    246    *             compare
    247    *              ...
    248    *         invoke_instruction
    249    *
    250    * After:
    251    *            Block1
    252    *            compare
    253    *              if
    254    *          /        \
    255    *         /          \
    256    *   Then block    Otherwise block
    257    *      ...       invoke_instruction
    258    *       \              /
    259    *        \            /
    260    *          Merge block
    261    *  phi(return_replacement, invoke_instruction)
    262    */
    263   void CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
    264                                                 HInstruction* return_replacement,
    265                                                 HInstruction* invoke_instruction);
    266 
    267   // Update the inlining budget based on `total_number_of_instructions_`.
    268   void UpdateInliningBudget();
    269 
    270   // Count the number of calls of `method` being inlined recursively.
    271   size_t CountRecursiveCallsOf(ArtMethod* method) const;
    272 
    273   // Pretty-print for spaces during logging.
    274   std::string DepthString(int line) const;
    275 
    276   HGraph* const outermost_graph_;
    277   const DexCompilationUnit& outer_compilation_unit_;
    278   const DexCompilationUnit& caller_compilation_unit_;
    279   CodeGenerator* const codegen_;
    280   const size_t total_number_of_dex_registers_;
    281   size_t total_number_of_instructions_;
    282 
    283   // The 'parent' inliner, that means the inlinigng optimization that requested
    284   // `graph_` to be inlined.
    285   const HInliner* const parent_;
    286   const size_t depth_;
    287 
    288   // The budget left for inlining, in number of instructions.
    289   size_t inlining_budget_;
    290   VariableSizedHandleScope* const handles_;
    291 
    292   // Used to record stats about optimizations on the inlined graph.
    293   // If the inlining is successful, these stats are merged to the caller graph's stats.
    294   OptimizingCompilerStats* inline_stats_;
    295 
    296   DISALLOW_COPY_AND_ASSIGN(HInliner);
    297 };
    298 
    299 }  // namespace art
    300 
    301 #endif  // ART_COMPILER_OPTIMIZING_INLINER_H_
    302