Home | History | Annotate | Download | only in service
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
     17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
     18 
     19 #include <memory>
     20 #include <string>
     21 #include <utility>
     22 
     23 #include "absl/container/flat_hash_map.h"
     24 #include "tensorflow/compiler/xla/service/call_graph.h"
     25 #include "tensorflow/compiler/xla/service/hlo.pb.h"
     26 #include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h"
     27 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
     28 #include "tensorflow/compiler/xla/service/hlo_module.h"
     29 #include "tensorflow/compiler/xla/service/hlo_reachability.h"
     30 #include "tensorflow/compiler/xla/service/hlo_schedule.h"
     31 #include "tensorflow/compiler/xla/service/hlo_value.h"
     32 #include "tensorflow/compiler/xla/types.h"
     33 
     34 namespace xla {
     35 
     36 // Base class for describing a partial ordering of HLO instructions. Used to
     37 // determine live range overlap of HLO instruction output buffers.
     38 class HloOrdering {
     39  public:
     40   HloOrdering(const HloModule* module)
     41       : module_(module), call_graph_(CallGraph::Build(module)) {}
     42   virtual ~HloOrdering() = default;
     43 
     44   // Returns true if instruction 'a' executes before instruction 'b'. This is
     45   // not reflexive, that is, an instruction does not execute before itself.
     46   bool ExecutesBefore(const HloInstruction* a, const HloInstruction* b) const;
     47 
     48   // Returns whether the value 'a' is defined before the value 'b' under the
     49   // given ordering.
     50   bool IsDefinedBefore(const HloValue& a, const HloValue& b) const;
     51 
     52   // Returns whether the given use is before the given value definition under
     53   // the given ordering.
     54   bool UseIsBeforeValueDefinition(const HloUse& use, const HloValue& value,
     55                                   const HloDataflowAnalysis& dataflow) const;
     56   // Returns whether the given values interfere. Two values interfere if they
     57   // may both be simultaneously live.
     58   bool MayInterfere(const HloValue& a, const HloValue& b,
     59                     const HloDataflowAnalysis& dataflow) const;
     60 
     61   // Returns true if the live range of the given value 'a' is strictly before
     62   // the live range of value 'b' using the given HLO ordering.
     63   bool LiveRangeStrictlyBefore(const HloValue& a, const HloValue& b,
     64                                const HloDataflowAnalysis& dataflow) const;
     65 
     66   // Returns the sequential instruction order for the given computation, or
     67   // nullptr if the computation does not have a sequential ordering.
     68   virtual const HloInstructionSequence* SequentialOrder(
     69       const HloComputation& computation) const = 0;
     70 
     71   // Return the call graph of the module used to compute ordering.
     72   const CallGraph& call_graph() const { return *call_graph_; }
     73 
     74   virtual string ToString() const = 0;
     75 
     76  protected:
     77   // Returns true if instruction 'a' executes before instruction 'b'.
     78   // Precondition: 'a' and 'b' are in the same computation.
     79   //
     80   // Derived classes should implement this method for determining order of
     81   // instructions in the same computation. ExecutesBefore() analyzes the
     82   // callgraph and uses this method to determine ordering of instructions in
     83   // different computations.
     84   virtual bool ExecutesBeforeInSameComputation(
     85       const HloInstruction* a, const HloInstruction* b) const = 0;
     86 
     87   const HloModule* module_;
     88 
     89   std::unique_ptr<CallGraph> call_graph_;
     90 };
     91 
     92 // Base class for partial orderings implemented by a map of predecessors for
     93 // each instruction. Subclasses should fill in predecessors_.
     94 class PredecessorHloOrdering : public HloOrdering {
     95  public:
     96   ~PredecessorHloOrdering() override = default;
     97 
     98   // Returns nullptr indicating the computation does not have a sequential
     99   // ordering.
    100   const HloInstructionSequence* SequentialOrder(
    101       const HloComputation& computation) const override {
    102     return nullptr;
    103   }
    104 
    105   HloReachabilityMap& reachability_map(const HloComputation* computation) {
    106     return *predecessors_.at(computation);
    107   }
    108   const HloReachabilityMap& reachability_map(
    109       const HloComputation* computation) const {
    110     return *predecessors_.at(computation);
    111   }
    112 
    113  protected:
    114   explicit PredecessorHloOrdering(const HloModule* module);
    115   string ToStringHelper(const string& name) const;
    116 
    117   bool ExecutesBeforeInSameComputation(const HloInstruction* a,
    118                                        const HloInstruction* b) const override;
    119 
    120   // For each computation in the module, this is the set of the instruction's
    121   // predecessors. An instruction is an element of its own predecessor set.
    122   //
    123   // Subclasses should fill this in to define the desired ordering.
    124   absl::flat_hash_map<const HloComputation*,
    125                       std::unique_ptr<HloReachabilityMap>>
    126       predecessors_;
    127 };
    128 
    129 // An HLO ordering based on data dependencies in the HLO graph. In this partial
    130 // order, instruction A executes before instruction B only if there is a path
    131 // from A to B in the HLO graph. For example, given the following graph:
    132 /*
    133           param
    134          /     \
    135       negate   exp
    136           \    /
    137            add
    138 */
    139 // DependencyHloOrdering gives the following executes-before relations:
    140 //   param executes before negate, exp, and add
    141 //   negate executes before add
    142 //   exp executes before add
    143 //   add executes before nothing
    144 // negate and exp are not ordered because the dependencies allow either to
    145 // execute before the other (or in parallel). DependencyHloOrdering ordering
    146 // allows maximum parallelism and enables any execution order which satisfies
    147 // data dependencies. This requires pessimistic assumptions about buffer live
    148 // ranges and can result in more memory used than more constrained orderings.
    149 class DependencyHloOrdering : public PredecessorHloOrdering {
    150  public:
    151   explicit DependencyHloOrdering(const HloModule* module);
    152   ~DependencyHloOrdering() override = default;
    153 
    154   string ToString() const override;
    155 };
    156 
    157 // An HLO ordering based on a total order of instructions in each computation.
    158 // The computation total order is a sequencing of all of its instructions in
    159 // the computation (eg, {inst0, inst1, inst2,...}) as in single-threaded
    160 // execution. For example, given the following HLO graph:
    161 /*
    162           param
    163          /     \
    164       negate   exp
    165           \    /
    166            add
    167 */
    168 // and the following sequence:
    169 //
    170 //  {param, negate, exp, add}
    171 //
    172 // SequentialHloOrdering gives the following executes-before relations:
    173 //   param executes before negate, exp, and add
    174 //   negate executes before exp and add
    175 //   exp executes before add
    176 //   add executes before nothing
    177 // This is more constrained than DependencyHloOrdering in this example because
    178 // negate and exp are ordered (negate before exp). This enables param to share
    179 // the same buffer as exp (param buffer is dead after exp). Generally, this
    180 // ordering enables more buffer sharing (reduced memory usage) because buffer
    181 // interference is reduced relative to DependencyHloOrdering.
    182 class SequentialHloOrdering : public HloOrdering {
    183  public:
    184   SequentialHloOrdering(const HloSchedule& schedule);
    185   SequentialHloOrdering(HloSchedule&& schedule);
    186   ~SequentialHloOrdering() override = default;
    187 
    188   // Returns the sequential instruction order for the given computation.
    189   const HloInstructionSequence* SequentialOrder(
    190       const HloComputation& computation) const override;
    191 
    192   string ToString() const override;
    193 
    194  protected:
    195   void Initialize();
    196 
    197   bool ExecutesBeforeInSameComputation(const HloInstruction* a,
    198                                        const HloInstruction* b) const override;
    199 
    200   const HloSchedule schedule_;
    201 
    202   // The position of every instruction in the HLO module in its respective
    203   // computation sequence (a value of zero indicates the instruction is first in
    204   // the sequence, etc). Instructions from all computations are contained in
    205   // this map so more than one instruction may have the same position
    206   // value. This is not a problem because ExecutesBefore also verifies
    207   // instructions are in the same computation.
    208   absl::flat_hash_map<const HloInstruction*, int> order_position_;
    209 };
    210 
    211 }  // namespace xla
    212 
    213 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_ORDERING_H_
    214