Home | History | Annotate | Download | only in service
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EXECUTION_PROFILE_H_
     17 #define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EXECUTION_PROFILE_H_
     18 
     19 #include <unordered_map>
     20 
     21 #include "tensorflow/compiler/xla/map_util.h"
     22 #include "tensorflow/compiler/xla/service/hlo_cost_analysis.h"
     23 #include "tensorflow/compiler/xla/service/hlo_profile_printer.h"
     24 #include "tensorflow/compiler/xla/types.h"
     25 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
     26 #include "tensorflow/core/platform/types.h"
     27 
     28 namespace xla {
     29 
     30 class HloInstruction;
     31 
     32 // Maps all HloInstructions and HloComputations in an HloModule to integers.
     33 // These integers form the contiguous range [0, total_count()).
     34 class HloProfileIndexMap {
     35  public:
     36   // Scans `module` to populate this instance of HloProfileIndexMap.
     37   explicit HloProfileIndexMap(const HloModule& module);
     38 
     39   HloProfileIndexMap(const HloProfileIndexMap&) = default;
     40   HloProfileIndexMap(HloProfileIndexMap&&) = default;
     41 
     42   HloProfileIndexMap& operator=(const HloProfileIndexMap&) = default;
     43   HloProfileIndexMap& operator=(HloProfileIndexMap&&) = default;
     44 
     45   size_t GetProfileIndexFor(const HloInstruction& instruction) const {
     46     return FindOrDie(instruction_to_profile_idx(), &instruction);
     47   }
     48 
     49   size_t GetProfileIndexFor(const HloComputation& computation) const {
     50     return FindOrDie(computation_to_profile_idx(), &computation);
     51   }
     52 
     53   size_t instruction_count() const {
     54     return instruction_to_profile_idx().size();
     55   }
     56 
     57   size_t computation_count() const {
     58     return computation_to_profile_idx().size();
     59   }
     60 
     61   size_t total_count() const {
     62     return instruction_count() + computation_count();
     63   }
     64 
     65   const std::unordered_map<const HloInstruction*, int64>&
     66   instruction_to_profile_idx() const {
     67     return instruction_to_profile_idx_;
     68   }
     69 
     70   const std::unordered_map<const HloComputation*, int64>&
     71   computation_to_profile_idx() const {
     72     return computation_to_profile_idx_;
     73   }
     74 
     75  private:
     76   std::unordered_map<const HloInstruction*, int64> instruction_to_profile_idx_;
     77   std::unordered_map<const HloComputation*, int64> computation_to_profile_idx_;
     78 };
     79 
     80 // Create an instance of `HloProfilePrinterData`.
     81 std::unique_ptr<HloProfilePrinterData> CreateHloProfilePrinterData(
     82     const HloProfileIndexMap& hlo_profile_index_map,
     83     const HloCostAnalysis& cost_analysis);
     84 
     85 // Describes how much time each HLO operation took.
     86 //
     87 // Each HloComputation takes a certain number of cycles.  This class helps break
     88 // down how much time each HLO took.
     89 class HloExecutionProfile {
     90  public:
     91   using DeviceDescription = perftools::gputools::DeviceDescription;
     92 
     93   HloExecutionProfile(const HloProfilePrinterData* hlo_profile_printer_data,
     94                       const HloProfileIndexMap* hlo_profile_index_map);
     95 
     96   // Record how many cycles this HLO took to execute.
     97   void SetCyclesTakenBy(const HloInstruction* hlo, uint64 cycles_taken);
     98 
     99   // Returns how many cycles this HLO took to execute.  Profiling information
    100   // may not be available for some instructions in which case zero is returned.
    101   uint64 GetCyclesTakenBy(const HloInstruction& hlo) const;
    102 
    103   // Return the number of cycles this computation took to execute.
    104   uint64 total_cycles_executed(const HloComputation& computation) const {
    105     return profile_counters_[hlo_profile_index_map_.GetProfileIndexFor(
    106         computation)];
    107   }
    108 
    109   // Record how many cycles a computation took to execute.
    110   void set_total_cycles_executed(const HloComputation& computation,
    111                                  uint64 total_cycles_executed) {
    112     profile_counters_[hlo_profile_index_map_.GetProfileIndexFor(computation)] =
    113         total_cycles_executed;
    114   }
    115 
    116   // Returns a version of the execution profile suitable for performance
    117   // debugging; e.g. emits cycle counts, execution time at the nominal device
    118   // frequency, and the effective throughput given the provided cost_analysis
    119   // for the operations in a given computation. Returns an empty string if it
    120   // wasn't possible to generate a printable version.
    121   string ToString(const DeviceDescription& device_description) const {
    122     return PrintHloProfile(hlo_profile_printer_data_, profile_counters_.data(),
    123                            device_description.clock_rate_ghz());
    124   }
    125 
    126   std::vector<int64>* mutable_profile_counters() { return &profile_counters_; }
    127   const std::vector<int64>& profile_counters() const {
    128     return profile_counters_;
    129   }
    130 
    131  private:
    132   const HloProfilePrinterData& hlo_profile_printer_data_;
    133   const HloProfileIndexMap& hlo_profile_index_map_;
    134 
    135   // Stores per-Hlo profile counters.  This is the only thing that changes when
    136   // we execute an XLA computation.
    137   std::vector<int64> profile_counters_;
    138 };
    139 
    140 }  // namespace xla
    141 
    142 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EXECUTION_PROFILE_H_
    143