Home | History | Annotate | Download | only in util
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
     17 #define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
     18 
#include <stdlib.h>

#include <algorithm>
#include <cmath>
#include <limits>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/platform/types.h"
     30 
     31 namespace tensorflow {
     32 
     33 class GraphDef;
     34 class StepStats;
     35 class NodeExecStats;
     36 
     37 template <typename ValueType, typename HighPrecisionValueType = double>
     38 class Stat {
     39  public:
     40   void UpdateStat(ValueType v) {
     41     if (count_ == 0) {
     42       first_ = v;
     43     }
     44 
     45     newest_ = v;
     46     max_ = std::max(v, max_);
     47     min_ = std::min(v, min_);
     48     ++count_;
     49     sum_ += v;
     50     squared_sum_ += static_cast<HighPrecisionValueType>(v) * v;
     51   }
     52 
     53   void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); }
     54 
     55   bool empty() const { return count_ == 0; }
     56 
     57   ValueType first() const { return first_; }
     58 
     59   ValueType newest() const { return newest_; }
     60 
     61   ValueType max() const { return max_; }
     62 
     63   ValueType min() const { return min_; }
     64 
     65   int64 count() const { return count_; }
     66 
     67   ValueType sum() const { return sum_; }
     68 
     69   HighPrecisionValueType squared_sum() const { return squared_sum_; }
     70 
     71   bool all_same() const { return (count_ == 0 || min_ == max_); }
     72 
     73   HighPrecisionValueType avg() const {
     74     return empty() ? std::numeric_limits<ValueType>::quiet_NaN()
     75                    : static_cast<HighPrecisionValueType>(sum_) / count_;
     76   }
     77 
     78   ValueType std_deviation() const {
     79     return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg());
     80   }
     81 
     82   void OutputToStream(std::ostream* stream) const {
     83     if (empty()) {
     84       *stream << "count=0";
     85     } else if (all_same()) {
     86       *stream << "count=" << count_ << " curr=" << newest_;
     87       if (count_ > 1) *stream << "(all same)";
     88     } else {
     89       *stream << "count=" << count_ << " first=" << first_
     90               << " curr=" << newest_ << " min=" << min_ << " max=" << max_
     91               << " avg=" << avg() << " std=" << std_deviation();
     92     }
     93   }
     94 
     95   friend std::ostream& operator<<(std::ostream& stream,
     96                                   const Stat<ValueType>& stat) {
     97     stat.OutputToStream(&stream);
     98     return stream;
     99   }
    100 
    101  private:
    102   ValueType first_ = 0;
    103   ValueType newest_ = 0;
    104   ValueType max_ = std::numeric_limits<ValueType>::min();
    105   ValueType min_ = std::numeric_limits<ValueType>::max();
    106   int64 count_ = 0;
    107   ValueType sum_ = 0;
    108   HighPrecisionValueType squared_sum_ = 0;
    109 };
    110 
    111 // Used to control the output of the statistics summarizer;
    112 class StatSummarizerOptions {
    113  public:
    114   StatSummarizerOptions()
    115       : show_run_order(true),
    116         run_order_limit(0),
    117         show_time(true),
    118         time_limit(10),
    119         show_memory(true),
    120         memory_limit(10),
    121         show_type(true),
    122         show_summary(true) {}
    123 
    124   bool show_run_order;
    125   int run_order_limit;
    126   bool show_time;
    127   int time_limit;
    128   bool show_memory;
    129   int memory_limit;
    130   bool show_type;
    131   bool show_summary;
    132 };
    133 
    134 // A StatSummarizer assists in performance analysis of Graph executions.
    135 //
    136 // It summarizes time spent executing (on GPU/CPU), memory used etc. across
    137 // multiple executions of a single Graph from the StepStats collected during
    138 // graph execution.
    139 //
    140 // See tensorflow/tools/benchmark/benchmark_model.cc for an example usage.
    141 class StatSummarizer {
    142  public:
    143   enum SortingMetric {
    144     BY_NAME,
    145     BY_RUN_ORDER,
    146     BY_TIME,
    147     BY_MEMORY,
    148     BY_TYPE,
    149   };
    150 
    151   explicit StatSummarizer(const StatSummarizerOptions& options);
    152 
    153   // Deprecated: Use StatSummarizer(const StatSummarizerOptions&) instead. The
    154   // GraphDef is not needed by the StatSummarizer.
    155   explicit StatSummarizer(const tensorflow::GraphDef& tensorflow_graph);
    156 
    157   ~StatSummarizer();
    158 
    159   // Adds another run's StepStats output to the aggregate counts.
    160   void ProcessStepStats(const StepStats& step_stats);
    161 
    162   // Returns a string detailing the accumulated runtime stats in a tab-separated
    163   // format which can be pasted into a spreadsheet for further analysis.
    164   std::string GetOutputString() const;
    165 
    166   std::string ShortSummary() const;
    167 
    168   // Prints the string returned by GetOutputString().
    169   void PrintStepStats() const;
    170 
    171   // Prints the output tensor sizes and types for each node.
    172   void PrintOutputs() const;
    173 
    174   void ComputeStatsByType(std::map<string, int64>* node_type_map_count,
    175                           std::map<string, int64>* node_type_map_time,
    176                           std::map<string, int64>* node_type_map_memory,
    177                           std::map<string, int64>* node_type_map_times_called,
    178                           int64* accumulated_us) const;
    179 
    180   std::string GetStatsByNodeType() const;
    181 
    182   std::string GetStatsByMetric(const string& title,
    183                                SortingMetric sorting_metric,
    184                                int num_stats) const;
    185 
    186   void Reset();
    187 
    188   // Returns number of runs.
    189   int num_runs() const { return run_total_us_.count(); }
    190 
    191   // Returns stats of total microseconds spent by all nodes in each run.
    192   const Stat<int64>& run_total_us() const { return run_total_us_; }
    193 
    194  private:
    195   struct Detail {
    196     string name;
    197     string type;
    198     int64 run_order;
    199     Stat<int64> start_us;
    200     Stat<int64> rel_end_us;
    201     Stat<int64> mem_used;
    202     std::vector<TensorDescription> outputs;
    203     int64 times_called;
    204   };
    205 
    206   void Validate(const Detail* detail, const NodeExecStats& ns) const;
    207 
    208   void OrderNodesByMetric(SortingMetric sorting_metric,
    209                           std::vector<const Detail*>* details) const;
    210 
    211   std::string HeaderString(const string& title) const;
    212   std::string ColumnString(const Detail& detail,
    213                            const int64 cumulative_stat_on_node,
    214                            const Stat<int64>& stat) const;
    215 
    216   Stat<int64> run_total_us_;
    217   Stat<int64> memory_;
    218 
    219   std::map<std::string, Detail> details_;
    220   StatSummarizerOptions options_;
    221 };
    222 
    223 }  // namespace tensorflow
    224 
    225 #endif  // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_
    226