1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ 17 #define TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ 18 19 #include <stdlib.h> 20 21 #include <cmath> 22 #include <limits> 23 #include <map> 24 #include <sstream> 25 #include <string> 26 27 #include "tensorflow/core/framework/tensor.h" 28 #include "tensorflow/core/framework/types.pb.h" 29 #include "tensorflow/core/platform/types.h" 30 31 namespace tensorflow { 32 33 class GraphDef; 34 class StepStats; 35 class NodeExecStats; 36 37 template <typename ValueType, typename HighPrecisionValueType = double> 38 class Stat { 39 public: 40 void UpdateStat(ValueType v) { 41 if (count_ == 0) { 42 first_ = v; 43 } 44 45 newest_ = v; 46 max_ = std::max(v, max_); 47 min_ = std::min(v, min_); 48 ++count_; 49 sum_ += v; 50 squared_sum_ += static_cast<HighPrecisionValueType>(v) * v; 51 } 52 53 void Reset() { new (this) Stat<ValueType, HighPrecisionValueType>(); } 54 55 bool empty() const { return count_ == 0; } 56 57 ValueType first() const { return first_; } 58 59 ValueType newest() const { return newest_; } 60 61 ValueType max() const { return max_; } 62 63 ValueType min() const { return min_; } 64 65 int64 count() const { return count_; } 66 67 ValueType sum() const { return sum_; } 68 69 HighPrecisionValueType squared_sum() const { return squared_sum_; } 70 71 bool all_same() const { return (count_ == 0 || min_ == max_); } 72 73 HighPrecisionValueType avg() const { 74 return empty() ? std::numeric_limits<ValueType>::quiet_NaN() 75 : static_cast<HighPrecisionValueType>(sum_) / count_; 76 } 77 78 ValueType std_deviation() const { 79 return all_same() ? 0 : sqrt(squared_sum_ / count_ - avg() * avg()); 80 } 81 82 void OutputToStream(std::ostream* stream) const { 83 if (empty()) { 84 *stream << "count=0"; 85 } else if (all_same()) { 86 *stream << "count=" << count_ << " curr=" << newest_; 87 if (count_ > 1) *stream << "(all same)"; 88 } else { 89 *stream << "count=" << count_ << " first=" << first_ 90 << " curr=" << newest_ << " min=" << min_ << " max=" << max_ 91 << " avg=" << avg() << " std=" << std_deviation(); 92 } 93 } 94 95 friend std::ostream& operator<<(std::ostream& stream, 96 const Stat<ValueType>& stat) { 97 stat.OutputToStream(&stream); 98 return stream; 99 } 100 101 private: 102 ValueType first_ = 0; 103 ValueType newest_ = 0; 104 ValueType max_ = std::numeric_limits<ValueType>::min(); 105 ValueType min_ = std::numeric_limits<ValueType>::max(); 106 int64 count_ = 0; 107 ValueType sum_ = 0; 108 HighPrecisionValueType squared_sum_ = 0; 109 }; 110 111 // Used to control the output of the statistics summarizer; 112 class StatSummarizerOptions { 113 public: 114 StatSummarizerOptions() 115 : show_run_order(true), 116 run_order_limit(0), 117 show_time(true), 118 time_limit(10), 119 show_memory(true), 120 memory_limit(10), 121 show_type(true), 122 show_summary(true) {} 123 124 bool show_run_order; 125 int run_order_limit; 126 bool show_time; 127 int time_limit; 128 bool show_memory; 129 int memory_limit; 130 bool show_type; 131 bool show_summary; 132 }; 133 134 // A StatSummarizer assists in performance analysis of Graph executions. 135 // 136 // It summarizes time spent executing (on GPU/CPU), memory used etc. across 137 // multiple executions of a single Graph from the StepStats collected during 138 // graph execution. 139 // 140 // See tensorflow/tools/benchmark/benchmark_model.cc for an example usage. 141 class StatSummarizer { 142 public: 143 enum SortingMetric { 144 BY_NAME, 145 BY_RUN_ORDER, 146 BY_TIME, 147 BY_MEMORY, 148 BY_TYPE, 149 }; 150 151 explicit StatSummarizer(const StatSummarizerOptions& options); 152 153 // Deprecated: Use StatSummarizer(const StatSummarizerOptions&) instead. The 154 // GraphDef is not needed by the StatSummarizer. 155 explicit StatSummarizer(const tensorflow::GraphDef& tensorflow_graph); 156 157 ~StatSummarizer(); 158 159 // Adds another run's StepStats output to the aggregate counts. 160 void ProcessStepStats(const StepStats& step_stats); 161 162 // Returns a string detailing the accumulated runtime stats in a tab-separated 163 // format which can be pasted into a spreadsheet for further analysis. 164 std::string GetOutputString() const; 165 166 std::string ShortSummary() const; 167 168 // Prints the string returned by GetOutputString(). 169 void PrintStepStats() const; 170 171 // Prints the output tensor sizes and types for each node. 172 void PrintOutputs() const; 173 174 void ComputeStatsByType(std::map<string, int64>* node_type_map_count, 175 std::map<string, int64>* node_type_map_time, 176 std::map<string, int64>* node_type_map_memory, 177 std::map<string, int64>* node_type_map_times_called, 178 int64* accumulated_us) const; 179 180 std::string GetStatsByNodeType() const; 181 182 std::string GetStatsByMetric(const string& title, 183 SortingMetric sorting_metric, 184 int num_stats) const; 185 186 void Reset(); 187 188 // Returns number of runs. 189 int num_runs() const { return run_total_us_.count(); } 190 191 // Returns stats of total microseconds spent by all nodes in each run. 192 const Stat<int64>& run_total_us() const { return run_total_us_; } 193 194 private: 195 struct Detail { 196 string name; 197 string type; 198 int64 run_order; 199 Stat<int64> start_us; 200 Stat<int64> rel_end_us; 201 Stat<int64> mem_used; 202 std::vector<TensorDescription> outputs; 203 int64 times_called; 204 }; 205 206 void Validate(const Detail* detail, const NodeExecStats& ns) const; 207 208 void OrderNodesByMetric(SortingMetric sorting_metric, 209 std::vector<const Detail*>* details) const; 210 211 std::string HeaderString(const string& title) const; 212 std::string ColumnString(const Detail& detail, 213 const int64 cumulative_stat_on_node, 214 const Stat<int64>& stat) const; 215 216 Stat<int64> run_total_us_; 217 Stat<int64> memory_; 218 219 std::map<std::string, Detail> details_; 220 StatSummarizerOptions options_; 221 }; 222 223 } // namespace tensorflow 224 225 #endif // TENSORFLOW_UTIL_STAT_SUMMARIZER_H_ 226