Home | History | Annotate | Download | only in graph
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_GRAPH_COSTMODEL_H_
     17 #define TENSORFLOW_GRAPH_COSTMODEL_H_
     18 
     19 #include <unordered_map>
     20 #include <vector>
     21 
     22 #include "tensorflow/core/framework/cost_graph.pb.h"
     23 #include "tensorflow/core/framework/step_stats.pb.h"
     24 #include "tensorflow/core/framework/tensor_shape.pb.h"
     25 #include "tensorflow/core/graph/graph.h"
     26 #include "tensorflow/core/graph/types.h"
     27 #include "tensorflow/core/lib/core/stringpiece.h"
     28 #include "tensorflow/core/lib/gtl/array_slice.h"
     29 #include "tensorflow/core/platform/macros.h"
     30 #include "tensorflow/core/platform/protobuf.h"
     31 
     32 namespace tensorflow {
     33 typedef std::unordered_map<StringPiece, int32, StringPieceHasher>
     34     NodeNameToCostIdMap;
     35 
     36 class StepStats;
     37 
     38 // CostModel keeps track of the following runtime statistics for nodes
     39 // of a single Graph:
     40 //    * The total number of times a node has executed.
     41 //    * The accumulated execution time (in microseconds) of a node.
     42 //    * The accumulated size (in bytes) of each node's output.
     43 //
     44 // This class is NOT thread-safe.
     45 class CostModel {
     46  public:
     47   // If "global" is true, maintains costs based on Node::cost_id, otherwise
     48   // maintains costs based on Node::id.
     49   explicit CostModel(bool is_global) : is_global_(is_global) {
     50     unknown_shape_.set_unknown_rank(true);
     51   }
     52 
     53   // Assigns min_count_ as a function of the median count for a Node.
     54   // This value is then used for suppressing the time/size costs of
     55   // infrequent operations.
     56   // NOTE(tucker): Maybe this should move to a subclass of CostModel.
     57   void SuppressInfrequent();
     58 
     59   bool is_global() const { return is_global_; }
     60 
     61   inline int Id(const Node* n) const {
     62     if (is_global_) {
     63       return n->cost_id();
     64     } else {
     65       return n->id();
     66     }
     67   }
     68 
     69   // Initializes cost model for 'g'.
     70   void InitFromGraph(const Graph& g);
     71 
     72   // Merges costs from cm.
     73   // REQUIRES: is_global_ is true for this and for "cm"
     74   void MergeFromGlobal(const CostModel& cm);
     75 
     76   // Merges costs from "cm", which has been computed relative to "g".
     77   // REQUIRES: is_global_ is true for this, and false for "cm".
     78   void MergeFromLocal(const Graph& g, const CostModel& cm);
     79 
     80   void MergeFromStats(const NodeNameToCostIdMap& map, const StepStats& ss);
     81 
     82   // Sets the number of outputs of "node".
     83   void SetNumOutputs(const Node* node, int num_outputs);
     84 
     85   // Records that "node" has executed "num_count" more times.
     86   void RecordCount(const Node* node, int num_count);
     87 
     88   // Returns how many times "node" has been executed.
     89   int32 TotalCount(const Node* node) const;
     90 
     91   // Records that "output_slot" of "node" has produced tensors of
     92   // aggregated "bytes".
     93   void RecordSize(const Node* node, int output_slot, Bytes bytes);
     94 
     95   // Returns total bytes of tensors produced by "node"s output slot.
     96   Bytes TotalBytes(const Node* node, int output_slot) const;
     97 
     98   // Returns a prediction for the size of the tensor at the
     99   // output_slot produced by one execution of "node".
    100   Bytes SizeEstimate(const Node* node, int output_slot) const;
    101 
    102   // Records that Executions of "node" have taken "time" microseconds.
    103   void RecordTime(const Node* node, Microseconds time);
    104 
    105   // Returns the total execution time for "node".
    106   Microseconds TotalTime(const Node* node) const;
    107 
    108   // Returns a prediction for one execution of "node".
    109   Microseconds TimeEstimate(const Node* node) const;
    110 
    111   // Check that an estimate is available for every OP node in graph.
    112   void CheckInitialized(const Graph& graph) const;
    113 
    114   // Records the maximum size in bytes and optionally the corresponding shape of
    115   // the tensor generated by "output_slot" of "node". If
    116   void RecordMaxMemorySize(const Node* node, int output_slot, Bytes bytes,
    117                            const TensorShapeProto& tensor_shape,
    118                            const DataType& dtype);
    119 
    120   // Returns the maximum size in bytes of the tensor generated by "output_slot"
    121   // of "node".
    122   Bytes MaxMemorySize(const Node* node, int output_slot) const;
    123 
    124   // Returns the shape corresponding to the largest memory size of the tensor
    125   // generated by "output_slot" of "node".
    126   const TensorShapeProto& MaxMemoryShape(const Node* node,
    127                                          int output_slot) const;
    128 
    129   // Returns the shape corresponding to the largest memory size of the tensor
    130   // generated by "output_slot" of "node".
    131   DataType MaxMemoryType(const Node* node, int output_slot) const;
    132 
    133   // Returns the size in bytes of temporary memory consumed by "node".
    134   Bytes TempMemorySize(const Node* node) const;
    135 
    136   // Returns the size of persistent memory allocated by "node".
    137   Bytes PersistentMemorySize(const Node* node) const;
    138 
    139   // Records memory stats such as temp momory and persistent memory.
    140   void RecordMemoryStats(const Node* node, const MemoryStats& memory_stats);
    141 
    142   // Records the maximum execution time (in microseconds) of "node".
    143   void RecordMaxExecutionTime(const Node* node, Microseconds time);
    144 
    145   // Returns the maximum execution time (in microseconds) of "node".
    146   Microseconds MaxExecutionTime(const Node* node) const;
    147 
    148   // Record the unique id of the tensor generated by "output_slot" of "node".
    149   // Any other tensor sharing the same id will be an alias, i.e. it will share
    150   // the same underlying memory storage area.
    151   void RecordAllocationId(const Node* node, int output_slot, int64 alloc_id);
    152 
    153   // Return the unique id of the tensor generated by "output_slot" of "node".
    154   int64 AllocationId(const Node* node, int output_slot) const;
    155 
    156   bool IsPersistentTensor(const Node* node, int64 alloc_id) const;
    157 
    158   // Helper routines to encapsulate static estimation heuristics
    159 
    160   // Compute an estimate of the time to copy "b" bytes over the network,
    161   // given a fixed cost of "network_latency_millis" milliseconds and
    162   // an estimated bandwidth of "estimated_gbps" gigabits per second (note that
    163   // this value is in gigabits, not gigabytes).
    164   static Microseconds CopyTimeEstimate(Bytes b, double network_latency_millis,
    165                                        double estimated_gbps);
    166   static Microseconds ComputationTimeEstimate(int64 mathops);
    167 
    168   // Add this CostModel into the CostGraphDef.
    169   void AddToCostGraphDef(const Graph* graph, CostGraphDef* cost_graph) const;
    170 
    171   // Write the contents of the CostModel to the INFO log.
    172   void WriteSummaryToLog() const;
    173 
    174   // Increment the times that the cost model is updated.
    175   void IncrementUpdateTimes();
    176 
    177   // Get the times that the cost model is updated.
    178   int32 GetUpdateTimes() const;
    179 
    180  private:
    181   static Bytes MinTensorMemoryUsage(const TensorShapeProto& tensor_shape,
    182                                     const DataType& dtype);
    183 
    184   const bool is_global_;
    185 
    186   // Resizes vectors so that they are large enough for "id" and id's outputs.
    187   void Ensure(int id, int num_outputs);
    188 
    189   // Nodes and Edges whose count is < this value
    190   // get type/byte estimates of 0.
    191   int32 min_count_ = 0;
    192 
    193   // The number of times the cost model is updated.
    194   int32 update_times_ = 0;
    195 
    196   // Number of times each Node has been executed.
    197   std::vector<int32> count_;
    198   // Cumulative execution time.
    199   std::vector<Microseconds> time_;
    200   // Cumulative Bytes output on each channel.
    201   std::vector<gtl::InlinedVector<Bytes, 2>> slot_bytes_;
    202 
    203   // Maximum execution time
    204   std::vector<Microseconds> max_exec_time_;
    205 
    206   // Maximum memory usage
    207   struct MemUsage {
    208     MemUsage() : temp_memory_size(0), persistent_memory_size(0) {}
    209 
    210     // TODO(yuefengz): temp_memory_size is not being used, remove it.
    211     Bytes temp_memory_size;
    212     Bytes persistent_memory_size;
    213 
    214     gtl::InlinedVector<Bytes, 2> output_port_mem;
    215     gtl::InlinedVector<TensorShapeProto, 2> output_port_shape;
    216     gtl::InlinedVector<DataType, 2> output_port_type;
    217   };
    218   std::vector<MemUsage> max_mem_usage_;
    219 
    220   std::vector<gtl::InlinedVector<int64, 2>> output_port_alloc_ids_;
    221 
    222   std::set<int64> persistent_alloc_ids_;
    223   std::map<string, std::set<int64>> persistent_alloc_ids_by_devices_;
    224 
    225   TensorShapeProto unknown_shape_;
    226 
    227   TF_DISALLOW_COPY_AND_ASSIGN(CostModel);
    228 };
    229 
    230 }  // namespace tensorflow
    231 
    232 #endif  // TENSORFLOW_GRAPH_COSTMODEL_H_
    233