Home | History | Annotate | Download | only in internal
      1 /* Copyright 2016 The TensorFlow Authors All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 #include "tensorflow/core/profiler/internal/tfprof_node_show.h"
     16 
     17 #include "tensorflow/core/lib/strings/str_util.h"
     18 #include "tensorflow/core/lib/strings/stringprintf.h"
     19 
     20 namespace tensorflow {
     21 namespace tfprof {
     22 namespace {}
     23 
     24 ShowNode::ShowNode(const TFGraphNode* node) : node(node), account(false) {
     25   ReInit(-1);
     26 }
     27 
     28 void ShowNode::ReInit(int64 step) {
     29   mutable_proto()->set_name(name());
     30   mutable_proto()->clear_devices();
     31   if (!node->canonical_device().empty()) {
     32     mutable_proto()->add_devices(node->canonical_device());
     33   }
     34   mutable_proto()->set_run_count(node->run_count(step));
     35   mutable_proto()->set_exec_micros(node->exec_micros(step));
     36   mutable_proto()->set_accelerator_exec_micros(
     37       node->accelerator_exec_micros(step));
     38   mutable_proto()->set_cpu_exec_micros(node->cpu_exec_micros(step));
     39 
     40   mutable_proto()->set_requested_bytes(node->requested_bytes(step));
     41   mutable_proto()->set_peak_bytes(node->peak_bytes(step));
     42   mutable_proto()->set_residual_bytes(node->residual_bytes(step));
     43   mutable_proto()->set_output_bytes(node->output_bytes(step));
     44 
     45   mutable_proto()->set_float_ops(node->float_ops(step));
     46 
     47   mutable_proto()->clear_input_shapes();
     48   for (const auto& inp : node->input_shapes()) {
     49     (*mutable_proto()->mutable_input_shapes())[inp.first].MergeFrom(
     50         VecToShapeProto(inp.second));
     51   }
     52   proto_.set_parameters(node->parameters());
     53 }
     54 
     55 GraphNodeProto* ShowNode::mutable_proto() { return &proto_; }
     56 
     57 const GraphNodeProto& ShowNode::proto() const { return proto_; }
     58 
     59 void ShowNode::AggregateTotalStats(ShowNode* node) {
     60   GraphNodeProto* node_pb = node->mutable_proto();
     61   mutable_proto()->set_total_run_count(proto().total_run_count() +
     62                                        node_pb->total_run_count());
     63   mutable_proto()->set_total_definition_count(
     64       proto().total_definition_count() + node_pb->total_definition_count());
     65   mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
     66                                          node_pb->total_exec_micros());
     67   mutable_proto()->set_total_accelerator_exec_micros(
     68       proto().total_accelerator_exec_micros() +
     69       node_pb->total_accelerator_exec_micros());
     70   mutable_proto()->set_total_cpu_exec_micros(proto().total_cpu_exec_micros() +
     71                                              node_pb->total_cpu_exec_micros());
     72 
     73   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
     74                                              node_pb->total_requested_bytes());
     75   mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
     76                                         node_pb->total_peak_bytes());
     77   mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
     78                                             node_pb->total_residual_bytes());
     79   mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
     80                                           node_pb->total_output_bytes());
     81   mutable_proto()->set_total_parameters(proto().total_parameters() +
     82                                         node_pb->total_parameters());
     83   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
     84                                        node_pb->total_float_ops());
     85 }
     86 
     87 void ShowNode::AddSelfToTotalStats() {
     88   mutable_proto()->set_total_definition_count(proto().total_definition_count() +
     89                                               1);
     90   mutable_proto()->set_total_run_count(proto().total_run_count() +
     91                                        proto().run_count());
     92   mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
     93                                          proto().exec_micros());
     94   mutable_proto()->set_total_accelerator_exec_micros(
     95       proto().total_accelerator_exec_micros() +
     96       proto().accelerator_exec_micros());
     97   mutable_proto()->set_total_cpu_exec_micros(proto().total_cpu_exec_micros() +
     98                                              proto().cpu_exec_micros());
     99 
    100   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
    101                                              proto().requested_bytes());
    102   mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
    103                                         proto().peak_bytes());
    104   mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
    105                                             proto().residual_bytes());
    106   mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
    107                                           proto().output_bytes());
    108 
    109   mutable_proto()->set_total_parameters(proto().total_parameters() +
    110                                         proto().parameters());
    111   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
    112                                        proto().float_ops());
    113 }
    114 
    115 void ShowNode::ResetTotalStats() {
    116   formatted_str.clear();
    117 
    118   mutable_proto()->set_total_definition_count(0);
    119   mutable_proto()->set_total_run_count(0);
    120   mutable_proto()->set_total_exec_micros(0);
    121   mutable_proto()->set_total_accelerator_exec_micros(0);
    122   mutable_proto()->set_total_cpu_exec_micros(0);
    123 
    124   mutable_proto()->set_total_requested_bytes(0);
    125   mutable_proto()->set_total_peak_bytes(0);
    126   mutable_proto()->set_total_residual_bytes(0);
    127   mutable_proto()->set_total_output_bytes(0);
    128 
    129   mutable_proto()->set_total_parameters(0);
    130   mutable_proto()->set_total_float_ops(0);
    131   mutable_proto()->mutable_children()->Clear();
    132 }
    133 
    134 ShowMultiNode::ShowMultiNode(TFMultiGraphNode* node)
    135     : node(node), account(false), show(false) {
    136   ReInit(-1, {".*"});
    137 }
    138 
    139 bool ShowMultiNode::ReInit(int64 step,
    140                            const std::vector<string>& type_regexes) {
    141   bool has_matched_type = node->SnapshotNodes(step, type_regexes);
    142 
    143   std::vector<ShowNode> snodes;
    144   mutable_proto()->mutable_graph_nodes()->Clear();
    145   for (auto it : node->graph_nodes()) {
    146     ShowNode snode(it.second);
    147     snodes.push_back(snode);
    148     snodes.back().ReInit(step);
    149     snodes.back().AddSelfToTotalStats();
    150     mutable_proto()->add_graph_nodes()->MergeFrom(snodes.back().proto());
    151   }
    152 
    153   mutable_proto()->set_name(name());
    154   mutable_proto()->set_exec_micros(node->exec_micros());
    155   mutable_proto()->set_accelerator_exec_micros(node->accelerator_exec_micros());
    156   mutable_proto()->set_cpu_exec_micros(node->cpu_exec_micros());
    157 
    158   mutable_proto()->set_requested_bytes(node->requested_bytes());
    159   mutable_proto()->set_peak_bytes(node->peak_bytes());
    160   mutable_proto()->set_residual_bytes(node->residual_bytes());
    161   mutable_proto()->set_output_bytes(node->output_bytes());
    162 
    163   mutable_proto()->set_float_ops(node->float_ops());
    164 
    165   mutable_proto()->set_parameters(node->parameters());
    166   return has_matched_type;
    167 }
    168 
    169 MultiGraphNodeProto* ShowMultiNode::mutable_proto() { return &proto_; }
    170 
    171 const MultiGraphNodeProto& ShowMultiNode::proto() const { return proto_; }
    172 
    173 void ShowMultiNode::AggregateTotalStats(ShowMultiNode* node) {
    174   MultiGraphNodeProto* node_pb = node->mutable_proto();
    175   mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
    176                                          node_pb->total_exec_micros());
    177   mutable_proto()->set_total_accelerator_exec_micros(
    178       proto().total_accelerator_exec_micros() +
    179       node_pb->total_accelerator_exec_micros());
    180   mutable_proto()->set_total_cpu_exec_micros(proto().total_cpu_exec_micros() +
    181                                              node_pb->total_cpu_exec_micros());
    182 
    183   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
    184                                              node_pb->total_requested_bytes());
    185   mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
    186                                         node_pb->total_peak_bytes());
    187   mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
    188                                             node_pb->total_residual_bytes());
    189   mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
    190                                           node_pb->total_output_bytes());
    191 
    192   mutable_proto()->set_total_parameters(proto().total_parameters() +
    193                                         node_pb->total_parameters());
    194   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
    195                                        node_pb->total_float_ops());
    196 }
    197 
    198 void ShowMultiNode::AddSelfToTotalStats() {
    199   mutable_proto()->set_total_exec_micros(proto().total_exec_micros() +
    200                                          proto().exec_micros());
    201   mutable_proto()->set_total_accelerator_exec_micros(
    202       proto().total_accelerator_exec_micros() +
    203       proto().accelerator_exec_micros());
    204   mutable_proto()->set_total_cpu_exec_micros(proto().total_cpu_exec_micros() +
    205                                              proto().cpu_exec_micros());
    206 
    207   mutable_proto()->set_total_requested_bytes(proto().total_requested_bytes() +
    208                                              proto().requested_bytes());
    209   mutable_proto()->set_total_peak_bytes(proto().total_peak_bytes() +
    210                                         proto().peak_bytes());
    211   mutable_proto()->set_total_residual_bytes(proto().total_residual_bytes() +
    212                                             proto().residual_bytes());
    213   mutable_proto()->set_total_output_bytes(proto().total_output_bytes() +
    214                                           proto().output_bytes());
    215 
    216   mutable_proto()->set_total_parameters(proto().total_parameters() +
    217                                         proto().parameters());
    218   mutable_proto()->set_total_float_ops(proto().total_float_ops() +
    219                                        proto().float_ops());
    220 }
    221 
    222 void ShowMultiNode::ResetTotalStats() {
    223   formatted_str.clear();
    224   mutable_proto()->set_total_exec_micros(0);
    225   mutable_proto()->set_total_accelerator_exec_micros(0);
    226   mutable_proto()->set_total_cpu_exec_micros(0);
    227 
    228   mutable_proto()->set_total_requested_bytes(0);
    229   mutable_proto()->set_total_peak_bytes(0);
    230   mutable_proto()->set_total_residual_bytes(0);
    231   mutable_proto()->set_total_output_bytes(0);
    232 
    233   mutable_proto()->set_total_parameters(0);
    234   mutable_proto()->set_total_float_ops(0);
    235   mutable_proto()->mutable_children()->Clear();
    236 }
    237 
    238 }  // namespace tfprof
    239 }  // namespace tensorflow
    240