Home | History | Annotate | Download | only in service
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/compiler/xla/service/human_readable_profile_builder.h"
     17 #include "tensorflow/compiler/xla/metric_table_report.h"
     18 #include "tensorflow/compiler/xla/util.h"
     19 #include "tensorflow/core/lib/strings/numbers.h"
     20 #include "tensorflow/core/lib/strings/strcat.h"
     21 #include "tensorflow/core/lib/strings/stringprintf.h"
     22 
     23 namespace xla {
     24 
     25 using tensorflow::strings::Appendf;
     26 using tensorflow::strings::HumanReadableElapsedTime;
     27 using tensorflow::strings::HumanReadableNumBytes;
     28 using tensorflow::strings::Printf;
     29 using tensorflow::strings::StrAppend;
     30 
     31 string HumanReadableProfileBuilder::ToString() const {
     32   string s;
     33 
     34   Appendf(&s, "Execution profile for %s: (%s @ f_nom)\n",
     35           computation_name_.c_str(),
     36           HumanReadableElapsedTime(CyclesToSeconds(total_cycles_)).c_str());
     37 
     38   auto append_op = [&](const OpInfo& op) {
     39     string bytes_per_sec;
     40     string bytes_per_cycle;
     41     if (op.cycles <= 0 || op.bytes_accessed < 0) {
     42       bytes_per_sec = "<unknown>";
     43       bytes_per_cycle = "<unknown>";
     44     } else {
     45       bytes_per_sec =
     46           HumanReadableNumBytes(op.bytes_accessed / CyclesToSeconds(op.cycles));
     47       if (op.bytes_accessed > op.cycles) {
     48         bytes_per_cycle = HumanReadableNumBytes(op.bytes_accessed / op.cycles);
     49       } else {
     50         bytes_per_cycle =
     51             Printf("%.3fB", static_cast<float>(op.bytes_accessed) / op.cycles);
     52       }
     53     }
     54 
     55     double cycles_percent = 0;
     56     if (total_cycles_ > 0) {
     57       cycles_percent = op.cycles / static_cast<double>(total_cycles_) * 100;
     58     }
     59 
     60     double nsecs = op.cycles / clock_rate_ghz_;
     61     Appendf(&s,
     62             "%15lld cycles (%6.2f%%) :: %12.1f usec (%12.1f optimal) :: %18s "
     63             ":: %18s :: %12s/s :: %12s/cycle :: %s\n",
     64             op.cycles, cycles_percent, CyclesToMicroseconds(op.cycles),
     65             op.optimal_seconds * 1e6,
     66             op.flop_count <= 0
     67                 ? "<none>"
     68                 : HumanReadableNumFlops(op.flop_count, nsecs).c_str(),
     69             op.transcendental_count <= 0 ? "<none>"
     70                                          : HumanReadableNumTranscendentalOps(
     71                                                op.transcendental_count, nsecs)
     72                                                .c_str(),
     73             bytes_per_sec.c_str(), bytes_per_cycle.c_str(), op.name.c_str());
     74   };
     75 
     76   float optimal_seconds_sum = 0.0;
     77   int64 total_flops = 0.;
     78   int64 total_transcendentals = 0.;
     79   int64 total_bytes = 0;
     80   for (const auto& op : op_infos_) {
     81     optimal_seconds_sum += op.optimal_seconds;
     82     total_flops += op.flop_count;
     83     total_transcendentals += op.transcendental_count;
     84     total_bytes += op.bytes_accessed;
     85   }
     86 
     87   VLOG(1) << "Total floating point ops: " << total_flops;
     88 
     89   append_op({"[total]", "[total]", /*category=*/"", total_cycles_, total_flops,
     90              total_transcendentals, total_bytes, optimal_seconds_sum});
     91 
     92   // Sort ops in decreasing order of cycles.
     93   std::vector<OpInfo> sorted_ops(op_infos_);
     94   std::sort(
     95       sorted_ops.begin(), sorted_ops.end(),
     96       [](const OpInfo& a, const OpInfo& b) { return a.cycles > b.cycles; });
     97   for (const auto& op : sorted_ops) {
     98     append_op(op);
     99   }
    100 
    101   if (total_cycles_ <= 0) {
    102     StrAppend(&s, "****** 0 total cycles ******\n");
    103   } else {
    104     // Only show an optimal discrepancy table if at least one value was
    105     // specified. Estimates are non-negative, so if the sum is greater than
    106     // zero, then at least one summand was greater than zero.
    107     if (optimal_seconds_sum > 0) {
    108       MetricTableReport table;
    109       table.SetMetricName("microseconds above estimated optimum");
    110       table.SetEntryName("ops");
    111       table.SetShowCategoryTable();
    112       float total_discrepancy_in_microseconds = 0.0f;
    113       for (const auto& op : sorted_ops) {
    114         MetricTableReport::Entry entry;
    115         entry.text = op.name;
    116         entry.short_text = op.short_name;
    117         entry.category_text = op.category;
    118         entry.metric =
    119             CyclesToMicroseconds(op.cycles) - op.optimal_seconds * 1e6;
    120         total_discrepancy_in_microseconds += entry.metric;
    121         table.AddEntry(std::move(entry));
    122       }
    123       StrAppend(&s, table.MakeReport(total_discrepancy_in_microseconds));
    124     }
    125 
    126     {
    127       MetricTableReport table;
    128       table.SetMetricName("microseconds");
    129       table.SetEntryName("ops");
    130       table.SetShowCategoryTable();
    131       for (const auto& op : sorted_ops) {
    132         MetricTableReport::Entry entry;
    133         entry.text = op.name;
    134         entry.short_text = op.short_name;
    135         entry.category_text = op.category;
    136         entry.metric = CyclesToMicroseconds(op.cycles);
    137         table.AddEntry(std::move(entry));
    138       }
    139       StrAppend(&s, table.MakeReport(CyclesToMicroseconds(total_cycles_)));
    140     }
    141   }
    142   return s;
    143 }
    144 
    145 }  // namespace xla
    146