/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/xla/service/executable.h"

#include "tensorflow/compiler/xla/legacy_flags/debug_options_flags.h"
#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h"

using tensorflow::gtl::ArraySlice;

namespace xla {

StatusOr<std::vector<std::unique_ptr<ShapedBuffer>>>
Executable::ExecuteOnStreams(
    ArraySlice<const ServiceExecutableRunOptions> run_options,
    ArraySlice<ArraySlice<const ShapedBuffer*>> arguments) {
  TF_RET_CHECK(run_options.size() == arguments.size());

  std::vector<std::unique_ptr<ShapedBuffer>> return_values(run_options.size());

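  // Fast path: with a single execution there is nothing to overlap or
  // communicate with, so run synchronously on the caller's stream.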
  if (run_options.size() == 1) {
    TF_ASSIGN_OR_RETURN(return_values[0],
                        ExecuteOnStream(&run_options[0], arguments[0],
                                        /*hlo_execution_profile=*/nullptr));
    return std::move(return_values);
  }

  for (size_t i = 0; i < run_options.size(); ++i) {
    // We cannot BlockHostUntilDone() on the already-launched executions in case
    // of error, since if the executions communicate, the initially launched
    // executions may never complete if not all executions are running.
    TF_ASSIGN_OR_RETURN(return_values[i],
                        ExecuteAsyncOnStream(&run_options[i], arguments[i]));
  }
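  // All executions are now in flight, so it is safe to block on each stream
  // in turn.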
  for (const auto& options : run_options) {
    TF_RET_CHECK(options.stream() != nullptr);
    TF_RETURN_IF_ERROR(options.stream()->BlockHostUntilDone());
  }
  return std::move(return_values);
}
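
// Illustrative usage (a hypothetical caller, not part of this file): launch
// one execution per stream and collect the per-stream results.  The
// `executable`, `service_run_options`, and `replica_arguments` names are
// assumed to be populated by the caller.
//
//   TF_ASSIGN_OR_RETURN(auto results,
//                       executable->ExecuteOnStreams(service_run_options,
//                                                    replica_arguments));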

StatusOr<std::unique_ptr<ShapedBuffer>> Executable::ExecuteOnStreamWrapper(
    const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
    ArraySlice<const ShapedBuffer*> arguments) {
  perftools::gputools::Stream* stream = run_options->stream();
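  // When an ExecutionProfile is requested, bracket the execution with a
  // device timer so the end-to-end (enqueue to completion) time can be
  // reported below.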
  std::unique_ptr<perftools::gputools::Timer> timer;
  if (profile != nullptr) {
    timer.reset(new perftools::gputools::Timer(stream->parent()));
    stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
  }

  VLOG(1) << "enqueueing executable on stream...";
  // If the profiling flag isn't enabled, we pass nullptr as the profile to
  // indicate profiling is not requested.
  std::unique_ptr<HloExecutionProfile> profile_ptr =
      module_config().debug_options().xla_hlo_profile() &&
              hlo_profiling_enabled()
          ? MakeUnique<HloExecutionProfile>(&hlo_profile_printer_data(),
                                            &hlo_profile_index_map())
          : nullptr;

  StatusOr<std::unique_ptr<ShapedBuffer>> return_value =
      ExecuteOnStream(run_options, arguments, profile_ptr.get());

  if (profile != nullptr) {
    VLOG(1) << "enqueueing 'stop timer' and blocking host until done...";
    stream->ThenStopTimer(timer.get());
    TF_RETURN_IF_ERROR(stream->BlockHostUntilDone());
    VLOG(1) << "done with block-host-until-done";

    // Merge in run-time profile information from execution_profile.
    //
    // TODO(b/71713097): This is buggy -- even though the mutex takes care of
    // C++ level races, some other concurrent ExecuteOnStreamWrapper call could
    // have rewritten the execution_profile before we get to it.
    profile->MergeFrom(execution_profile());

    // Overall execution time (in nanoseconds) from the executor timer.
    if (stream->ok()) {
      // Don't read timer->Nanoseconds() if the stream isn't OK -- that's
      // illegal.
      profile->set_compute_and_transfer_time_ns(timer->Nanoseconds());
    }

    // TODO(b/28123297): On GPU we end up including transfer time in
    // the compute time this way. Instead, we should get the correct
    // value by measuring it. Setting the field here at least lets
    // benchmarks provide *some* value for GPU computations.
    //
    // TODO(b/28447609): The value in compute_and_transfer_time_ns is actually
    // the compute time without the transfer time, so this way we get the
    // correct compute time. We should instead have the correct value for
    // compute_and_transfer_time and set compute_time to the compute time.
    if (profile->compute_time_ns() == 0) {
      profile->set_compute_time_ns(profile->compute_and_transfer_time_ns());
    }
  }

  if (profile_ptr != nullptr) {
    XLA_LOG_LINES(
        tensorflow::INFO,
        profile_ptr->ToString(stream->parent()->GetDeviceDescription()));
    hlo_graph_dumper::MaybeDumpHloModule(module(), "Service::Execute",
                                         profile_ptr.get());
  }

  return return_value;
}

Status Executable::DumpSessionModule() {
  TF_RET_CHECK(dumping());
  const string& directory_path =
      module_config().debug_options().xla_dump_executions_to();
  VersionedComputationHandle versioned_handle = entry_computation_handle();
  // This filename does not include the version number because the computation
  // is only ever executed at one version.
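  // The execution count is pre-incremented so that successive executions of
  // the same computation dump to distinct files.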
  string filename = tensorflow::strings::Printf(
      "computation_%lld__%s__execution_%lld", versioned_handle.handle.handle(),
      session_module_->entry().name().c_str(), ++execution_count_);
  return Executable::DumpToDirectory(directory_path, filename,
                                     *session_module_);
}

/* static */ Status Executable::DumpToDirectory(
    const string& directory_path, string filename,
    const SessionModule& session_module) {
  tensorflow::Env* env = tensorflow::Env::Default();
  if (!env->IsDirectory(directory_path).ok()) {
    // NB! CreateDir does not work reliably with multiple XLA threads -- two
    // threads can race to observe the absence of the dump directory and
    // simultaneously try to create it, causing the "losing" thread to get a
    // "directory already exists" error.
    TF_RETURN_IF_ERROR(env->RecursivelyCreateDir(directory_path));
  }
  filename = SanitizeFileName(std::move(filename));
  string file_path = tensorflow::io::JoinPath(directory_path, filename);
  string result;
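  // Serialize deterministically so that repeated dumps of the same module
  // produce identical bytes.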
  TF_RET_CHECK(
      tensorflow::SerializeToStringDeterministic(session_module, &result));
  return tensorflow::WriteStringToFile(tensorflow::Env::Default(), file_path,
                                       result);
}

}  // namespace xla