Home | History | Annotate | Download | only in stream_executor
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // TODO(jhen): Replace hardcoded, platform specific path strings in GetXXXPath()
     17 // with a function in e.g. cuda.h.
     18 
     19 #include "tensorflow/stream_executor/dso_loader.h"
     20 
     21 #include <limits.h>
     22 #include <stdlib.h>
     23 #include <initializer_list>
     24 #include <vector>
     25 
     26 #include "tensorflow/core/platform/load_library.h"
     27 #include "tensorflow/stream_executor/lib/env.h"
     28 #include "tensorflow/stream_executor/lib/error.h"
     29 #include "tensorflow/stream_executor/lib/path.h"
     30 #include "tensorflow/stream_executor/lib/str_util.h"
     31 #include "tensorflow/stream_executor/lib/strcat.h"
     32 #include "tensorflow/stream_executor/lib/stringprintf.h"
     33 #include "tensorflow/stream_executor/platform/logging.h"
     34 #include "tensorflow/stream_executor/platform/port.h"
     35 
     36 #if !defined(PLATFORM_GOOGLE)
     37 #include "cuda/cuda_config.h"
     38 #endif
     39 
     40 namespace perftools {
     41 namespace gputools {
     42 namespace internal {
     43 
     44 string GetCudaVersion() { return TF_CUDA_VERSION; }
     45 string GetCudnnVersion() { return TF_CUDNN_VERSION; }
     46 
     47 /* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) {
     48   return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
     49                                       "cublas", GetCudaVersion()),
     50                                   GetCudaLibraryDirPath()),
     51                       dso_handle);
     52 }
     53 
     54 /* static */ port::Status DsoLoader::GetCudnnDsoHandle(void** dso_handle) {
     55   // libcudnn is versioned differently than the other libraries and may have a
     56   // different version number than other CUDA libraries.  See b/22397368 for
     57   // some details about the complications surrounding this.
     58   return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
     59                                       "cudnn", GetCudnnVersion()),
     60                                   GetCudaLibraryDirPath()),
     61                       dso_handle);
     62 }
     63 
     64 /* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) {
     65   return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
     66                                       "cufft", GetCudaVersion()),
     67                                   GetCudaLibraryDirPath()),
     68                       dso_handle);
     69 }
     70 
     71 /* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) {
     72   return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
     73                                       "curand", GetCudaVersion()),
     74                                   GetCudaLibraryDirPath()),
     75                       dso_handle);
     76 }
     77 
     78 /* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) {
     79 #if defined(PLATFORM_WINDOWS)
     80   return GetDsoHandle(
     81       FindDsoPath(port::Env::Default()->FormatLibraryFileName("nvcuda", ""),
     82                   GetCudaDriverLibraryPath()),
     83       dso_handle);
     84 #else
     85   port::Status status = GetDsoHandle(
     86       FindDsoPath(port::Env::Default()->FormatLibraryFileName("cuda", "1"),
     87                   GetCudaDriverLibraryPath()),
     88       dso_handle);
     89 #if defined(__APPLE__)
     90   // On Mac OS X, CUDA sometimes installs libcuda.dylib instead of
     91   // libcuda.1.dylib.
     92   return status.ok() ? status : GetDsoHandle(
     93      FindDsoPath(port::Env::Default()->FormatLibraryFileName("cuda", ""),
     94                  GetCudaDriverLibraryPath()),
     95      dso_handle);
     96 #else
     97   return status;
     98 #endif
     99 #endif
    100 }
    101 
    102 /* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) {
    103 #if defined(ANDROID_TEGRA)
    104   // On Android devices the CUDA version number is not added to the library
    105   // name.
    106   return GetDsoHandle(
    107       FindDsoPath(port::Env::Default()->FormatLibraryFileName("cupti", ""),
    108                   GetCudaCuptiLibraryPath()),
    109       dso_handle);
    110 #else
    111   return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName(
    112                                       "cupti", GetCudaVersion()),
    113                                   GetCudaCuptiLibraryPath()),
    114                       dso_handle);
    115 #endif
    116 }
    117 
    118 static mutex& GetRpathMutex() {
    119   static mutex* mu = new mutex;
    120   return *mu;
    121 }
    122 
    123 /* static */ void DsoLoader::RegisterRpath(port::StringPiece path) {
    124   mutex_lock lock{GetRpathMutex()};
    125   GetRpaths()->push_back(path.ToString());
    126 }
    127 
    128 /* static */ port::Status DsoLoader::GetDsoHandle(port::StringPiece path,
    129                                                   void** dso_handle,
    130                                                   LoadKind load_kind) {
    131   if (load_kind != LoadKind::kLocal) {
    132     return port::Status(port::error::INVALID_ARGUMENT,
    133                         "Only LoadKind::kLocal is currently supported");
    134   }
    135   string path_string = path.ToString();
    136   port::Status s =
    137       port::Env::Default()->LoadLibrary(path_string.c_str(), dso_handle);
    138   if (!s.ok()) {
    139 #if !defined(PLATFORM_WINDOWS)
    140     char* ld_library_path = getenv("LD_LIBRARY_PATH");
    141 #endif
    142     LOG(INFO) << "Couldn't open CUDA library " << path
    143 #if !defined(PLATFORM_WINDOWS)
    144               << ". LD_LIBRARY_PATH: "
    145               << (ld_library_path != nullptr ? ld_library_path : "")
    146 #endif
    147     ;
    148     return port::Status(port::error::FAILED_PRECONDITION,
    149                         port::StrCat("could not dlopen DSO: ", path,
    150                                      "; dlerror: ", s.error_message()));
    151   }
    152   LOG(INFO) << "successfully opened CUDA library " << path << " locally";
    153   return port::Status::OK();
    154 }
    155 
    156 /* static */ string DsoLoader::GetBinaryDirectory(bool strip_executable_name) {
    157   string exe_path = port::Env::Default()->GetExecutablePath();
    158   return strip_executable_name ? port::Dirname(exe_path).ToString() : exe_path;
    159 }
    160 
    161 // Creates a heap-allocated vector for initial rpaths.
    162 // Ownership is transferred to the caller.
    163 static std::vector<string>* CreatePrimordialRpaths() {
    164   auto rpaths = new std::vector<string>;
    165 #if defined(__APPLE__)
    166   rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib");
    167 #else
    168   rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib64");
    169 #endif
    170   return rpaths;
    171 }
    172 
    173 /* static */ std::vector<string>* DsoLoader::GetRpaths() {
    174   static std::vector<string>* rpaths = CreatePrimordialRpaths();
    175   return rpaths;
    176 }
    177 
    178 /* static */ bool DsoLoader::TrySymbolicDereference(string* candidate) {
    179 #if defined(PLATFORM_WINDOWS)
    180   return false;
    181 #else
    182   char buf[PATH_MAX];
    183   char* result = realpath(candidate->c_str(), buf);
    184   if (result == nullptr) {
    185     return false;
    186   }
    187   VLOG(3) << "realpath resolved candidate path \"" << *candidate << "\" to \""
    188           << result << "\"";
    189   *candidate = result;
    190   return true;
    191 #endif
    192 }
    193 
    194 /* static */ string DsoLoader::FindDsoPath(port::StringPiece library_name,
    195                                            port::StringPiece runfiles_relpath) {
    196   // Keep a record of the paths we attempted so we can dump out meaningful
    197   // diagnostics if no path is found.
    198   std::vector<string> attempted;
    199 
    200   using StringPieces = std::vector<port::StringPiece>;
    201   string candidate;
    202 
    203   // Otherwise, try binary-plus-rpath locations.
    204   string binary_directory =
    205       GetBinaryDirectory(true /* = strip_executable_name */);
    206   mutex_lock lock{GetRpathMutex()};
    207   for (const string& rpath : *GetRpaths()) {
    208     candidate =
    209         port::Join(StringPieces{binary_directory, rpath, library_name}, "/");
    210     if (TrySymbolicDereference(&candidate)) {
    211       return candidate;
    212     }
    213   }
    214   attempted.push_back(candidate);
    215 
    216   return library_name.ToString();
    217 }
    218 
    219 /* static */ string DsoLoader::GetCudaLibraryDirPath() {
    220 #if defined(__APPLE__)
    221   return "external/local_config_cuda/cuda/lib";
    222 #else
    223   return "external/local_config_cuda/cuda/lib64";
    224 #endif
    225 }
    226 
    227 /* static */ string DsoLoader::GetCudaDriverLibraryPath() {
    228 #if defined(__APPLE__)
    229   return "external/local_config_cuda/cuda/driver/lib";
    230 #elif defined(PLATFORM_WINDOWS)
    231   return "";
    232 #else
    233   return "external/local_config_cuda/cuda/driver/lib64";
    234 #endif
    235 }
    236 
    237 /* static */ string DsoLoader::GetCudaCuptiLibraryPath() {
    238 #if defined(__APPLE__)
    239   return "external/local_config_cuda/cuda/extras/CUPTI/lib";
    240 #else
    241   return "external/local_config_cuda/cuda/extras/CUPTI/lib64";
    242 #endif
    243 }
    244 
    245 // -- CachedDsoLoader
    246 
    247 /* static */ port::StatusOr<void*> CachedDsoLoader::GetCublasDsoHandle() {
    248   static port::StatusOr<void*> result =
    249       FetchHandleResult(DsoLoader::GetCublasDsoHandle);
    250   return result;
    251 }
    252 
    253 /* static */ port::StatusOr<void*> CachedDsoLoader::GetCurandDsoHandle() {
    254   static port::StatusOr<void*> result =
    255       FetchHandleResult(DsoLoader::GetCurandDsoHandle);
    256   return result;
    257 }
    258 
    259 /* static */ port::StatusOr<void*> CachedDsoLoader::GetCudnnDsoHandle() {
    260   static port::StatusOr<void*> result =
    261       FetchHandleResult(DsoLoader::GetCudnnDsoHandle);
    262   return result;
    263 }
    264 
    265 /* static */ port::StatusOr<void*> CachedDsoLoader::GetCufftDsoHandle() {
    266   static port::StatusOr<void*> result =
    267       FetchHandleResult(DsoLoader::GetCufftDsoHandle);
    268   return result;
    269 }
    270 
    271 /* static */ port::StatusOr<void*> CachedDsoLoader::GetLibcudaDsoHandle() {
    272   static port::StatusOr<void*> result =
    273       FetchHandleResult(DsoLoader::GetLibcudaDsoHandle);
    274   return result;
    275 }
    276 
    277 /* static */ port::StatusOr<void*> CachedDsoLoader::GetLibcuptiDsoHandle() {
    278   static port::StatusOr<void*> result =
    279       FetchHandleResult(DsoLoader::GetLibcuptiDsoHandle);
    280   return result;
    281 }
    282 
    283 /* static */ port::StatusOr<void*> CachedDsoLoader::FetchHandleResult(
    284     std::function<port::Status(void**)> load_dso) {
    285   void* handle;
    286   auto status = load_dso(&handle);
    287   if (!status.ok()) {
    288     return status;
    289   }
    290   return handle;
    291 }
    292 
    293 }  // namespace internal
    294 }  // namespace gputools
    295 }  // namespace perftools
    296