Home | History | Annotate | Download | only in stream_executor
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
#include "tensorflow/stream_executor/kernel_spec.h"

#include <cstring>
     17 
     18 
     19 namespace perftools {
     20 namespace gputools {
     21 
     22 KernelLoaderSpec::KernelLoaderSpec(port::StringPiece kernelname)
     23     : kernelname_(kernelname.ToString()) {}
     24 
     25 OnDiskKernelLoaderSpec::OnDiskKernelLoaderSpec(port::StringPiece filename,
     26                                                port::StringPiece kernelname)
     27     : KernelLoaderSpec(kernelname), filename_(filename.ToString()) {}
     28 
     29 CudaPtxOnDisk::CudaPtxOnDisk(port::StringPiece filename,
     30                              port::StringPiece kernelname)
     31     : OnDiskKernelLoaderSpec(filename, kernelname) {}
     32 
     33 CudaCubinOnDisk::CudaCubinOnDisk(port::StringPiece filename,
     34                                  port::StringPiece kernelname)
     35     : OnDiskKernelLoaderSpec(filename, kernelname) {}
     36 
     37 CudaCubinInMemory::CudaCubinInMemory(const char *bytes,
     38                                      port::StringPiece kernelname)
     39     : KernelLoaderSpec(kernelname), bytes_(bytes) {}
     40 
     41 bool CompareComputeCapability(const std::tuple<int, int> &lhs,
     42                               const std::tuple<int, int> &rhs) {
     43   return std::get<0>(lhs) < std::get<0>(rhs) ||
     44          (std::get<0>(lhs) == std::get<0>(rhs) &&
     45           std::get<1>(lhs) < std::get<1>(rhs));
     46 }
     47 
     48 const std::tuple<int, int> CudaPtxInMemory::kMinimumCapability{1, 0};
     49 
     50 CudaPtxInMemory::CudaPtxInMemory(port::StringPiece ptx,
     51                                  port::StringPiece kernel_name,
     52                                  bool ptx_compressed)
     53     : KernelLoaderSpec(kernel_name),
     54       ptx_by_compute_capability_(CompareComputeCapability) {
     55   if (ptx_compressed) {
     56     // Lazy decompression. Put an empty string in decompressed_ptx_ showing that
     57     // the original ptx is compressed.
     58     decompressed_ptx_[ptx.data()] = "";
     59   }
     60   ptx_by_compute_capability_[kMinimumCapability] = ptx.data();
     61 }
     62 
     63 CudaPtxInMemory::CudaPtxInMemory(
     64     const std::initializer_list<CudaPtxInMemory::PtxSpec> &spec_list,
     65     port::StringPiece kernel_name, bool ptx_compressed)
     66     : KernelLoaderSpec(kernel_name),
     67       ptx_by_compute_capability_(CompareComputeCapability) {
     68   for (const auto &spec : spec_list) {
     69     int major, minor;
     70     port::StringPiece ptx;
     71     std::tie(major, minor, ptx) = spec;
     72     if (ptx_compressed) {
     73       // Lazy decompression. Put an empty string in decompressed_ptx_ showing
     74       // that the original ptx is compressed.
     75       decompressed_ptx_[ptx.data()] = "";
     76     }
     77     ptx_by_compute_capability_[std::tuple<int, int>{major, minor}] = ptx.data();
     78   }
     79 }
     80 
     81 string CudaPtxInMemory::DecompressPtx(const char *ptx) {
     82   // Get the length of the PTX string from the beginning of the buffer.
     83   uint64 ptx_length = *reinterpret_cast<const uint64 *>(ptx);
     84   // Get the PTX string from the buffer with offset and length.
     85   string compressed_ptx(ptx + sizeof(uint64),
     86                         ptx + sizeof(uint64) + ptx_length);
     87   string decompressed_ptx;
     88   // Decompress the PTX string with bzip2.
     89   LOG(FATAL) << "bzip2 decompression is not supported yet.";
     90   return decompressed_ptx;
     91 }
     92 
     93 const char *CudaPtxInMemory::default_text() const {
     94   if (ptx_by_compute_capability_.empty()) {
     95     return nullptr;
     96   }
     97 
     98   mutex_lock lock{mu_};
     99 
    100   auto ptx = ptx_by_compute_capability_.begin()->second;
    101   // Check if there is an entry in decompressed ptx table.
    102   auto decompressed_ptx_iter = decompressed_ptx_.find(ptx);
    103   if (decompressed_ptx_iter != decompressed_ptx_.end()) {
    104     // If the decompressed string is empty, which means the ptx hasn't been
    105     // decompressed, decompress it here.
    106     if (decompressed_ptx_iter->second.empty()) {
    107       decompressed_ptx_iter->second = DecompressPtx(ptx);
    108     }
    109     return decompressed_ptx_iter->second.c_str();
    110   }
    111   return ptx;
    112 }
    113 
    114 const char *CudaPtxInMemory::original_default_text() const {
    115   if (ptx_by_compute_capability_.empty()) {
    116     return nullptr;
    117   }
    118 
    119   return ptx_by_compute_capability_.begin()->second;
    120 }
    121 
    122 const char *CudaPtxInMemory::text(int compute_capability_major,
    123                                   int compute_capability_minor) const {
    124   std::tuple<int, int> capability{compute_capability_major,
    125                                   compute_capability_minor};
    126 
    127   auto ptx_iter = ptx_by_compute_capability_.find(capability);
    128   if (ptx_iter == ptx_by_compute_capability_.end()) {
    129     return nullptr;
    130   }
    131 
    132   mutex_lock lock{mu_};
    133 
    134   // Check if there is an entry in decompressed ptx table.
    135   auto decompressed_ptx_iter = decompressed_ptx_.find(ptx_iter->second);
    136   if (decompressed_ptx_iter != decompressed_ptx_.end()) {
    137     // If the decompressed string is empty, which means the ptx hasn't been
    138     // decompressed, decompress it here.
    139     if (decompressed_ptx_iter->second.empty()) {
    140       decompressed_ptx_iter->second = DecompressPtx(ptx_iter->second);
    141     }
    142     return decompressed_ptx_iter->second.c_str();
    143   }
    144   return ptx_iter->second;
    145 }
    146 
    147 const char *CudaPtxInMemory::original_text(int compute_capability_major,
    148                                            int compute_capability_minor) const {
    149   std::tuple<int, int> capability{compute_capability_major,
    150                                   compute_capability_minor};
    151 
    152   auto ptx_iter = ptx_by_compute_capability_.find(capability);
    153   if (ptx_iter == ptx_by_compute_capability_.end()) {
    154     return nullptr;
    155   }
    156 
    157   return ptx_iter->second;
    158 }
    159 
    160 OpenCLTextOnDisk::OpenCLTextOnDisk(port::StringPiece filename,
    161                                    port::StringPiece kernelname)
    162     : OnDiskKernelLoaderSpec(filename, kernelname) {}
    163 
    164 OpenCLTextInMemory::OpenCLTextInMemory(port::StringPiece text,
    165                                        port::StringPiece kernelname)
    166     : KernelLoaderSpec(kernelname), text_(text.ToString()) {}
    167 
    168 OpenCLBinaryOnDisk::OpenCLBinaryOnDisk(port::StringPiece filename,
    169                                        port::StringPiece kernelname)
    170     : OnDiskKernelLoaderSpec(filename, kernelname) {}
    171 
    172 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextOnDisk(
    173     port::StringPiece filename, port::StringPiece kernelname) {
    174   CHECK(ocl_text_on_disk_ == nullptr);
    175   ocl_text_on_disk_.reset(new OpenCLTextOnDisk{filename, kernelname});
    176   return this;
    177 }
    178 
    179 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLBinaryOnDisk(
    180     port::StringPiece filename, port::StringPiece kernelname) {
    181   CHECK(ocl_binary_on_disk_ == nullptr);
    182   ocl_binary_on_disk_.reset(new OpenCLBinaryOnDisk{filename, kernelname});
    183   return this;
    184 }
    185 
    186 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextInMemory(
    187     port::StringPiece filename, port::StringPiece kernelname) {
    188   CHECK(ocl_text_in_memory_ == nullptr);
    189   ocl_text_in_memory_.reset(new OpenCLTextInMemory{filename, kernelname});
    190   return this;
    191 }
    192 
    193 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxOnDisk(
    194     port::StringPiece filename, port::StringPiece kernelname) {
    195   CHECK(cuda_ptx_on_disk_ == nullptr);
    196   cuda_ptx_on_disk_.reset(new CudaPtxOnDisk{filename, kernelname});
    197   return this;
    198 }
    199 
    200 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory(
    201     const char *bytes, port::StringPiece kernelname) {
    202   CHECK(cuda_cubin_in_memory_ == nullptr);
    203   cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernelname});
    204   return this;
    205 }
    206 
    207 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinOnDisk(
    208     port::StringPiece filename, port::StringPiece kernelname) {
    209   CHECK(cuda_cubin_on_disk_ == nullptr);
    210   cuda_cubin_on_disk_.reset(new CudaCubinOnDisk{filename, kernelname});
    211   return this;
    212 }
    213 
    214 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
    215     port::StringPiece ptx, port::StringPiece kernelname) {
    216   CHECK(cuda_ptx_in_memory_ == nullptr);
    217   cuda_ptx_in_memory_.reset(
    218       new CudaPtxInMemory{ptx, kernelname, false /* ptx_compressed */});
    219   return this;
    220 }
    221 
    222 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
    223     port::StringPiece ptx, port::StringPiece kernelname) {
    224   CHECK(cuda_ptx_in_memory_ == nullptr);
    225   cuda_ptx_in_memory_.reset(
    226       new CudaPtxInMemory{ptx, kernelname, true /* ptx_compressed */});
    227   return this;
    228 }
    229 
    230 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
    231     std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
    232     port::StringPiece kernelname) {
    233   CHECK(cuda_ptx_in_memory_ == nullptr);
    234   cuda_ptx_in_memory_.reset(
    235       new CudaPtxInMemory{spec_list, kernelname, false /* ptx_compressed */});
    236   return this;
    237 }
    238 
    239 MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
    240     std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
    241     port::StringPiece kernelname) {
    242   CHECK(cuda_ptx_in_memory_ == nullptr);
    243   cuda_ptx_in_memory_.reset(
    244       new CudaPtxInMemory{spec_list, kernelname, true /* ptx_compressed */});
    245   return this;
    246 }
    247 
    248 MultiKernelLoaderSpec::MultiKernelLoaderSpec(size_t arity) : arity_(arity) {}
    249 
    250 }  // namespace gputools
    251 }  // namespace perftools
    252