Home | History | Annotate | Download | only in sycl
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #if !TENSORFLOW_USE_SYCL
     17 #error This file must only be included when building TensorFlow with SYCL support
     18 #endif
     19 
     20 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
     21 #define TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
     22 
     23 #include "tensorflow/core/common_runtime/local_device.h"
     24 #include "tensorflow/core/common_runtime/sycl/sycl_allocator.h"
     25 #include "tensorflow/core/common_runtime/sycl/sycl_device_context.h"
     26 #include "tensorflow/core/public/session_options.h"
     27 
     28 namespace tensorflow {
     29 
     30 class GSYCLInterface {
     31   std::vector<Eigen::QueueInterface*> m_queue_interface_;  // owned
     32   std::vector<Allocator*> m_cpu_allocator_;                // not owned
     33   std::vector<SYCLAllocator*> m_sycl_allocator_;           // owned
     34   std::vector<SYCLDeviceContext*> m_sycl_context_;         // ref counted
     35   GSYCLInterface() {
     36     bool found_device = false;
     37     auto device_list = Eigen::get_sycl_supported_devices();
     38     // Obtain list of supported devices from Eigen
     39     for (const auto& device : device_list) {
     40       if (device.is_gpu()) {
     41         // returns first found GPU
     42         AddDevice(device);
     43         found_device = true;
     44       }
     45     }
     46 
     47     if (!found_device) {
     48       // Currently Intel GPU is not supported
     49       LOG(WARNING) << "No OpenCL GPU found that is supported by "
     50                    << "ComputeCpp/triSYCL, trying OpenCL CPU";
     51     }
     52 
     53     for (const auto& device : device_list) {
     54       if (device.is_cpu()) {
     55         // returns first found CPU
     56         AddDevice(device);
     57         found_device = true;
     58       }
     59     }
     60 
     61     if (!found_device) {
     62       LOG(WARNING) << "No OpenCL CPU found that is supported by "
     63                    << "ComputeCpp/triSYCL, checking for host sycl device";
     64     }
     65 
     66     for (const auto& device : device_list) {
     67       // triSYCL only supports the host device for now
     68       if (device.is_host()) {
     69         LOG(WARNING) << "Found SYCL host device";
     70         AddDevice(device);
     71         found_device = true;
     72       }
     73     }
     74 
     75     if (!found_device) {
     76       // Currently Intel GPU is not supported
     77       LOG(FATAL) << "No SYCL host and no OpenCL GPU nor CPU"
     78                  << " supported by ComputeCPP/triSYCL was found";
     79     } else {
     80       LOG(INFO) << "Found following OpenCL devices:";
     81       for (int i = 0; i < device_list.size(); i++) {
     82         LOG(INFO) << GetShortDeviceDescription(i);
     83       }
     84     }
     85   }
     86 
     87   ~GSYCLInterface() {
     88     m_cpu_allocator_.clear();
     89 
     90     for (auto p : m_sycl_allocator_) {
     91       p->Synchronize();
     92       p->ClearSYCLDevice();
     93       // Cannot delete the Allocator instances, as the Allocator lifetime
     94       // needs to exceed any Tensor created by it. There is no way of
     95       // knowing when all Tensors have been deallocated, as they are
     96       // RefCounted and wait until all instances of a Tensor have been
     97       // destroyed before calling Allocator.Deallocate. This could happen at
     98       // program exit, which can set up a race condition between destroying
     99       // Tensors and Allocators when the program is cleaning up.
    100     }
    101     m_sycl_allocator_.clear();
    102 
    103     for (auto p : m_sycl_context_) {
    104       p->Unref();
    105     }
    106     m_sycl_context_.clear();
    107 
    108     for (auto p : m_queue_interface_) {
    109       p->deallocate_all();
    110       delete p;
    111     }
    112     m_queue_interface_.clear();
    113   }
    114 
    115   void AddDevice(const cl::sycl::device& d) {
    116     m_queue_interface_.push_back(new Eigen::QueueInterface(d));
    117     m_cpu_allocator_.push_back(cpu_allocator());
    118     m_sycl_allocator_.push_back(new SYCLAllocator(m_queue_interface_.back()));
    119     m_sycl_context_.push_back(new SYCLDeviceContext());
    120   }
    121 
    122  public:
    123   static const GSYCLInterface* instance() {
    124     // c++11 guarantees that this will be constructed in a thread safe way
    125     static const GSYCLInterface instance;
    126     return &instance;
    127   }
    128 
    129   Eigen::QueueInterface* GetQueueInterface(size_t i = 0) const {
    130     if (!m_queue_interface_.empty()) {
    131       return m_queue_interface_[i];
    132     } else {
    133       std::cerr << "No cl::sycl::device has been added" << std::endl;
    134       return nullptr;
    135     }
    136   }
    137 
    138   SYCLAllocator* GetSYCLAllocator(size_t i = 0) const {
    139     if (!m_sycl_allocator_.empty()) {
    140       return m_sycl_allocator_[i];
    141     } else {
    142       std::cerr << "No cl::sycl::device has been added" << std::endl;
    143       return nullptr;
    144     }
    145   }
    146 
    147   Allocator* GetCPUAllocator(size_t i = 0) const {
    148     if (!m_cpu_allocator_.empty()) {
    149       return m_cpu_allocator_[i];
    150     } else {
    151       std::cerr << "No cl::sycl::device has been added" << std::endl;
    152       return nullptr;
    153     }
    154   }
    155 
    156   SYCLDeviceContext* GetSYCLContext(size_t i = 0) const {
    157     if (!m_sycl_context_.empty()) {
    158       return m_sycl_context_[i];
    159     } else {
    160       std::cerr << "No cl::sycl::device has been added" << std::endl;
    161       return nullptr;
    162     }
    163   }
    164 
    165   string GetShortDeviceDescription(int device_id = 0) const {
    166     Eigen::QueueInterface* queue_ptr = GetQueueInterface(device_id);
    167     if (!queue_ptr) {
    168       LOG(ERROR)
    169           << "Device name cannot be given after Eigen QueueInterface destroyed";
    170       return "";
    171     }
    172     auto device = queue_ptr->sycl_queue().get_device();
    173     auto name = device.get_info<cl::sycl::info::device::name>();
    174     auto vendor = device.get_info<cl::sycl::info::device::vendor>();
    175     auto profile = device.get_info<cl::sycl::info::device::profile>();
    176 
    177     std::string type;
    178     if (device.is_host()) {
    179       type = "Host";
    180     } else if (device.is_cpu()) {
    181       type = "CPU";
    182     } else if (device.is_gpu()) {
    183       type = "GPU";
    184     } else if (device.is_accelerator()) {
    185       type = "Accelerator";
    186     } else {
    187       type = "Unknown";
    188     }
    189 
    190     return strings::StrCat(
    191         "id: ", device_id, ", type: ", type, ", name: ", name.c_str(),
    192         ", vendor: ", vendor.c_str(), ", profile: ", profile.c_str());
    193   }
    194 };
    195 
    196 class SYCLDevice : public LocalDevice {
    197  public:
    198   SYCLDevice(const SessionOptions& options, const string& name,
    199              Bytes memory_limit, const DeviceLocality& locality,
    200              const string& physical_device_desc, SYCLAllocator* sycl_allocator,
    201              Allocator* cpu_allocator, SYCLDeviceContext* ctx)
    202       : LocalDevice(options, Device::BuildDeviceAttributes(
    203                                  name, DEVICE_SYCL, memory_limit, locality,
    204                                  physical_device_desc)),
    205         cpu_allocator_(cpu_allocator),
    206         sycl_allocator_(sycl_allocator),
    207         device_context_(ctx) {
    208     set_eigen_sycl_device(sycl_allocator->getSyclDevice());
    209   }
    210 
    211   ~SYCLDevice() override;
    212 
    213   void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
    214   Allocator* GetAllocator(AllocatorAttributes attr) override;
    215   Status MakeTensorFromProto(const TensorProto& tensor_proto,
    216                              const AllocatorAttributes alloc_attrs,
    217                              Tensor* tensor) override;
    218 
    219   Status FillContextMap(const Graph* graph,
    220                         DeviceContextMap* device_context_map) override;
    221 
    222   Status Sync() override;
    223 
    224  private:
    225   Allocator* cpu_allocator_;           // not owned
    226   SYCLAllocator* sycl_allocator_;      // not owned
    227   SYCLDeviceContext* device_context_;  // not owned
    228 };
    229 
    230 }  // namespace tensorflow
    231 
    232 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_SYCL_SYCL_DEVICE_H_
    233