/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_
#define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "tensorflow/compiler/xla/service/compiler.h"
#include "tensorflow/compiler/xla/service/computation_placer.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/pool.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/platform/thread_annotations.h"

namespace Eigen {
struct ThreadPoolDevice;
}

namespace xla {

// Options to configure the backend when it is created.
class BackendOptions {
 public:
  // Sets the platform backing the backend, or nullptr for the default
  // platform.
  BackendOptions& set_platform(perftools::gputools::Platform* platform);
  perftools::gputools::Platform* platform() const;

  // Sets the thread pool size for parallel execution of an individual
  // operator. The default value of -1 initializes the thread pool with as
  // many threads as there are cores on the system.
  BackendOptions& set_intra_op_parallelism_threads(int num_threads);
  int intra_op_parallelism_threads() const;

 private:
  perftools::gputools::Platform* platform_ = nullptr;
  int intra_op_parallelism_threads_ = -1;
};
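
// Example of configuring a backend at creation time (a minimal sketch;
// "platform" below is a placeholder for a perftools::gputools::Platform*
// obtained elsewhere, e.g. via PlatformUtil, and error handling is elided):
//
//   BackendOptions options;
//   options.set_platform(platform);
//   options.set_intra_op_parallelism_threads(8);
//   auto backend = Backend::CreateBackend(options).ConsumeValueOrDie();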

// Class which encapsulates an XLA backend. It includes everything necessary
// to compile and execute computations on a particular platform.
//
// It also offers a pooling API for creation/use of initialized streams:
//
//    StreamPtr stream =
//        backend->BorrowStream(device_ordinal).ConsumeValueOrDie();
class Backend {
 public:
  using StreamPtr = Pool<perftools::gputools::Stream>::SmartPtr;

  // Creates a new backend.
  static StatusOr<std::unique_ptr<Backend>> CreateBackend(
      const BackendOptions& options);

  // Creates a backend for the default platform. The default platform is
  // defined in PlatformUtil.
  static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend();
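
  // For example (a minimal sketch; error handling elided):
  //
  //   std::unique_ptr<Backend> backend =
  //       Backend::CreateDefaultBackend().ConsumeValueOrDie();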

  ~Backend();

  // Accessors for the various objects.
  perftools::gputools::Platform* platform() const { return platform_; }
  Compiler* compiler() const { return compiler_; }
  DeviceMemoryAllocator* memory_allocator() const {
    return memory_allocator_.get();
  }
  TransferManager* transfer_manager() const { return transfer_manager_; }
  ComputationPlacer* computation_placer() const { return computation_placer_; }

  // Returns the number of devices of the platform type which are visible. Not
  // all of these devices may be usable by XLA.
  int device_count() const { return stream_executors_.size(); }

  // Returns the device ordinal number of the default device.
  int default_device_ordinal() const;

  // Returns stream executors of all supported devices for this backend. The
  // executors are ordered by the device ordinal.
  const std::vector<perftools::gputools::StreamExecutor*>& stream_executors()
      const {
    return stream_executors_;
  }

  // Returns the stream executor for the given device ordinal.
  StatusOr<perftools::gputools::StreamExecutor*> stream_executor(
      int device_ordinal) const;

  // Returns the stream executor for the default device ordinal. This stream
  // executor can only be used when the number of computations is 1
  // (replication can be > 1).
  perftools::gputools::StreamExecutor* default_stream_executor() const {
    CHECK(!stream_executors_.empty());
    return stream_executors_[0];
  }

  // Borrows a stream for use by the caller, either by grabbing it from an
  // internal pool, or by constructing/initializing it, and returns the result
  // to the caller.
  StatusOr<StreamPtr> BorrowStream(int device_ordinal);
  StatusOr<StreamPtr> BorrowStream(
      perftools::gputools::StreamExecutor* executor);
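
  // For example (a minimal sketch): the borrowed stream is handed back to the
  // pool when the returned StreamPtr goes out of scope:
  //
  //   {
  //     StreamPtr stream = backend->BorrowStream(0).ConsumeValueOrDie();
  //     // ... enqueue work on stream.get() ...
  //   }  // The stream is returned to the pool here.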

  // Returns a function that borrows a stream, as `BorrowStream` above does.
  // This is purely for convenience: the caller could instead construct such a
  // lambda itself.
  std::function<StatusOr<StreamPtr>(int)> StreamBorrower() {
    return [this](int device_ordinal) { return BorrowStream(device_ordinal); };
  }
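
  // For example (a sketch): the returned std::function can be handed to code
  // that should not depend on the Backend type itself:
  //
  //   auto borrower = backend->StreamBorrower();
  //   StatusOr<StreamPtr> stream = borrower(/*device_ordinal=*/0);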

  // Returns whether the given device ordinal is supported by this backend.
  bool device_ordinal_supported(int device_ordinal) const {
    return (device_ordinal >= 0 && device_ordinal < device_count() &&
            stream_executors_[device_ordinal] != nullptr);
  }

  // Returns a string identifier for the given device, e.g. "GPU:3".
  string device_name(int device_ordinal) const {
    return tensorflow::strings::StrCat(platform_->Name(), ":", device_ordinal);
  }

  // Returns true if the devices with the given ordinals are equivalent from
  // XLA's perspective. That is, an executable compiled for one device would
  // be equivalent to an executable compiled for the other.
  StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b);

  // For the host platform, returns the threadpool to use when scheduling
  // parallel operators. For other platforms, returns nullptr.
  tensorflow::thread::ThreadPool* inter_op_thread_pool() const;

  // For the host platform, returns the configured Eigen threadpool device to
  // be used for scheduling work. For other platforms, returns nullptr.
  const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const;
  tensorflow::thread::ThreadPool* eigen_intra_op_thread_pool() const;

  // Resets the devices associated with this backend.
  Status ResetDevices();

 private:
  struct EigenThreadPoolWrapper;
  Backend(perftools::gputools::Platform* platform, Compiler* compiler,
          tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*>
              stream_executors,
          TransferManager* transfer_manager,
          ComputationPlacer* computation_placer,
          int intra_op_parallelism_threads);
  Backend(const Backend&) = delete;
  Backend& operator=(const Backend&) = delete;

  perftools::gputools::Platform* platform_;
  Compiler* compiler_;
  TransferManager* transfer_manager_;
  ComputationPlacer* computation_placer_;

  // Vector of stream executors. stream_executors_[0] is the default executor.
  std::vector<perftools::gputools::StreamExecutor*> stream_executors_;

  tensorflow::mutex mu_;

  // Mapping from stream executor to stream pools, used by `BorrowStream`
  // above.
  std::map<perftools::gputools::StreamExecutor*,
           Pool<perftools::gputools::Stream>>
      stream_pools_ GUARDED_BY(mu_);

  // The default memory allocator to use.
  std::unique_ptr<StreamExecutorMemoryAllocator> memory_allocator_;

  // For the CPU backend, a threadpool for scheduling parallel operators.
  std::unique_ptr<tensorflow::thread::ThreadPool> inter_op_thread_pool_;

  // For the CPU backend, an Eigen threadpool device for use by Eigen code.
  std::unique_ptr<EigenThreadPoolWrapper> intra_op_thread_pool_wrapper_;
};

}  // namespace xla

#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_