Home | History | Annotate | Download | only in gpu
      1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_
     17 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_
     18 
     19 #include "tensorflow/compiler/xla/status.h"
     20 #include "tensorflow/compiler/xla/statusor.h"
     21 #include "tensorflow/compiler/xla/types.h"
     22 #include "tensorflow/compiler/xla/xla_data.pb.h"
     23 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
     24 
     25 namespace xla {
     26 namespace gpu {
     27 
     28 // This file contains low-level routines for running cudnn convolutions.
     29 
     30 // Different types of convolutions supported by cudnn.
     31 //
     32 // A way to think about these is that a convolution is defined by three arrays
     33 // -- the "input", the "filter", and the "output" -- and given any two of these,
     34 // we can compute the third.  For example, a backward-input convolution takes as
     35 // input a filter and an "output" and produces an "input" such that if one were
     36 // to do a forward convolution of "input" using filter, the result would be
     37 // something with the same shape as "output".
     38 //
     39 // This way of thinking is not correct if you look at the values produced. For
     40 // example, a backward-input convolution is not actually the mathematical
     41 // inverse of a forward convolution.  But it's right as far as the shapes and
     42 // "connectivity" (i.e. which elements of the input affect which elements of
     43 // the output) are concerned.
     44 enum class CudnnConvKind {
        // Each enumerator names which of the three arrays is *computed* from the
        // other two.  Per the RunCudnnConvolution comment in this header, the
        // result is written into the buffer of the computed array, so the
        // "destination" buffer differs by kind.
     45   kForward,         // input  + filter => output
     46   kBackwardInput,   // filter + output => input
     47   kBackwardFilter,  // input  + output => filter
     48 };
     49 
     50 // Converts a CudnnConvKind value to a string.
        //
        // The exact text produced for each enumerator is defined by the
        // implementation, which is not part of this header.
        // NOTE(review): `string` is unqualified here; presumably the alias is
        // provided by tensorflow/compiler/xla/types.h -- confirm.
     51 string CudnnConvKindToString(CudnnConvKind kind);
     52 
     53 // Calls into cudnn to run the specified convolution.
     54 //
     55 // Note that depending on the value of CudnnConvKind, the result of this call
     56 // may be written into input_buf, filter_buf, or output_buf!
     57 //
     58 // At the moment we only support cudnn convolutions over float and half, and
     59 // convolution with half data type is implemented with cudnn PSEUDO_HALF
     60 // configuration, that is, the input values are half and the internal
     61 // computation type is float.
     62 //
     63 // We provide one overload which takes a scratch buffer, and another which takes
     64 // an allocator which is responsible for allocating the scratch space.  In
     65 // theory the second one shouldn't be necessary -- users of this function could
     66 // just ask cudnn how much scratch space it needs for a particular convolution.
     67 // But in practice, StreamExecutor does not expose such an API, and in the name
     68 // of parsimony, perhaps it's better not to add it.  Instead, the first time you
     69 // call a convolution, you should call the version that takes a scratch
     70 // allocator and take note of how much memory is used.  The next time you call
     71 // the same conv, you can provide an explicitly preallocated scratch buffer of
     72 // that size, if you like.
        //
        // Parameters of this (scratch-buffer) overload:
        //   kind            - which of the three arrays is computed; see
        //                     CudnnConvKind.
        //   input_shape, filter_shape, output_shape
        //                   - Shape descriptions for the three arrays.
        //   input_buf, filter_buf, output_buf
        //                   - device memory for the three arrays.  Two are read
        //                     and one receives the result, depending on `kind`.
        //   scratch_buf     - caller-provided, preallocated scratch space.
        //   window, dnums   - convolution window and dimension numbers.
        //   algorithm       - algorithm configuration for the convolution.
        //   stream          - NOTE(review): presumably the stream the convolution
        //                     is enqueued on -- confirm in the implementation.
        //   profile_result  - optional; defaults to nullptr.  NOTE(review):
        //                     presumably filled with profiling data when non-null
        //                     -- confirm in the implementation.
        //
        // Returns a Status.  The specific error conditions are determined by the
        // implementation and are not visible from this header.
     73 Status RunCudnnConvolution(
     74     CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape,
     75     const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf,
     76     perftools::gputools::DeviceMemoryBase filter_buf,
     77     perftools::gputools::DeviceMemoryBase output_buf,
     78     perftools::gputools::DeviceMemoryBase scratch_buf, const Window& window,
     79     const ConvolutionDimensionNumbers& dnums,
     80     perftools::gputools::dnn::AlgorithmConfig algorithm,
     81     perftools::gputools::Stream* stream,
     82     perftools::gputools::dnn::ProfileResult* profile_result = nullptr);
     83 
        // Scratch-allocator overload of RunCudnnConvolution.
        //
        // Takes a ScratchAllocator* (`scratch_allocator`) in place of a
        // preallocated scratch buffer; the allocator is responsible for providing
        // the scratch space.  This is the overload to use the first time a given
        // convolution is run, when the amount of scratch memory it needs is not
        // yet known.
        //
        // As with the scratch-buffer overload, the result may be written into
        // input_buf, filter_buf, or output_buf depending on `kind`, and
        // `profile_result` is optional (defaults to nullptr).
     84 Status RunCudnnConvolution(
     85     CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape,
     86     const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf,
     87     perftools::gputools::DeviceMemoryBase filter_buf,
     88     perftools::gputools::DeviceMemoryBase output_buf,
     89     perftools::gputools::ScratchAllocator* scratch_allocator,
     90     const Window& window, const ConvolutionDimensionNumbers& dnums,
     91     perftools::gputools::dnn::AlgorithmConfig algorithm,
     92     perftools::gputools::Stream* stream,
     93     perftools::gputools::dnn::ProfileResult* profile_result = nullptr);
     94 
     95 }  // namespace gpu
     96 }  // namespace xla
     97 
     98 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_
     99