/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

     16 #if GOOGLE_CUDA
     17 
     18 #define EIGEN_USE_GPU
     19 
     20 #include "tensorflow/core/kernels/reduction_gpu_kernels.cu.h"
     21 
     22 namespace tensorflow {
     23 namespace functor {
     24 
     25 typedef Eigen::GpuDevice GPUDevice;
     26 
     27 // Derive Index type. int (32-bit) or long (64-bit) depending on the
     28 // compile-time configuration. "float" here is not relevant.
     29 // TODO(zhifengc): Moves the definition to TTypes.
     30 typedef TTypes<float>::Tensor::Index Index;
     31 
     32 // T: the data type
     33 // REDUCER: the reducer functor
     34 // NUM_AXES: the number of axes to reduce
     35 // IN_DIMS: the number of dimensions of the input tensor
     36 #define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES)                          \
     37   template void ReduceFunctor<GPUDevice, REDUCER>::Reduce(             \
     38       OpKernelContext* ctx, TTypes<T, IN_DIMS - NUM_AXES>::Tensor out, \
     39       TTypes<T, IN_DIMS>::ConstTensor in,                              \
     40       const Eigen::array<Index, NUM_AXES>& reduction_axes,             \
     41       const REDUCER& reducer);
     42 
     43 #define DEFINE_IDENTITY(T, REDUCER)                              \
     44   template void ReduceFunctor<GPUDevice, REDUCER>::FillIdentity( \
     45       const GPUDevice& d, TTypes<T>::Vec out, const REDUCER& reducer);
     46 
     47 #define DEFINE_FOR_TYPE_AND_R(T, R) \
     48   DEFINE(T, R, 1, 1);               \
     49   DEFINE(T, R, 2, 1);               \
     50   DEFINE(T, R, 3, 1);               \
     51   DEFINE(T, R, 3, 2);               \
     52   DEFINE_IDENTITY(T, R)
     53 
     54 #define DEFINE_FOR_ALL_REDUCERS(T)                           \
     55   DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>);  \
     56   DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MeanReducer<T>); \
     57   DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>);  \
     58   DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>);  \
     59   DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::ProdReducer<T>)
     60 
     61 DEFINE_FOR_ALL_REDUCERS(float);
     62 #undef DEFINE_FOR_ALL_REDUCERS
     63 #undef DEFINE_FOR_TYPE_AND_R
     64 #undef DEFINE
     65 
     66 }  // end namespace functor
     67 }  // end namespace tensorflow
     68 
     69 #endif  // GOOGLE_CUDA
     70