Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
     17 #define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
     18 
     19 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
     20 #include "tensorflow/core/framework/tensor.h"
     21 #include "tensorflow/core/framework/tensor_shape.h"
     22 #include "tensorflow/core/framework/tensor_types.h"
     23 
     24 namespace tensorflow {
     25 
     26 class OpKernelContext;
     27 
     28 namespace functor {
     29 
     30 #ifdef GOOGLE_CUDA
     31 typedef Eigen::GpuDevice GPUDevice;
     32 // Functor for SegmentSumGPUOp.
     33 // output_rows: the number of output segments (unique segment ids in
     34 //                'segment_ids').
     35 // segment_ids_shape: shape of 'segment_ids' tensor.
     36 // segment_ids: unsorted map from input to output segment ids at which to
     37 //                perform segment sum operation.
     38 // data_size: size of input data tensor.
     39 // data: input data tensor.
     40 // output: output reshaped to {output_rows, output.size/output_rows}
     41 template <typename T, typename Index>
     42 struct SegmentSumFunctor {
     43   void operator()(OpKernelContext* ctx, const GPUDevice& d,
     44                   const Index output_rows, const TensorShape& segment_ids_shape,
     45                   typename TTypes<Index>::ConstFlat segment_ids,
     46                   const Index data_size, const T* data,
     47                   typename TTypes<T, 2>::Tensor output);
     48 };
     49 
     50 #endif
     51 
     52 template <typename Device, typename T, typename Index, typename InitialValueF,
     53           typename ReductionF>
     54 struct UnsortedSegmentFunctor {
     55   void operator()(OpKernelContext* ctx, const Index num_segments,
     56                   const TensorShape& segment_ids_shape,
     57                   typename TTypes<Index>::ConstFlat segment_ids,
     58                   const Index data_size, const T* data,
     59                   typename TTypes<T, 2>::Tensor output);
     60 };
     61 
     62 #ifdef GOOGLE_CUDA
     63 // reduction functors for the gpu
     64 template <typename T>
     65 struct SumOpGpu {
     66   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(T* dest,
     67                                                         const T& value) {
     68     CudaAtomicAdd(dest, value);
     69   }
     70 };
     71 
     72 template <typename T>
     73 struct ProdOpGpu {
     74   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(T* dest,
     75                                                         const T& value) {
     76     CudaAtomicMul(dest, value);
     77   }
     78 };
     79 
     80 template <typename T>
     81 struct MaxOpGpu {
     82   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(T* dest,
     83                                                         const T& value) {
     84     CudaAtomicMax(dest, value);
     85   }
     86 };
     87 
     88 template <typename T>
     89 struct MinOpGpu {
     90   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(T* dest,
     91                                                         const T& value) {
     92     CudaAtomicMin(dest, value);
     93   }
     94 };
     95 
     96 #endif  // GOOGLE_CUDA
     97 
     98 // initial value functors
     99 template <typename T>
    100 struct Zero {
    101   EIGEN_STRONG_INLINE T operator()() const { return T(0); }
    102 };
    103 
    104 template <typename T>
    105 struct One {
    106   EIGEN_STRONG_INLINE T operator()() const { return T(1); }
    107 };
    108 
    109 template <typename T>
    110 struct Lowest {
    111   EIGEN_STRONG_INLINE T operator()() const {
    112     return Eigen::NumTraits<T>::lowest();
    113   }
    114 };
    115 
    116 template <typename T>
    117 struct Highest {
    118   EIGEN_STRONG_INLINE T operator()() const {
    119     return Eigen::NumTraits<T>::highest();
    120   }
    121 };
    122 
    123 }  // namespace functor
    124 }  // namespace tensorflow
    125 
    126 #endif  // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
    127