1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ 17 #define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ 18 19 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 20 #include "tensorflow/core/framework/tensor.h" 21 #include "tensorflow/core/framework/tensor_shape.h" 22 #include "tensorflow/core/framework/tensor_types.h" 23 24 namespace tensorflow { 25 26 class OpKernelContext; 27 28 namespace functor { 29 30 #ifdef GOOGLE_CUDA 31 typedef Eigen::GpuDevice GPUDevice; 32 // Functor for SegmentSumGPUOp. 33 // output_rows: the number of output segments (unique segment ids in 34 // 'segment_ids'). 35 // segment_ids_shape: shape of 'segment_ids' tensor. 36 // segment_ids: unsorted map from input to output segment ids at which to 37 // perform segment sum operation. 38 // data_size: size of input data tensor. 39 // data: input data tensor. 40 // output: output reshaped to {output_rows, output.size/output_rows} 41 template <typename T, typename Index> 42 struct SegmentSumFunctor { 43 void operator()(OpKernelContext* ctx, const GPUDevice& d, 44 const Index output_rows, const TensorShape& segment_ids_shape, 45 typename TTypes<Index>::ConstFlat segment_ids, 46 const Index data_size, const T* data, 47 typename TTypes<T, 2>::Tensor output); 48 }; 49 50 #endif 51 52 template <typename Device, typename T, typename Index, typename InitialValueF, 53 typename ReductionF> 54 struct UnsortedSegmentFunctor { 55 void operator()(OpKernelContext* ctx, const Index num_segments, 56 const TensorShape& segment_ids_shape, 57 typename TTypes<Index>::ConstFlat segment_ids, 58 const Index data_size, const T* data, 59 typename TTypes<T, 2>::Tensor output); 60 }; 61 62 #ifdef GOOGLE_CUDA 63 // reduction functors for the gpu 64 template <typename T> 65 struct SumOpGpu { 66 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(T* dest, 67 const T& value) { 68 CudaAtomicAdd(dest, value); 69 } 70 }; 71 72 template <typename T> 73 struct ProdOpGpu { 74 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(T* dest, 75 const T& value) { 76 CudaAtomicMul(dest, value); 77 } 78 }; 79 80 template <typename T> 81 struct MaxOpGpu { 82 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(T* dest, 83 const T& value) { 84 CudaAtomicMax(dest, value); 85 } 86 }; 87 88 template <typename T> 89 struct MinOpGpu { 90 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(T* dest, 91 const T& value) { 92 CudaAtomicMin(dest, value); 93 } 94 }; 95 96 #endif // GOOGLE_CUDA 97 98 // initial value functors 99 template <typename T> 100 struct Zero { 101 EIGEN_STRONG_INLINE T operator()() const { return T(0); } 102 }; 103 104 template <typename T> 105 struct One { 106 EIGEN_STRONG_INLINE T operator()() const { return T(1); } 107 }; 108 109 template <typename T> 110 struct Lowest { 111 EIGEN_STRONG_INLINE T operator()() const { 112 return Eigen::NumTraits<T>::lowest(); 113 } 114 }; 115 116 template <typename T> 117 struct Highest { 118 EIGEN_STRONG_INLINE T operator()() const { 119 return Eigen::NumTraits<T>::highest(); 120 } 121 }; 122 123 } // namespace functor 124 } // namespace tensorflow 125 126 #endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ 127