/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/sparse_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"

// TODO(b/31496047): Fix non-standard include order.
#include <numeric>  // clang-format off

using tensorflow::sparse::SparseTensor;
using tensorflow::gtl::ArraySlice;

namespace tensorflow {

struct ReduceDetails {
  // The dimensions to call Reorder() with.
  std::vector<int64> reorder_dims;

  // The dimensions to call group() with after Reorder().
  std::vector<int64> group_by_dims;

  // The shape after reduction.
  TensorShape reduced_shape;
};

// Compute common reduce parameters that'll be used for SparseTensor
// reductions. Usage:
//   ReduceDetails reduction = SparseTensorReduceHelper(sp, axes, keep_dims);
//   sp.Reorder(reduction.reorder_dims);
//   for (const auto& g : sp.group(reduction.group_by_dims)) {
//     ...
//   }
//   // Set output shape to reduction.reduced_shape.
ReduceDetails SparseTensorReduceHelper(const SparseTensor &sp,
                                       gtl::ArraySlice<int32> axes_slice,
                                       bool keep_dims) {
  ReduceDetails reduction;

  std::vector<int32> reduction_axes(axes_slice.begin(), axes_slice.end());
  int ndims = sp.dims();
  for (int64 i = 0; i < reduction_axes.size(); ++i) {
    reduction_axes[i] = (reduction_axes[i] + ndims) % ndims;
  }
  std::sort(reduction_axes.begin(), reduction_axes.end());

  // (0) Calculate the grouping dimensions:
  // group_by_dims == {0, .., NDIMS-1} \ reduction_axes.
  std::vector<int64> perm(ndims);
  std::iota(perm.begin(), perm.end(), 0);

  // Requires perm and reduction_axes_ be sorted; group_by_dims will be
  // sorted as well.
  std::set_difference(
      perm.begin(), perm.end(), reduction_axes.begin(), reduction_axes.end(),
      std::inserter(reduction.group_by_dims, reduction.group_by_dims.begin()));

  // Now append the rest of the axes (the complement of group_by_dims_);
  // result is used by Reorder().
  reduction.reorder_dims = reduction.group_by_dims;
  std::set_difference(perm.begin(), perm.end(), reduction.group_by_dims.begin(),
                      reduction.group_by_dims.end(),
                      std::back_inserter(reduction.reorder_dims));
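
  // Illustrative example (not part of the original code): for a rank-3 input
  // with reduction_axes = {1}, perm = {0, 1, 2} gives group_by_dims = {0, 2}
  // and reorder_dims = {0, 2, 1}: the kept dimensions come first, followed by
  // the reduced ones, which is the order Reorder() and group() expect.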

  // (1) Calculate the shape after reduction.
  auto sp_shape = sp.shape();
  std::vector<int64> out_dim_sizes;
  if (keep_dims) {
    out_dim_sizes.reserve(ndims);
    auto beg = reduction.group_by_dims.begin();
    auto end = reduction.group_by_dims.end();
    for (int d = 0; d < ndims; ++d) {
      if (std::find(beg, end, d) == end) {
        out_dim_sizes.push_back(1);  // A reduced axis.
      } else {
        out_dim_sizes.push_back(sp_shape[d]);
      }
    }
  } else {
    out_dim_sizes = sp.PickDims(reduction.group_by_dims);
  }

  reduction.reduced_shape = TensorShape(out_dim_sizes);
  return reduction;
}

Status ValidateInputs(const Tensor *shape_t, const Tensor *reduction_axes_t) {
  // indices and values are validated in SparseTensor ctor.
  if (!TensorShapeUtils::IsVector(shape_t->shape())) {
    return errors::InvalidArgument(
        "Expected input_shape to be a vector; got shape: ",
        shape_t->shape().DebugString());
  }
  if (!TensorShapeUtils::IsScalar(reduction_axes_t->shape()) &&
      !TensorShapeUtils::IsVector(reduction_axes_t->shape())) {
    return errors::InvalidArgument(
        "Expected reduction_axes to be a scalar or a vector; got shape: ",
        reduction_axes_t->shape().DebugString());
  }

  const auto reduction_axes_flat = reduction_axes_t->flat<int32>();
  for (int64 i = 0; i < reduction_axes_flat.size(); i++) {
    int32 axis = reduction_axes_flat(i);
    if (axis < -shape_t->NumElements() || axis >= shape_t->NumElements()) {
      return errors::InvalidArgument("Invalid reduction dimension ", axis,
                                     ", for input with ",
                                     shape_t->NumElements(), " dimensions.");
    }
  }

  return Status::OK();
}

struct SumOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.sum();
  }
  static StringPiece Name() { return "sum"; }
};

struct MaxOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.maximum();
  }
  static StringPiece Name() { return "max"; }
};

template <typename T, typename Op>
class SparseReduceOp : public OpKernel {
 public:
  explicit SparseReduceOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    // TODO(zongheng): we will call Reorder() below, which will modify
    // in-place the underlying indices and values buffers.  To avoid
    // surprises of this kernel being stateful, we work around the above by
    // making deep copies here.  Remove this if/when we change Reorder()'s
    // semantics.
    const auto shape_vec = shape_t->vec<int64>();
    SparseTensor sp(tensor::DeepCopy(*indices_t), tensor::DeepCopy(*values_t),
                    TensorShape(shape_vec));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    Tensor *out_values;
    OP_REQUIRES_OK(
        ctx, ctx->allocate_output(0, reduction.reduced_shape, &out_values));
    auto out_flat = out_values->flat<T>();
    out_flat.setZero();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();

    // Compute strides, and use them to convert coords to a flat index.  The
    // coordinates returned by .group() have the same ndims as group_by_dims.
    gtl::InlinedVector<int64, 8> output_strides(reduction.group_by_dims.size());
    if (!output_strides.empty()) {  // Do this iff we don't reduce all.
      output_strides.back() = 1;
      for (int d = output_strides.size() - 2; d >= 0; --d) {
        output_strides[d] =
            output_strides[d + 1] * shape_vec(reduction.group_by_dims[d + 1]);
      }
    }

    auto CoordinatesToFlatIndex = [](ArraySlice<int64> coords,
                                     ArraySlice<int64> strides) {
      if (strides.empty()) {  // Reduce all.
        return 0LL;
      }
      CHECK_EQ(coords.size(), strides.size());
      int64 idx = 0;
      for (int i = 0; i < coords.size(); ++i) {
        idx += coords[i] * strides[i];
      }
      return idx;
    };

    // Each group maps one-to-one onto a value in the reduced tensor.
    // g.group() provides the coordinates of a particular reduced value.
    sp.Reorder<T>(reduction.reorder_dims);
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      const int64 idx = CoordinatesToFlatIndex(g.group(), output_strides);
      out_flat(idx) = reduced_val();
      VLOG(2) << "coords: " << str_util::Join(g.group(), ",")
              << "; idx: " << idx << "; group " << Op::Name() << ": "
              << reduced_val();
    }
  }
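
  // Worked example (illustrative, not from the original code): with
  // group_by_dims = {0, 2} and dense shape [2, 3, 4], output_strides is
  // {4, 1}, so a group at coordinates (1, 3) lands at flat index
  // 1 * 4 + 3 * 1 = 7 of the reduced output.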

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                              \
  REGISTER_KERNEL_BUILDER(                                               \
      Name("SparseReduceSum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                              \
  REGISTER_KERNEL_BUILDER(                                               \
      Name("SparseReduceMax").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

template <typename T, typename Op>
class SparseReduceSparseOp : public OpKernel {
 public:
  explicit SparseReduceSparseOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    SparseTensor sp(tensor::DeepCopy(*indices_t), tensor::DeepCopy(*values_t),
                    TensorShape(shape_t->vec<int64>()));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    sp.Reorder<T>(reduction.reorder_dims);
    // Count nnzs in the output SparseTensor.
    int64 nnz = 0;
    auto iter = sp.group(reduction.group_by_dims);
    for (auto it = iter.begin(); it != iter.end(); ++it) {
      nnz++;
    }

    Tensor *out_indices_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(
                       0, TensorShape({nnz, reduction.reduced_shape.dims()}),
                       &out_indices_t));
    typename TTypes<int64>::Matrix out_indices_mat =
        out_indices_t->matrix<int64>();
    // For keep_dims: the loop below never writes the columns of reduced dims,
    // so zero-initialize them here.
    out_indices_mat.setZero();

    Tensor *out_values_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(1, TensorShape({nnz}), &out_values_t));
    auto out_flat = out_values_t->flat<T>();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();
    int64 i = 0;
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      std::vector<int64> group = g.group();
      for (int64 j = 0; j < group.size(); j++) {
        if (keep_dims_) {
          out_indices_mat(i, reduction.group_by_dims[j]) = group[j];
        } else {
          out_indices_mat(i, j) = group[j];
        }
      }
      out_flat(i) = reduced_val();
      i++;
      VLOG(2) << "coords: " << str_util::Join(g.group(), ",")
              << "; group " << Op::Name() << ": " << reduced_val();
    }

    Tensor *out_shape_t;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(
                            2, TensorShape({reduction.reduced_shape.dims()}),
                            &out_shape_t));
    auto out_shape_flat = out_shape_t->flat<int64>();
    auto out_dim_sizes = reduction.reduced_shape.dim_sizes();
    std::copy(out_dim_sizes.begin(), out_dim_sizes.end(), &out_shape_flat(0));
  }
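
  // Illustrative example (not from the original code): with keep_dims true and
  // group_by_dims = {0, 2}, a group at coordinates (1, 3) writes 1 and 3 into
  // columns 0 and 2 of out_indices_mat, leaving column 1 at zero for the
  // size-1 reduced dimension; with keep_dims false it writes columns 0 and 1.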

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceSumSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceMaxSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

}  // namespace tensorflow
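
// A possible extension (illustrative sketch only, not part of this kernel):
// a "min" reduction would follow the same functor-plus-registration pattern
// used by SumOp and MaxOp above, e.g.
//
//   struct MinOp {
//     template <typename T>
//     static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
//                     const typename TTypes<T>::UnalignedVec &v) {
//       s.device(ctx->eigen_cpu_device()) = v.minimum();
//     }
//     static StringPiece Name() { return "min"; }
//   };
//
// registered with REGISTER_KERNEL_BUILDER for the dense and sparse variants,
// alongside a matching op definition in ../ops/sparse_ops.cc.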