/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/sparse_ops.cc.

#define EIGEN_USE_THREADS

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"

// TODO(b/31496047): Fix non-standard include order.
#include <numeric>  // clang-format off

using tensorflow::sparse::SparseTensor;
using tensorflow::gtl::ArraySlice;

namespace tensorflow {

struct ReduceDetails {
  // The dimensions to call Reorder() with.
  std::vector<int64> reorder_dims;

  // The dimensions to call group() with after Reorder().
  std::vector<int64> group_by_dims;

  // The shape after reduction.
  TensorShape reduced_shape;
};

// Computes the common reduction parameters used for SparseTensor reductions.
// Usage:
//   ReduceDetails reduction = SparseTensorReduceHelper(sp, axes, keep_dims);
//   sp.Reorder(reduction.reorder_dims);
//   for (const auto& g : sp.group(reduction.group_by_dims)) {
//     ...
//   }
//   // Set the output shape to reduction.reduced_shape.
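//
// Illustrative example (values assumed for this sketch, not taken from any
// particular caller): with sp.dims() == 3 and axes == {1}, this yields
//   group_by_dims == {0, 2}  and  reorder_dims == {0, 2, 1},
// and for an input of dense shape [2, 3, 4] the reduced_shape is [2, 4]
// ([2, 1, 4] when keep_dims is true).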
ReduceDetails SparseTensorReduceHelper(const SparseTensor &sp,
                                       gtl::ArraySlice<int32> axes_slice,
                                       bool keep_dims) {
  ReduceDetails reduction;

  std::vector<int32> reduction_axes(axes_slice.begin(), axes_slice.end());
  int ndims = sp.dims();
  // Map any negative reduction axes to their non-negative equivalents.
  for (int64 i = 0; i < reduction_axes.size(); ++i) {
    reduction_axes[i] = (reduction_axes[i] + ndims) % ndims;
  }
  std::sort(reduction_axes.begin(), reduction_axes.end());

  // (0) Calculate the grouping dimensions:
  // group_by_dims == {0, .., NDIMS-1} \ reduction_axes.
  std::vector<int64> perm(ndims);
  std::iota(perm.begin(), perm.end(), 0);

  // Requires perm and reduction_axes to be sorted; group_by_dims will be
  // sorted as well.
  std::set_difference(
      perm.begin(), perm.end(), reduction_axes.begin(), reduction_axes.end(),
      std::inserter(reduction.group_by_dims, reduction.group_by_dims.begin()));

  // Now append the remaining axes (the complement of group_by_dims); the
  // result is used by Reorder().
  reduction.reorder_dims = reduction.group_by_dims;
  std::set_difference(perm.begin(), perm.end(), reduction.group_by_dims.begin(),
                      reduction.group_by_dims.end(),
                      std::back_inserter(reduction.reorder_dims));

  // (1) Calculate the shape after reduction.
  auto sp_shape = sp.shape();
  std::vector<int64> out_dim_sizes;
  if (keep_dims) {
    out_dim_sizes.reserve(ndims);
    auto beg = reduction.group_by_dims.begin();
    auto end = reduction.group_by_dims.end();
    for (int d = 0; d < ndims; ++d) {
      if (std::find(beg, end, d) == end) {
        out_dim_sizes.push_back(1);  // A reduced axis.
      } else {
        out_dim_sizes.push_back(sp_shape[d]);
      }
    }
  } else {
    out_dim_sizes = sp.PickDims(reduction.group_by_dims);
  }

  reduction.reduced_shape = TensorShape(out_dim_sizes);
  return reduction;
}

Status ValidateInputs(const Tensor *shape_t, const Tensor *reduction_axes_t) {
  // indices and values are validated in SparseTensor ctor.
  if (!TensorShapeUtils::IsVector(shape_t->shape())) {
    return errors::InvalidArgument(
        "Expected input_shape to be a vector; got shape: ",
        shape_t->shape().DebugString());
  }
  if (!TensorShapeUtils::IsScalar(reduction_axes_t->shape()) &&
      !TensorShapeUtils::IsVector(reduction_axes_t->shape())) {
    return errors::InvalidArgument(
        "Expected reduction_axes to be a scalar or a vector; got shape: ",
        reduction_axes_t->shape().DebugString());
  }

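  // Each reduction axis must lie in [-rank, rank), where rank is the number
  // of entries in input_shape (e.g. a rank-3 input accepts axes -3 through 2).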
  const auto reduction_axes_flat = reduction_axes_t->flat<int32>();
  for (int64 i = 0; i < reduction_axes_flat.size(); i++) {
    int32 axis = reduction_axes_flat(i);
    if (axis < -shape_t->NumElements() || axis >= shape_t->NumElements()) {
      return errors::InvalidArgument("Invalid reduction dimension ", axis,
                                     ", for input with ",
                                     shape_t->NumElements(), " dimensions.");
    }
  }

  return Status::OK();
}

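// Reduction functors plugged into the kernels below via the Op template
// parameter: Run() folds one group's values into a scalar on the CPU device,
// and Name() labels the reduction in VLOG output.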
struct SumOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.sum();
  }
  static StringPiece Name() { return "sum"; }
};

struct MaxOp {
  template <typename T>
  static void Run(OpKernelContext *ctx, typename TTypes<T>::Scalar &s,
                  const typename TTypes<T>::UnalignedVec &v) {
    s.device(ctx->eigen_cpu_device()) = v.maximum();
  }
  static StringPiece Name() { return "max"; }
};

template <typename T, typename Op>
class SparseReduceOp : public OpKernel {
 public:
  explicit SparseReduceOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    // TODO(zongheng): we will call Reorder() below, which will modify
    // in-place the underlying indices and values buffers.  To avoid
    // surprises of this kernel being stateful, we work around the above by
    // making deep copies here.  Remove this if/when we change Reorder()'s
    // semantics.
    const auto shape_vec = shape_t->vec<int64>();
    SparseTensor sp(tensor::DeepCopy(*indices_t), tensor::DeepCopy(*values_t),
                    TensorShape(shape_vec));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    Tensor *out_values;
    OP_REQUIRES_OK(
        ctx, ctx->allocate_output(0, reduction.reduced_shape, &out_values));
    auto out_flat = out_values->flat<T>();
    out_flat.setZero();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();

    // Compute strides, and use them to convert coords to a flat index.  The
    // coordinates returned by .group() have the same ndims as group_by_dims.
    gtl::InlinedVector<int64, 8> output_strides(reduction.group_by_dims.size());
    if (!output_strides.empty()) {  // Only if we are not reducing all dims.
      output_strides.back() = 1;
      for (int d = output_strides.size() - 2; d >= 0; --d) {
        output_strides[d] =
            output_strides[d + 1] * shape_vec(reduction.group_by_dims[d + 1]);
      }
    }

    auto CoordinatesToFlatIndex = [](ArraySlice<int64> coords,
                                     ArraySlice<int64> strides) {
      if (strides.empty()) {  // Reduce all.
        return 0LL;
      }
      CHECK_EQ(coords.size(), strides.size());
      int64 idx = 0;
      for (int i = 0; i < coords.size(); ++i) {
        idx += coords[i] * strides[i];
      }
      return idx;
    };
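    // Illustrative example (sizes assumed for this sketch): if the kept
    // dimensions have sizes [4, 5], then output_strides == [5, 1], and a
    // group coordinate (2, 3) maps to flat index 2 * 5 + 3 == 13.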

    // Each group maps one-to-one onto a value in the reduced tensor.
    // g.group() provides the coordinates of a particular reduced value.
    sp.Reorder<T>(reduction.reorder_dims);
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      const int64 idx = CoordinatesToFlatIndex(g.group(), output_strides);
      out_flat(idx) = reduced_val();
      VLOG(2) << "coords: " << str_util::Join(g.group(), ",")
              << "; idx: " << idx << "; group " << Op::Name() << ": "
              << reduced_val();
    }
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                              \
  REGISTER_KERNEL_BUILDER(                                               \
      Name("SparseReduceSum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                              \
  REGISTER_KERNEL_BUILDER(                                               \
      Name("SparseReduceMax").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

template <typename T, typename Op>
class SparseReduceSparseOp : public OpKernel {
 public:
  explicit SparseReduceSparseOp(OpKernelConstruction *ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_dims", &keep_dims_));
  }

  void Compute(OpKernelContext *ctx) override {
    const Tensor *indices_t, *values_t, *shape_t, *reduction_axes_t;
    OP_REQUIRES_OK(ctx, ctx->input("input_indices", &indices_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_values", &values_t));
    OP_REQUIRES_OK(ctx, ctx->input("input_shape", &shape_t));
    OP_REQUIRES_OK(ctx, ctx->input("reduction_axes", &reduction_axes_t));

    OP_REQUIRES_OK(ctx, ValidateInputs(shape_t, reduction_axes_t));

    SparseTensor sp(tensor::DeepCopy(*indices_t), tensor::DeepCopy(*values_t),
                    TensorShape(shape_t->vec<int64>()));
    ReduceDetails reduction = SparseTensorReduceHelper(
        sp, reduction_axes_t->flat<int32>(), keep_dims_);

    sp.Reorder<T>(reduction.reorder_dims);
    // Count nnzs in the output SparseTensor.
    int64 nnz = 0;
    auto iter = sp.group(reduction.group_by_dims);
    for (auto it = iter.begin(); it != iter.end(); ++it) {
      nnz++;
    }
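    // (Each group contributes exactly one nonzero to the reduced SparseTensor,
    // so the group count above is the output nnz.)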

    Tensor *out_indices_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(
                       0, TensorShape({nnz, reduction.reduced_shape.dims()}),
                       &out_indices_t));
    typename TTypes<int64>::Matrix out_indices_mat =
        out_indices_t->matrix<int64>();
    // Zero-initialize so that, with keep_dims, the reduced dimensions (whose
    // index fields are never written in the loop below) stay at 0.
    out_indices_mat.setZero();
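    // Illustrative example (dims assumed for this sketch): with
    // group_by_dims == {0, 2} and keep_dims == true, a group coordinate
    // (i0, i2) is written to columns 0 and 2 of its output index row, and the
    // reduced column 1 keeps its zero-initialized value.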

    Tensor *out_values_t;
    OP_REQUIRES_OK(ctx,
                   ctx->allocate_output(1, TensorShape({nnz}), &out_values_t));
    auto out_flat = out_values_t->flat<T>();

    Tensor tmp_reduced_val;
    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
                                           TensorShape({}), &tmp_reduced_val));
    auto reduced_val = tmp_reduced_val.scalar<T>();
    int64 i = 0;
    for (const auto &g : sp.group(reduction.group_by_dims)) {
      Op::template Run<T>(ctx, reduced_val, g.template values<T>());
      std::vector<int64> group = g.group();
      for (int64 j = 0; j < group.size(); j++) {
        if (keep_dims_) {
          out_indices_mat(i, reduction.group_by_dims[j]) = group[j];
        } else {
          out_indices_mat(i, j) = group[j];
        }
      }
      out_flat(i) = reduced_val();
      i++;
      VLOG(2) << "coords: " << str_util::Join(g.group(), ",")
              << "; group " << Op::Name() << ": "
              << reduced_val();
    }

    Tensor *out_shape_t;
    OP_REQUIRES_OK(ctx, ctx->allocate_output(
                            2, TensorShape({reduction.reduced_shape.dims()}),
                            &out_shape_t));
    auto out_shape_flat = out_shape_t->flat<int64>();
    auto out_dim_sizes = reduction.reduced_shape.dim_sizes();
    std::copy(out_dim_sizes.begin(), out_dim_sizes.end(), &out_shape_flat(0));
  }

 private:
  // True if the number of dimensions should be maintained.
  bool keep_dims_;
};

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceSumSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, SumOp>)
TF_CALL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

#define REGISTER_KERNELS(T)                                                    \
  REGISTER_KERNEL_BUILDER(                                                     \
      Name("SparseReduceMaxSparse").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
      SparseReduceSparseOp<T, MaxOp>)
TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
#undef REGISTER_KERNELS

}  // namespace tensorflow