Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // SparseSparseBinaryOpShared is the shared code for binary coefficient-wise
     17 // (cwise) operations of the following form:
     18 //
     19 //   sparse_t <binary cwise op> sparse_t -> new sparse_t
     20 //
     21 // The output SparseTensor may store up to "a_nnz + b_nnz" elements.
     22 
     23 // IMPLEMENTATION DETAILS (not part of the interface specification).
     24 //
     25 // This kernel implements the "union" semantics on the non-zeros: namely, any
     26 // non-zero from either side participate in the calculations, and any resultant
     27 // zeros will NOT be excluded from the output storage.
     28 //
     29 // (In the future, we could always add a pruning op the prunes away the zeros,
     30 // if desirable.)
     31 
     32 // See docs of all registered ops in ../ops/sparse_ops.cc.
     33 
     34 #define EIGEN_USE_THREADS
     35 
     36 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
     37 #include "tensorflow/core/framework/op_kernel.h"
     38 #include "tensorflow/core/framework/register_types.h"
     39 #include "tensorflow/core/framework/tensor.h"
     40 #include "tensorflow/core/framework/tensor_util.h"
     41 #include "tensorflow/core/framework/types.h"
     42 #include "tensorflow/core/kernels/cwise_ops.h"
     43 #include "tensorflow/core/kernels/cwise_ops_common.h"
     44 #include "tensorflow/core/util/sparse/sparse_tensor.h"
     45 
     46 namespace tensorflow {
     47 
     48 typedef Eigen::ThreadPoolDevice CPUDevice;
     49 
     50 namespace {
     51 // Unions the sparse indices and outputs corresponding values: namely, if a
     52 // non-zero appear in one side, it will participate in the calculation, where
     53 // the counterpart on the other side is either a value or an implicit zero.
     54 //
     55 // On exit, outputs the augmented values in "{a,b}_augmented_values", and fills
     56 // "entries_to_copy" with "(from_a?, index)" pairs.  All three vectors have the
     57 // same size.
     58 //
     59 // The input and output sparse tensors are assumed ordered in the canonical
     60 // row-major order.
     61 template <typename T>
     62 void UnionSparseIndicesAndValues(
     63     typename TTypes<int64>::ConstMatrix a_indices_mat,
     64     typename TTypes<T>::ConstFlat a_values, int64 a_nnz,
     65     typename TTypes<int64>::ConstMatrix b_indices_mat,
     66     typename TTypes<T>::ConstFlat b_values, int64 b_nnz, int num_dims,
     67     std::vector<T> *a_augmented_values, std::vector<T> *b_augmented_values,
     68     std::vector<std::pair<bool, int64>> *entries_to_copy) {
     69   entries_to_copy->reserve(a_nnz + b_nnz);
     70   a_augmented_values->reserve(a_nnz);
     71   b_augmented_values->reserve(b_nnz);
     72 
     73   int64 i = 0, j = 0;
     74   const T kZero = T(0);
     75   while (i < a_nnz && j < b_nnz) {
     76     switch (sparse::DimComparator::cmp(a_indices_mat, b_indices_mat, i, j,
     77                                        num_dims)) {
     78       case -1:
     79         entries_to_copy->emplace_back(true, i);
     80         a_augmented_values->push_back(a_values(i));
     81         b_augmented_values->push_back(kZero);
     82         ++i;
     83         break;
     84       case 0:
     85         entries_to_copy->emplace_back(true, i);
     86         a_augmented_values->push_back(a_values(i));
     87         b_augmented_values->push_back(b_values(j));
     88         ++i;
     89         ++j;
     90         break;
     91       case 1:
     92         entries_to_copy->emplace_back(false, j);
     93         a_augmented_values->push_back(kZero);
     94         b_augmented_values->push_back(b_values(j));
     95         ++j;
     96         break;
     97     }
     98   }
     99   // Handles leftovers; at most one loop runs.
    100   while (i < a_nnz) {
    101     entries_to_copy->emplace_back(/* is_a */ true, i);
    102     a_augmented_values->push_back(a_values(i++));
    103     b_augmented_values->push_back(kZero);
    104   }
    105   while (j < b_nnz) {
    106     entries_to_copy->emplace_back(/* is_a */ false, j);
    107     a_augmented_values->push_back(kZero);
    108     b_augmented_values->push_back(b_values(j++));
    109   }
    110 }
    111 }  // anonymous namespace
    112 
    113 // Device: CPUDevice.  GPU kernel is not supported currently.
    114 // T: dtype of the SparseTensor's.
    115 // Functor: binary cwise operation to perform on the corresponding operand
    116 // values.  See cwise_ops.h for a list of possible functors to register with.
    117 template <typename Device, typename T, typename Functor>
    118 class SparseSparseBinaryOpShared : public OpKernel {
    119  public:
    120   explicit SparseSparseBinaryOpShared(OpKernelConstruction *ctx)
    121       : OpKernel(ctx) {}
    122 
    123   void Compute(OpKernelContext *ctx) override {
    124     const Tensor *a_indices_t, *a_values_t, *a_shape_t, *b_indices_t,
    125         *b_values_t, *b_shape_t;
    126     OP_REQUIRES_OK(ctx, ctx->input("a_indices", &a_indices_t));
    127     OP_REQUIRES_OK(ctx, ctx->input("a_values", &a_values_t));
    128     OP_REQUIRES_OK(ctx, ctx->input("a_shape", &a_shape_t));
    129     OP_REQUIRES_OK(ctx, ctx->input("b_indices", &b_indices_t));
    130     OP_REQUIRES_OK(ctx, ctx->input("b_values", &b_values_t));
    131     OP_REQUIRES_OK(ctx, ctx->input("b_shape", &b_shape_t));
    132 
    133     // Validations.
    134     OP_REQUIRES(
    135         ctx,
    136         TensorShapeUtils::IsMatrix(a_indices_t->shape()) &&
    137             TensorShapeUtils::IsMatrix(b_indices_t->shape()),
    138         errors::InvalidArgument("Inputs a_indices and b_indices should be "
    139                                 "matrices but received shapes: ",
    140                                 a_indices_t->shape().DebugString(), ", ",
    141                                 b_indices_t->shape().DebugString()));
    142     OP_REQUIRES(ctx,
    143                 TensorShapeUtils::IsVector(a_values_t->shape()) &&
    144                     TensorShapeUtils::IsVector(b_values_t->shape()),
    145                 errors::InvalidArgument(
    146                     "Inputs a_values and b_values should be vectors "
    147                     "but received shapes: ",
    148                     a_values_t->shape().DebugString(), " and ",
    149                     b_values_t->shape().DebugString()));
    150 
    151     const int64 a_nnz = a_indices_t->dim_size(0);
    152     const int64 b_nnz = b_indices_t->dim_size(0);
    153     const auto a_values = a_values_t->vec<T>();
    154     const auto b_values = b_values_t->vec<T>();
    155 
    156     OP_REQUIRES(
    157         ctx, a_values.size() == a_nnz && b_values.size() == b_nnz,
    158         errors::InvalidArgument("Expected ", a_nnz, " and ", b_nnz,
    159                                 " non-empty input values, got ",
    160                                 a_values.size(), " and ", b_values.size()));
    161 
    162     OP_REQUIRES(ctx,
    163                 TensorShapeUtils::IsVector(a_shape_t->shape()) &&
    164                     TensorShapeUtils::IsVector(b_shape_t->shape()),
    165                 errors::InvalidArgument(
    166                     "Input shapes should be a vector but received shapes ",
    167                     a_shape_t->shape().DebugString(), " and ",
    168                     b_shape_t->shape().DebugString()));
    169     OP_REQUIRES(ctx, a_shape_t->IsSameSize(*b_shape_t),
    170                 errors::InvalidArgument(
    171                     "Operands do not have the same ranks; got shapes: ",
    172                     a_shape_t->SummarizeValue(10), " and ",
    173                     b_shape_t->SummarizeValue(10)));
    174     const auto a_shape = a_shape_t->flat<int64>();
    175     const auto b_shape = b_shape_t->flat<int64>();
    176     for (int i = 0; i < a_shape_t->NumElements(); ++i) {
    177       OP_REQUIRES(ctx, a_shape(i) == b_shape(i),
    178                   errors::InvalidArgument("Operands' shapes do not match: got ",
    179                                           a_shape(i), " and ", b_shape(i),
    180                                           " for dimension ", i));
    181     }
    182 
    183     const int num_dims = a_indices_t->dim_size(1);
    184     const auto a_indices_mat = a_indices_t->matrix<int64>();
    185     const auto b_indices_mat = b_indices_t->matrix<int64>();
    186     std::vector<T> a_augmented_values, b_augmented_values;
    187     std::vector<std::pair<bool, int64>> entries_to_copy;  // from_a?, idx
    188     UnionSparseIndicesAndValues(a_indices_mat, a_values, a_nnz, b_indices_mat,
    189                                 b_values, b_nnz, num_dims, &a_augmented_values,
    190                                 &b_augmented_values, &entries_to_copy);
    191 
    192     // Allocates and fills output tensors.
    193     const int64 sum_nnz = a_augmented_values.size();
    194     Tensor *output_indices_t, *output_values_t;
    195     OP_REQUIRES_OK(ctx,
    196                    ctx->allocate_output(0, TensorShape({sum_nnz, num_dims}),
    197                                         &output_indices_t));
    198     OP_REQUIRES_OK(
    199         ctx, ctx->allocate_output(1, TensorShape({sum_nnz}), &output_values_t));
    200     auto output_indices_mat = output_indices_t->matrix<int64>();
    201 
    202     for (int64 i = 0; i < sum_nnz; ++i) {
    203       const bool from_a = entries_to_copy[i].first;
    204       const int64 idx = entries_to_copy[i].second;
    205       output_indices_mat.chip<0>(i) =
    206           from_a ? a_indices_mat.chip<0>(idx) : b_indices_mat.chip<0>(idx);
    207     }
    208 
    209     // Performs the functor operation using Eigen.
    210     //
    211     // Note that the two stack-allocated std::vector's may not be aligned. Using
    212     // allocate_temp() would've given us aligned storage, but we do not know
    213     // their sizes in advance, so we couldn't use allocate_temp() anyway.
    214     //
    215     // TODO(zongheng): measure if it's worthwile to somehow force alignment.
    216     using UnalignedTensorMap =
    217         Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor>,
    218                          Eigen::Unaligned>;
    219     auto a_augmented_values_t =
    220         UnalignedTensorMap(a_augmented_values.data(), sum_nnz);
    221     auto b_augmented_values_t =
    222         UnalignedTensorMap(b_augmented_values.data(), sum_nnz);
    223     output_values_t->flat<T>().device(ctx->eigen_device<Device>()) =
    224         a_augmented_values_t.binaryExpr(b_augmented_values_t,
    225                                         typename Functor::func());
    226   }
    227 };
    228 
    229 #define REGISTER_KERNELS(T)                                                  \
    230   REGISTER_KERNEL_BUILDER(                                                   \
    231       Name("SparseSparseMinimum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
    232       SparseSparseBinaryOpShared<CPUDevice, T, functor::minimum<T>>)         \
    233                                                                              \
    234   REGISTER_KERNEL_BUILDER(                                                   \
    235       Name("SparseSparseMaximum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
    236       SparseSparseBinaryOpShared<CPUDevice, T, functor::maximum<T>>)
    237 
    238 TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
    239 #undef REGISTER_KERNELS
    240 
    241 }  // namespace tensorflow
    242