/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// SparseSparseBinaryOpShared is the shared code for binary coefficient-wise
// (cwise) operations of the following form:
//
//   sparse_t <binary cwise op> sparse_t -> new sparse_t
//
// The output SparseTensor may store up to "a_nnz + b_nnz" elements.

// IMPLEMENTATION DETAILS (not part of the interface specification).
//
// This kernel implements the "union" semantics on the non-zeros: namely, any
// non-zero from either side participates in the calculations, and any
// resultant zeros will NOT be excluded from the output storage.
//
// (In the future, we could always add a pruning op that prunes away the
// zeros, if desirable.)

// See docs of all registered ops in ../ops/sparse_ops.cc.
33 34 #define EIGEN_USE_THREADS 35 36 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 37 #include "tensorflow/core/framework/op_kernel.h" 38 #include "tensorflow/core/framework/register_types.h" 39 #include "tensorflow/core/framework/tensor.h" 40 #include "tensorflow/core/framework/tensor_util.h" 41 #include "tensorflow/core/framework/types.h" 42 #include "tensorflow/core/kernels/cwise_ops.h" 43 #include "tensorflow/core/kernels/cwise_ops_common.h" 44 #include "tensorflow/core/util/sparse/sparse_tensor.h" 45 46 namespace tensorflow { 47 48 typedef Eigen::ThreadPoolDevice CPUDevice; 49 50 namespace { 51 // Unions the sparse indices and outputs corresponding values: namely, if a 52 // non-zero appear in one side, it will participate in the calculation, where 53 // the counterpart on the other side is either a value or an implicit zero. 54 // 55 // On exit, outputs the augmented values in "{a,b}_augmented_values", and fills 56 // "entries_to_copy" with "(from_a?, index)" pairs. All three vectors have the 57 // same size. 58 // 59 // The input and output sparse tensors are assumed ordered in the canonical 60 // row-major order. 
61 template <typename T> 62 void UnionSparseIndicesAndValues( 63 typename TTypes<int64>::ConstMatrix a_indices_mat, 64 typename TTypes<T>::ConstFlat a_values, int64 a_nnz, 65 typename TTypes<int64>::ConstMatrix b_indices_mat, 66 typename TTypes<T>::ConstFlat b_values, int64 b_nnz, int num_dims, 67 std::vector<T> *a_augmented_values, std::vector<T> *b_augmented_values, 68 std::vector<std::pair<bool, int64>> *entries_to_copy) { 69 entries_to_copy->reserve(a_nnz + b_nnz); 70 a_augmented_values->reserve(a_nnz); 71 b_augmented_values->reserve(b_nnz); 72 73 int64 i = 0, j = 0; 74 const T kZero = T(0); 75 while (i < a_nnz && j < b_nnz) { 76 switch (sparse::DimComparator::cmp(a_indices_mat, b_indices_mat, i, j, 77 num_dims)) { 78 case -1: 79 entries_to_copy->emplace_back(true, i); 80 a_augmented_values->push_back(a_values(i)); 81 b_augmented_values->push_back(kZero); 82 ++i; 83 break; 84 case 0: 85 entries_to_copy->emplace_back(true, i); 86 a_augmented_values->push_back(a_values(i)); 87 b_augmented_values->push_back(b_values(j)); 88 ++i; 89 ++j; 90 break; 91 case 1: 92 entries_to_copy->emplace_back(false, j); 93 a_augmented_values->push_back(kZero); 94 b_augmented_values->push_back(b_values(j)); 95 ++j; 96 break; 97 } 98 } 99 // Handles leftovers; at most one loop runs. 100 while (i < a_nnz) { 101 entries_to_copy->emplace_back(/* is_a */ true, i); 102 a_augmented_values->push_back(a_values(i++)); 103 b_augmented_values->push_back(kZero); 104 } 105 while (j < b_nnz) { 106 entries_to_copy->emplace_back(/* is_a */ false, j); 107 a_augmented_values->push_back(kZero); 108 b_augmented_values->push_back(b_values(j++)); 109 } 110 } 111 } // anonymous namespace 112 113 // Device: CPUDevice. GPU kernel is not supported currently. 114 // T: dtype of the SparseTensor's. 115 // Functor: binary cwise operation to perform on the corresponding operand 116 // values. See cwise_ops.h for a list of possible functors to register with. 
117 template <typename Device, typename T, typename Functor> 118 class SparseSparseBinaryOpShared : public OpKernel { 119 public: 120 explicit SparseSparseBinaryOpShared(OpKernelConstruction *ctx) 121 : OpKernel(ctx) {} 122 123 void Compute(OpKernelContext *ctx) override { 124 const Tensor *a_indices_t, *a_values_t, *a_shape_t, *b_indices_t, 125 *b_values_t, *b_shape_t; 126 OP_REQUIRES_OK(ctx, ctx->input("a_indices", &a_indices_t)); 127 OP_REQUIRES_OK(ctx, ctx->input("a_values", &a_values_t)); 128 OP_REQUIRES_OK(ctx, ctx->input("a_shape", &a_shape_t)); 129 OP_REQUIRES_OK(ctx, ctx->input("b_indices", &b_indices_t)); 130 OP_REQUIRES_OK(ctx, ctx->input("b_values", &b_values_t)); 131 OP_REQUIRES_OK(ctx, ctx->input("b_shape", &b_shape_t)); 132 133 // Validations. 134 OP_REQUIRES( 135 ctx, 136 TensorShapeUtils::IsMatrix(a_indices_t->shape()) && 137 TensorShapeUtils::IsMatrix(b_indices_t->shape()), 138 errors::InvalidArgument("Inputs a_indices and b_indices should be " 139 "matrices but received shapes: ", 140 a_indices_t->shape().DebugString(), ", ", 141 b_indices_t->shape().DebugString())); 142 OP_REQUIRES(ctx, 143 TensorShapeUtils::IsVector(a_values_t->shape()) && 144 TensorShapeUtils::IsVector(b_values_t->shape()), 145 errors::InvalidArgument( 146 "Inputs a_values and b_values should be vectors " 147 "but received shapes: ", 148 a_values_t->shape().DebugString(), " and ", 149 b_values_t->shape().DebugString())); 150 151 const int64 a_nnz = a_indices_t->dim_size(0); 152 const int64 b_nnz = b_indices_t->dim_size(0); 153 const auto a_values = a_values_t->vec<T>(); 154 const auto b_values = b_values_t->vec<T>(); 155 156 OP_REQUIRES( 157 ctx, a_values.size() == a_nnz && b_values.size() == b_nnz, 158 errors::InvalidArgument("Expected ", a_nnz, " and ", b_nnz, 159 " non-empty input values, got ", 160 a_values.size(), " and ", b_values.size())); 161 162 OP_REQUIRES(ctx, 163 TensorShapeUtils::IsVector(a_shape_t->shape()) && 164 TensorShapeUtils::IsVector(b_shape_t->shape()), 
165 errors::InvalidArgument( 166 "Input shapes should be a vector but received shapes ", 167 a_shape_t->shape().DebugString(), " and ", 168 b_shape_t->shape().DebugString())); 169 OP_REQUIRES(ctx, a_shape_t->IsSameSize(*b_shape_t), 170 errors::InvalidArgument( 171 "Operands do not have the same ranks; got shapes: ", 172 a_shape_t->SummarizeValue(10), " and ", 173 b_shape_t->SummarizeValue(10))); 174 const auto a_shape = a_shape_t->flat<int64>(); 175 const auto b_shape = b_shape_t->flat<int64>(); 176 for (int i = 0; i < a_shape_t->NumElements(); ++i) { 177 OP_REQUIRES(ctx, a_shape(i) == b_shape(i), 178 errors::InvalidArgument("Operands' shapes do not match: got ", 179 a_shape(i), " and ", b_shape(i), 180 " for dimension ", i)); 181 } 182 183 const int num_dims = a_indices_t->dim_size(1); 184 const auto a_indices_mat = a_indices_t->matrix<int64>(); 185 const auto b_indices_mat = b_indices_t->matrix<int64>(); 186 std::vector<T> a_augmented_values, b_augmented_values; 187 std::vector<std::pair<bool, int64>> entries_to_copy; // from_a?, idx 188 UnionSparseIndicesAndValues(a_indices_mat, a_values, a_nnz, b_indices_mat, 189 b_values, b_nnz, num_dims, &a_augmented_values, 190 &b_augmented_values, &entries_to_copy); 191 192 // Allocates and fills output tensors. 193 const int64 sum_nnz = a_augmented_values.size(); 194 Tensor *output_indices_t, *output_values_t; 195 OP_REQUIRES_OK(ctx, 196 ctx->allocate_output(0, TensorShape({sum_nnz, num_dims}), 197 &output_indices_t)); 198 OP_REQUIRES_OK( 199 ctx, ctx->allocate_output(1, TensorShape({sum_nnz}), &output_values_t)); 200 auto output_indices_mat = output_indices_t->matrix<int64>(); 201 202 for (int64 i = 0; i < sum_nnz; ++i) { 203 const bool from_a = entries_to_copy[i].first; 204 const int64 idx = entries_to_copy[i].second; 205 output_indices_mat.chip<0>(i) = 206 from_a ? a_indices_mat.chip<0>(idx) : b_indices_mat.chip<0>(idx); 207 } 208 209 // Performs the functor operation using Eigen. 
210 // 211 // Note that the two stack-allocated std::vector's may not be aligned. Using 212 // allocate_temp() would've given us aligned storage, but we do not know 213 // their sizes in advance, so we couldn't use allocate_temp() anyway. 214 // 215 // TODO(zongheng): measure if it's worthwile to somehow force alignment. 216 using UnalignedTensorMap = 217 Eigen::TensorMap<Eigen::Tensor<const T, 1, Eigen::RowMajor>, 218 Eigen::Unaligned>; 219 auto a_augmented_values_t = 220 UnalignedTensorMap(a_augmented_values.data(), sum_nnz); 221 auto b_augmented_values_t = 222 UnalignedTensorMap(b_augmented_values.data(), sum_nnz); 223 output_values_t->flat<T>().device(ctx->eigen_device<Device>()) = 224 a_augmented_values_t.binaryExpr(b_augmented_values_t, 225 typename Functor::func()); 226 } 227 }; 228 229 #define REGISTER_KERNELS(T) \ 230 REGISTER_KERNEL_BUILDER( \ 231 Name("SparseSparseMinimum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \ 232 SparseSparseBinaryOpShared<CPUDevice, T, functor::minimum<T>>) \ 233 \ 234 REGISTER_KERNEL_BUILDER( \ 235 Name("SparseSparseMaximum").Device(DEVICE_CPU).TypeConstraint<T>("T"), \ 236 SparseSparseBinaryOpShared<CPUDevice, T, functor::maximum<T>>) 237 238 TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS); 239 #undef REGISTER_KERNELS 240 241 } // namespace tensorflow 242