1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" 17 #include "tensorflow/core/framework/allocator.h" 18 #include "tensorflow/core/framework/fake_input.h" 19 #include "tensorflow/core/framework/node_def_builder.h" 20 #include "tensorflow/core/framework/op_kernel.h" 21 #include "tensorflow/core/framework/tensor.h" 22 #include "tensorflow/core/framework/tensor_testutil.h" 23 #include "tensorflow/core/framework/types.h" 24 #include "tensorflow/core/graph/graph.h" 25 #include "tensorflow/core/graph/node_builder.h" 26 #include "tensorflow/core/kernels/ops_testutil.h" 27 #include "tensorflow/core/platform/test.h" 28 #include "tensorflow/core/platform/test_benchmark.h" 29 30 namespace tensorflow { 31 32 namespace { 33 34 static void ExpectHasSubstr(StringPiece s, StringPiece expected) { 35 EXPECT_TRUE(StringPiece(s).contains(expected)) 36 << "'" << s << "' does not contain '" << expected << "'"; 37 } 38 39 class SparseDenseCDivTest : public OpsTestBase { 40 protected: 41 template <typename T> 42 void MakeOp() { 43 DataType value_type = tensorflow::DataTypeToEnum<T>::value; 44 TF_ASSERT_OK(NodeDefBuilder("cdiv", "SparseDenseCwiseDiv") 45 .Input(FakeInput(DT_INT64)) 46 .Input(FakeInput(value_type)) 47 .Input(FakeInput(DT_INT64)) 48 .Input(FakeInput(value_type)) 49 .Attr("T", value_type) 50 .Finalize(node_def())); 51 TF_ASSERT_OK(InitOp()); 52 } 53 }; 54 55 class SparseDenseCMulTest : public OpsTestBase { 56 protected: 57 template <typename T> 58 void MakeOp() { 59 DataType value_type = tensorflow::DataTypeToEnum<T>::value; 60 TF_ASSERT_OK(NodeDefBuilder("cmul", "SparseDenseCwiseMul") 61 .Input(FakeInput(DT_INT64)) 62 .Input(FakeInput(value_type)) 63 .Input(FakeInput(DT_INT64)) 64 .Input(FakeInput(value_type)) 65 .Attr("T", value_type) 66 .Finalize(node_def())); 67 TF_ASSERT_OK(InitOp()); 68 } 69 }; 70 71 TEST_F(SparseDenseCDivTest, DoNotBroadcastSparse_FewerDims) { 72 MakeOp<float>(); 73 // [1] op [2, 1] 74 AddInputFromArray<int64>(TensorShape({1, 1}), {0}); // indices 75 AddInputFromArray<float>(TensorShape({1}), {1618}); // values 76 AddInputFromArray<int64>(TensorShape({1}), {1}); // shape 77 AddInputFromArray<float>(TensorShape({2, 1}), {17, 19}); // dense 78 79 ExpectHasSubstr(RunOpKernel().ToString(), "broadcasts dense to sparse only"); 80 } 81 82 TEST_F(SparseDenseCDivTest, DoNotBroadcastSparse_SameDims) { 83 MakeOp<float>(); 84 // [1, 1] op [2, 1] 85 AddInputFromArray<int64>(TensorShape({1, 2}), {0, 0}); 86 AddInputFromArray<float>(TensorShape({1}), {1618}); 87 AddInputFromArray<int64>(TensorShape({2}), {1, 1}); 88 AddInputFromArray<float>(TensorShape({2, 1}), {17, 19}); 89 90 ExpectHasSubstr(RunOpKernel().ToString(), "broadcasts dense to sparse only"); 91 } 92 93 TEST_F(SparseDenseCDivTest, SameShape) { 94 MakeOp<float>(); 95 // [ 1] 96 // [2 ] cdiv [dense: same shape, all 1's] 97 // [3 4] 98 const auto indices_shape = TensorShape({4, 2}); 99 std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1}; 100 const gtl::ArraySlice<int64> indices(in); 101 std::initializer_list<int64> sh{3, 2}; 102 const gtl::ArraySlice<int64> shape(sh); 103 104 // Tensor dense(DT_FLOAT, TensorShape({3, 1})); 105 Tensor dense(DT_FLOAT, TensorShape(shape)); 106 auto dense_flat = dense.flat<float>(); 107 dense_flat.setConstant(1.); 108 109 AddInputFromArray<int64>(indices_shape, indices); 110 AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4}); 111 AddInputFromArray<int64>(TensorShape({2}), shape); 112 AddInputFromArray<float>(TensorShape(shape), dense_flat); 113 114 TF_ASSERT_OK(RunOpKernel()); 115 116 Tensor expected(allocator(), DT_FLOAT, TensorShape({4})); 117 test::FillValues<float>(&expected, {1, 2, 3, 4}); 118 test::ExpectTensorEqual<float>(expected, *GetOutput(0)); 119 } 120 121 TEST_F(SparseDenseCDivTest, BroadcastDenseSameDims) { 122 // No broadcast. 123 MakeOp<float>(); 124 // [ 1] 125 // [2 ] cdiv [dense: shape [3,1], all 1's] 126 // [3 4] 127 const auto indices_shape = TensorShape({4, 2}); 128 std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1}; 129 const gtl::ArraySlice<int64> indices(in); 130 std::initializer_list<int64> sh{3, 2}; 131 const gtl::ArraySlice<int64> shape(sh); 132 133 Tensor dense(DT_FLOAT, TensorShape({3, 1})); 134 auto dense_flat = dense.flat<float>(); 135 dense_flat.setConstant(1.); 136 137 AddInputFromArray<int64>(indices_shape, indices); 138 AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4}); 139 AddInputFromArray<int64>(TensorShape({2}), shape); 140 AddInputFromArray<float>(TensorShape({3, 1}), dense_flat); 141 142 TF_ASSERT_OK(RunOpKernel()); 143 144 Tensor expected(allocator(), DT_FLOAT, TensorShape({4})); 145 test::FillValues<float>(&expected, {1, 2, 3, 4}); 146 test::ExpectTensorEqual<float>(expected, *GetOutput(0)); 147 } 148 149 TEST_F(SparseDenseCDivTest, BroadcastDenseFewerDims) { 150 MakeOp<float>(); 151 // [ 1] 152 // [2 ] cdiv [dense: shape [2]] 153 // [3 4] 154 const auto indices_shape = TensorShape({4, 2}); 155 std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1}; 156 const gtl::ArraySlice<int64> indices(in); 157 std::initializer_list<int64> sh{3, 2}; 158 const gtl::ArraySlice<int64> shape(sh); 159 160 Tensor dense(DT_FLOAT, TensorShape({2})); 161 auto dense_flat = dense.flat<float>(); 162 dense_flat.setConstant(1.); 163 164 AddInputFromArray<int64>(indices_shape, indices); 165 AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4}); 166 AddInputFromArray<int64>(TensorShape({2}), shape); 167 AddInputFromArray<float>(TensorShape({2}), dense_flat); 168 169 TF_ASSERT_OK(RunOpKernel()); 170 171 Tensor expected(allocator(), DT_FLOAT, TensorShape({4})); 172 test::FillValues<float>(&expected, {1, 2, 3, 4}); 173 test::ExpectTensorEqual<float>(expected, *GetOutput(0)); 174 } 175 176 TEST_F(SparseDenseCMulTest, BroadcastDense) { 177 MakeOp<float>(); 178 // [ 1] 179 // [2 ] (shape [3,2]) cmul [0.5 0] (shape [2]) 180 // [3 4] 181 // 182 // Result: 183 // [? 0] 184 // [1 ?] where ? remains implicitly zero. 185 // [1.5 0] 186 const auto indices_shape = TensorShape({4, 2}); 187 std::initializer_list<int64> in{0, 1, 1, 0, 2, 0, 2, 1}; 188 const gtl::ArraySlice<int64> indices(in); 189 std::initializer_list<int64> sh{3, 2}; 190 const gtl::ArraySlice<int64> shape(sh); 191 192 Tensor dense(DT_FLOAT, TensorShape({2})); 193 auto dense_flat = dense.flat<float>(); 194 dense_flat(0) = 0.5; 195 dense_flat(1) = 0; 196 197 AddInputFromArray<int64>(indices_shape, indices); 198 AddInputFromArray<float>(TensorShape({4}), {1, 2, 3, 4}); 199 AddInputFromArray<int64>(TensorShape({2}), shape); 200 AddInputFromArray<float>(TensorShape({2}), dense_flat); 201 202 TF_ASSERT_OK(RunOpKernel()); 203 204 Tensor expected(allocator(), DT_FLOAT, TensorShape({4})); 205 test::FillValues<float>(&expected, {0, 1, 1.5, 0}); 206 test::ExpectTensorEqual<float>(expected, *GetOutput(0)); 207 } 208 209 // Benchmarking code follows. 210 211 static Graph* SparseMatCMulDenseMat(Graph* g, Node* sp_indices, Node* sp_vals, 212 Node* sp_shape, Node* dense) { 213 Node* ret; 214 TF_CHECK_OK( 215 NodeBuilder(g->NewName("SparseDenseCwiseMul"), "SparseDenseCwiseMul") 216 .Input(sp_indices) 217 .Input(sp_vals) 218 .Input(sp_shape) 219 .Input(dense) 220 .Finalize(g, &ret)); 221 return g; 222 } 223 224 static Node* MakeTensor(Graph* g, int B, int M, int N) { 225 Tensor data(DT_FLOAT, TensorShape({B, M, N})); 226 data.flat<float>().setRandom(); 227 return test::graph::Constant(g, data); 228 } 229 230 struct ST { 231 Node* indices; 232 Node* vals; 233 Node* shape; 234 }; 235 236 static ST MakeSparseTensor(Graph* g, int B, int M, int N, int nnz_inner) { 237 const int total_nnz = B * M * nnz_inner; 238 const int kNumDims = 3; 239 240 Tensor indices(DT_INT64, TensorShape({total_nnz, kNumDims})); 241 Tensor vals(DT_FLOAT, TensorShape({total_nnz})); 242 Tensor shape(DT_INT64, TensorShape({kNumDims})); 243 vals.flat<float>().setRandom(); 244 test::FillValues(&shape, gtl::ArraySlice<int64>({B, M, N})); 245 auto indices_mat = indices.matrix<int64>(); 246 247 int nnz_cnt = 0; 248 std::unordered_set<int> picked; 249 std::random_device rd; 250 std::mt19937 gen(rd()); 251 std::uniform_int_distribution<> dist(0, N - 1); 252 253 for (int i = 0; i < B; ++i) { 254 for (int j = 0; j < M; ++j) { 255 for (int k = 0; k < nnz_inner; ++k) { 256 indices_mat(nnz_cnt, 0) = i; 257 indices_mat(nnz_cnt, 1) = j; 258 259 int inner = dist(gen); 260 while (picked.count(inner) == 1) { 261 inner = dist(gen); 262 } 263 picked.insert(inner); 264 indices_mat(nnz_cnt, 2) = inner; 265 266 ++nnz_cnt; 267 } 268 } 269 } 270 271 return ST{test::graph::Constant(g, indices), test::graph::Constant(g, vals), 272 test::graph::Constant(g, shape)}; 273 } 274 275 // [8, 4, N{nnz}] cmul [8, 4, N] 276 #define BM_SparseMatCMulDenseMatArgs(N, NNZ_INNER) \ 277 static void BM_SparseMatCMulDenseMat_##N##_##NNZ_INNER(int iters) { \ 278 Graph* g = new Graph(OpRegistry::Global()); \ 279 Node* dense = MakeTensor(g, 8, 4, N); \ 280 ST sp = MakeSparseTensor(g, 8, 4, N, NNZ_INNER); \ 281 \ 282 testing::ItemsProcessed(static_cast<int64>(iters * 8 * 4 * N * 2)); \ 283 test::Benchmark( \ 284 "cpu", SparseMatCMulDenseMat(g, sp.indices, sp.vals, sp.shape, dense)) \ 285 .Run(iters); \ 286 } \ 287 BENCHMARK(BM_SparseMatCMulDenseMat_##N##_##NNZ_INNER) 288 289 BM_SparseMatCMulDenseMatArgs(1048576, 1); 290 BM_SparseMatCMulDenseMatArgs(1048576, 8); 291 BM_SparseMatCMulDenseMatArgs(1048576, 32); 292 BM_SparseMatCMulDenseMatArgs(262144, 1); 293 BM_SparseMatCMulDenseMatArgs(262144, 8); 294 BM_SparseMatCMulDenseMatArgs(262144, 32); 295 296 } // namespace 297 298 } // namespace tensorflow 299