1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """Tests for sparse_ops.sparse_tensor_dense_matmul.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import sys 22 import time 23 24 import numpy as np 25 26 from tensorflow.core.protobuf import config_pb2 27 from tensorflow.python.client import session 28 from tensorflow.python.framework import constant_op 29 from tensorflow.python.framework import dtypes 30 from tensorflow.python.framework import ops 31 from tensorflow.python.framework import sparse_tensor 32 from tensorflow.python.framework import tensor_shape 33 from tensorflow.python.ops import array_ops 34 from tensorflow.python.ops import control_flow_ops 35 from tensorflow.python.ops import math_ops 36 from tensorflow.python.ops import sparse_ops 37 from tensorflow.python.platform import app 38 from tensorflow.python.platform import test 39 40 41 def _maybe_complex(x): 42 if x.dtype.kind == "c": # complex 43 return (x + 1j * x) / 2 44 return x 45 46 47 class SparseTensorDenseMatMulTest(test.TestCase): 48 49 def _testMatmul(self, 50 x, 51 y, 52 adjoint_a=False, 53 adjoint_b=False, 54 indices_dtype=np.int64): 55 x_mat = np.matrix(x) 56 if adjoint_a: 57 x_mat = x_mat.H 58 y_mat = np.matrix(y) 59 if adjoint_b: 60 y_mat = y_mat.H 61 62 np_ans = x_mat * y_mat 63 64 x_indices = np.vstack(np.where(x)).astype(indices_dtype).T 65 x_values = x[np.where(x)] 66 x_shape = x.shape 67 68 with self.test_session(use_gpu=True): 69 sp_x_value = sparse_tensor.SparseTensorValue( 70 indices=x_indices, values=x_values, dense_shape=x_shape) 71 tf_value_ans = sparse_ops.sparse_tensor_dense_matmul( 72 sp_x_value, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b) 73 tf_tensor_ans = sparse_ops.sparse_tensor_dense_matmul( 74 sparse_tensor.SparseTensor.from_value(sp_x_value), 75 y, 76 adjoint_a=adjoint_a, 77 adjoint_b=adjoint_b) 78 79 # Ensure that the RHS shape is known at least. 80 self.assertEqual(tf_value_ans.get_shape()[1], np_ans.shape[1]) 81 self.assertEqual(tf_tensor_ans.get_shape()[1], np_ans.shape[1]) 82 83 for out in (tf_value_ans.eval(), tf_tensor_ans.eval()): 84 if x.dtype == np.float32: 85 self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-4) 86 elif x.dtype == np.float64: 87 self.assertAllClose(np_ans, out, rtol=1e-6, atol=1e-6) 88 else: 89 self.assertAllClose(np_ans, out, rtol=1e-4, atol=1e-4) 90 91 def _testBasic(self, value_dtype, indices_dtype=np.int64): 92 x = _maybe_complex(np.random.rand(10, 10).astype(value_dtype)) 93 x[np.abs(x) < 0.5] = 0 # Make it sparse 94 95 y = _maybe_complex(np.random.randn(10, 20).astype(value_dtype)) 96 97 self._testMatmul(x, y, indices_dtype=indices_dtype) 98 99 def testBasic(self): 100 np.random.seed(127) # Repeatable results 101 self._testBasic(np.int32) 102 self._testBasic(np.float32) 103 self._testBasic(np.float64) 104 self._testBasic(np.complex64) 105 self._testBasic(np.complex128) 106 self._testBasic(np.int32, indices_dtype=np.int32) 107 self._testBasic(np.float32, indices_dtype=np.int32) 108 109 def testShapeInference(self): 110 x = np.random.rand(10, 10) 111 x[np.abs(x) < 0.5] = 0 # Make it sparse 112 y = np.random.randn(10, 20) 113 x_indices = np.vstack(np.where(x)).astype(np.int64).T 114 x_values = x[np.where(x)] 115 x_shape = x.shape 116 x_st = sparse_tensor.SparseTensor(x_indices, x_values, x_shape) 117 result = sparse_ops.sparse_tensor_dense_matmul(x_st, y) 118 self.assertEqual(result.get_shape(), (10, 20)) 119 120 x_shape_unknown = array_ops.placeholder(dtype=dtypes.int64, shape=None) 121 x_st_shape_unknown = sparse_tensor.SparseTensor(x_indices, x_values, 122 x_shape_unknown) 123 result_left_shape_unknown = sparse_ops.sparse_tensor_dense_matmul( 124 x_st_shape_unknown, y) 125 self.assertEqual(result_left_shape_unknown.get_shape().as_list(), 126 [None, 20]) 127 128 x_shape_inconsistent = [10, 15] 129 x_st_shape_inconsistent = sparse_tensor.SparseTensor(x_indices, x_values, 130 x_shape_inconsistent) 131 with self.assertRaisesRegexp(ValueError, "Dimensions must be equal"): 132 sparse_ops.sparse_tensor_dense_matmul(x_st_shape_inconsistent, y) 133 134 def testInvalidIndicesForSparseTensorDenseMatmul(self): 135 # Note: use_gpu=False because nice errors are only returned from CPU kernel. 136 with self.test_session(use_gpu=False): 137 indices = np.matrix([[1, 10]]).astype(np.int64) 138 values = np.array([10]).astype(np.float32) 139 shape = [3, 2] 140 sparse_t = sparse_tensor.SparseTensor(indices, values, shape) 141 142 # Test multiplying by both a small and large dense matrix, to hit 143 # both cases in the kernel. 144 dense_t = np.matrix([[1] * 5, [2] * 5], dtype=np.float32) 145 with self.assertRaisesOpError( 146 "k .10. from index.0,1. out of bounds .>=2."): 147 sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t).eval() 148 dense_t = np.matrix([[1] * 500, [2] * 500], dtype=np.float32) 149 with self.assertRaisesOpError( 150 "k .10. from index.0,1. out of bounds .>=2."): 151 sparse_ops.sparse_tensor_dense_matmul(sparse_t, dense_t).eval() 152 153 # Repeat with adjoint_a, to get a different error. 154 dense_t = np.matrix([[1] * 5, [2] * 5, [3] * 5], dtype=np.float32) 155 with self.assertRaisesOpError( 156 "m .10. from index.0,1. out of bounds .>=2."): 157 sparse_ops.sparse_tensor_dense_matmul( 158 sparse_t, dense_t, adjoint_a=True).eval() 159 dense_t = np.matrix([[1] * 500, [2] * 500, [3] * 500], dtype=np.float32) 160 with self.assertRaisesOpError( 161 "m .10. from index.0,1. out of bounds .>=2."): 162 sparse_ops.sparse_tensor_dense_matmul( 163 sparse_t, dense_t, adjoint_a=True).eval() 164 165 def testInvalidIndicesForSparseTensorDenseMatmulOnGPU(self): 166 # Note: use_gpu=False because nice errors are only returned from CPU kerne 167 if not test.is_gpu_available(): 168 return 169 with self.test_session(use_gpu=True): 170 indices = np.array([[1, 10]]).astype(np.int64) 171 values = np.array([10]).astype(np.float32) 172 shape = [3, 2] 173 sparse_t = sparse_tensor.SparseTensor(indices, values, shape) 174 175 # Test multiplying by both a small and large dense matrix, to hit 176 # both cases in the kernel. 177 dense_t = np.matrix([[1] * 5, [2] * 5], dtype=np.float32) 178 expected_t = np.array([[0] * 5, [np.nan] * 5, [0] * 5], dtype=np.float32) 179 self.assertAllClose(expected_t, 180 sparse_ops.sparse_tensor_dense_matmul( 181 sparse_t, dense_t).eval()) 182 dense_t = np.matrix([[1] * 500, [2] * 500], dtype=np.float32) 183 expected_t = np.array( 184 [[0] * 500, [np.nan] * 500, [0] * 500], dtype=np.float32) 185 self.assertAllClose(expected_t, 186 sparse_ops.sparse_tensor_dense_matmul( 187 sparse_t, dense_t).eval()) 188 189 # Repeat with adjoint_a, now the error is that the sparse index 190 # is OOO w.r.t. the output. The GPU kernel can't do much here, 191 # so it just doesn't accumulate. 192 193 dense_t = np.matrix([[1] * 5, [2] * 5, [3] * 5], dtype=np.float32) 194 expected_t = np.array([[0] * 5, [0] * 5], dtype=np.float32) 195 self.assertAllClose(expected_t, 196 sparse_ops.sparse_tensor_dense_matmul( 197 sparse_t, dense_t, adjoint_a=True).eval()) 198 199 dense_t = np.matrix([[1] * 500, [2] * 500, [3] * 500], dtype=np.float32) 200 expected_t = np.array([[0] * 500, [0] * 500], dtype=np.float32) 201 self.assertAllClose(expected_t, 202 sparse_ops.sparse_tensor_dense_matmul( 203 sparse_t, dense_t, adjoint_a=True).eval()) 204 205 # Tests setting one dimension to be a high value. 206 def _testLarge(self, np_dtype): 207 r1 = np.random.randint(6000, 20000) 208 r2 = np.random.randint(1, 10) 209 r3 = np.random.randint(1, 10) 210 211 for m, k, n in [(r1, r2, r3), 212 (r2, r1, r3), 213 (r2, r3, r1)]: 214 x = _maybe_complex(np.random.rand(m, k).astype(np_dtype)) 215 x[np.abs(x) < 0.8] = 0 216 217 y = _maybe_complex(np.random.randn(k, n).astype(np_dtype)) 218 219 self._testMatmul(x, y, adjoint_a=False, adjoint_b=False) 220 self._testMatmul(x.transpose(), y, adjoint_a=True, adjoint_b=False) 221 self._testMatmul(x, y.transpose(), adjoint_a=False, adjoint_b=True) 222 self._testMatmul( 223 x.transpose(), y.transpose(), adjoint_a=True, adjoint_b=True) 224 225 np.random.seed(127) # Repeatable results 226 self._testLarge(np.float32) 227 self._testLarge(np.float64) 228 self._testLarge(np.complex64) 229 self._testLarge(np.complex128) 230 231 # Tests random sized matrices. 232 def testFloatRandom(self): 233 np.random.seed(127) # Repeatable results 234 for _ in range(8): 235 for adjoint_a in [True, False]: 236 for adjoint_b in [True, False]: 237 for thresh in [0.0, 0.2, 0.8, 1.0]: 238 n, k, m = np.random.randint(1, 100, size=3) 239 x = np.random.rand(n, k).astype(np.float32) 240 x[x < thresh] = 0 # Make it sparse 241 y = np.random.randn(k, m).astype(np.float32) 242 x = x.transpose() if adjoint_a else x 243 y = y.transpose() if adjoint_b else y 244 self._testMatmul(x, y, adjoint_a, adjoint_b) 245 246 247 def _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(x, y, adjoint_a, 248 adjoint_b): 249 250 def body(t, prev): 251 with ops.control_dependencies([prev]): 252 return (t + 1, math_ops.matmul( 253 x, 254 y, 255 transpose_a=adjoint_a, 256 transpose_b=adjoint_b, 257 a_is_sparse=True, 258 b_is_sparse=False)) 259 260 t0 = constant_op.constant(0) 261 v0 = constant_op.constant(0.0) 262 263 def _timeit(iterations, _): 264 (_, final) = control_flow_ops.while_loop( 265 lambda t, _: t < iterations, 266 body, (t0, v0), 267 parallel_iterations=1, 268 back_prop=False, 269 shape_invariants=(tensor_shape.TensorShape(()), 270 tensor_shape.TensorShape(None))) 271 return [final] 272 273 return _timeit 274 275 276 def _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(x_ind, x_val, x_shape, 277 y, adjoint_a, 278 adjoint_b): 279 sp_x = sparse_tensor.SparseTensor( 280 indices=x_ind, values=x_val, dense_shape=x_shape) 281 282 def body(t, prev): 283 with ops.control_dependencies([prev]): 284 return (t + 1, sparse_ops.sparse_tensor_dense_matmul( 285 sp_x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b)) 286 287 t0 = constant_op.constant(0) 288 v0 = constant_op.constant(0.0) 289 290 def _timeit(iterations, _): 291 (_, final) = control_flow_ops.while_loop( 292 lambda t, _: t < iterations, 293 body, (t0, v0), 294 parallel_iterations=1, 295 back_prop=False, 296 shape_invariants=(tensor_shape.TensorShape(()), 297 tensor_shape.TensorShape(None))) 298 return [final] 299 300 return _timeit 301 302 303 def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh, 304 m, 305 k, 306 n, 307 adjoint_a, 308 adjoint_b, 309 use_gpu, 310 skip_dense=False): 311 config = config_pb2.ConfigProto() 312 config.allow_soft_placement = True 313 314 # Configurable for benchmarking: 315 # config.intra_op_parallelism_threads = 100 316 # config.gpu_options.per_process_gpu_memory_fraction = 0.3 317 318 np.random.seed([6, 117]) # Reproducibility 319 x = np.random.rand(m, k).astype(np.float32) 320 x[x < thresh] = 0 321 y = np.random.randn(k, n).astype(np.float32) 322 if adjoint_a: 323 x = x.T 324 if adjoint_b: 325 y = y.T 326 327 def _timer(sess, ops_fn, iterations): 328 # Warm in 329 sess.run(ops_fn(10, sess)) 330 331 # Timing run 332 start = time.time() 333 sess.run(ops_fn(iterations, sess)) 334 end = time.time() 335 336 return (end - start) / (1.0 * iterations) # Average runtime per iteration 337 338 # Using regular matmul, marking one of the matrices as dense. 339 if skip_dense: 340 delta_dense = float("nan") 341 else: 342 with session.Session(config=config, graph=ops.Graph()) as sess: 343 if not use_gpu: 344 with ops.device("/cpu:0"): 345 x_t = constant_op.constant(x) 346 y_t = constant_op.constant(y) 347 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( 348 x_t, y_t, adjoint_a, adjoint_b) 349 else: 350 with ops.device("/device:GPU:0"): 351 x_t = constant_op.constant(x) 352 y_t = constant_op.constant(y) 353 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( 354 x_t, y_t, adjoint_a, adjoint_b) 355 delta_dense = _timer(sess, ops_fn, 200) 356 357 # Using sparse_tensor_dense_matmul. 358 with session.Session("", config=config, graph=ops.Graph()) as sess: 359 if not use_gpu: 360 with ops.device("/cpu:0"): 361 x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T) 362 x_val = constant_op.constant(x[np.where(x)]) 363 x_shape = constant_op.constant(np.array(x.shape).astype(np.int64)) 364 y_t = constant_op.constant(y) 365 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse( 366 x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b) 367 else: 368 with ops.device("/device:GPU:0"): 369 x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T) 370 x_val = constant_op.constant(x[np.where(x)]) 371 x_shape = constant_op.constant(np.array(x.shape).astype(np.int64)) 372 y_t = constant_op.constant(y) 373 ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse( 374 x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b) 375 delta_sparse = _timer(sess, ops_fn, 200) 376 377 print("%g \t %d \t %s \t %d \t %d \t %g \t %g \t %g" % 378 (1 - thresh, n, use_gpu, m, k, delta_dense, delta_sparse, 379 delta_sparse / delta_dense)) 380 381 382 def main(_): 383 print("DenseDense MatMul (w/ Sparse Flag) vs. SparseTensorDense MatMul") 384 print("Matrix sizes:") 385 print(" A sparse [m, k] with % nonzero values between 1% and 80%") 386 print(" B dense [k, n]") 387 print("") 388 print("% nnz \t n \t gpu \t m \t k \t dt(dense) \t dt(sparse) " 389 "\t dt(sparse)/dt(dense)") 390 391 for thresh in (0.99, 0.8, 0.5, 0.2): 392 for n in (50, 100): 393 for use_gpu in (True, False): 394 for m in (100, 1000): 395 for k in (100, 1000): 396 sparse_tensor_dense_vs_dense_matmul_benchmark( 397 thresh, m, k, n, False, False, use_gpu=use_gpu) 398 399 # Enable for large scale benchmarks, these ones take a long time to run. 400 # 401 # for use_gpu in (True, False): 402 # sparse_tensor_dense_vs_dense_matmul_benchmark( 403 # thresh=0.99, m=1000000, k=1000, n=100, adjoint_a=False, 404 # adjoint_b=False, use_gpu=use_gpu, skip_dense=True) 405 406 407 if __name__ == "__main__": 408 if "--benchmarks" in sys.argv: 409 sys.argv.remove("--benchmarks") 410 app.run() 411 else: 412 test.main() 413