1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """Tests for sparse_cross_op.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import numpy 22 23 from tensorflow.python.client import session 24 from tensorflow.python.framework import constant_op 25 from tensorflow.python.framework import dtypes 26 from tensorflow.python.framework import sparse_tensor 27 from tensorflow.python.ops import sparse_ops 28 from tensorflow.python.platform import test 29 30 31 class SparseCrossOpTest(test.TestCase): 32 33 def test_simple(self): 34 """Tests a simple scenario.""" 35 op = sparse_ops._sparse_cross([ 36 self._sparse_tensor([['batch1-FC1-F1'], 37 ['batch2-FC1-F1', 'batch2-FC1-F2']]), 38 self._sparse_tensor([['batch1-FC2-F1'], 39 ['batch2-FC2-F1', 'batch2-FC2-F2']]) 40 ]) 41 expected_out = self._sparse_tensor([['batch1-FC1-F1_X_batch1-FC2-F1'], [ 42 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 43 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' 44 ]]) 45 with self.test_session() as sess: 46 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 47 48 def test_dense(self): 49 """Tests only dense inputs.""" 50 op = sparse_ops._sparse_cross([ 51 constant_op.constant([['batch1-FC1-F1', 'batch1-FC1-F2'], 52 ['batch2-FC1-F1', 'batch2-FC1-F2']], 53 dtypes.string), 54 constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'], 55 ['batch2-FC2-F1', 'batch2-FC2-F2']], 56 dtypes.string), 57 ]) 58 expected_out = self._sparse_tensor([[ 59 'batch1-FC1-F1_X_batch1-FC2-F1', 'batch1-FC1-F1_X_batch1-FC2-F2', 60 'batch1-FC1-F2_X_batch1-FC2-F1', 'batch1-FC1-F2_X_batch1-FC2-F2' 61 ], [ 62 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 63 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' 64 ]]) 65 with self.test_session() as sess: 66 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 67 68 def test_integer_mixed_string_sparse(self): 69 """Tests mixed type.""" 70 op = sparse_ops._sparse_cross([ 71 self._sparse_tensor([[11], [333, 55555]]), 72 self._sparse_tensor([['batch1-FC2-F1'], 73 ['batch2-FC2-F1', 'batch2-FC2-F2']]) 74 ]) 75 expected_out = self._sparse_tensor([['11_X_batch1-FC2-F1'], [ 76 '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '55555_X_batch2-FC2-F1', 77 '55555_X_batch2-FC2-F2' 78 ]]) 79 with self.test_session() as sess: 80 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 81 82 def test_integer_mixed_string_dense(self): 83 """Tests mixed dense inputs.""" 84 op = sparse_ops._sparse_cross([ 85 constant_op.constant([[11, 333], [55555, 999999]], dtypes.int64), 86 constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'], 87 ['batch2-FC2-F1', 'batch2-FC2-F2']], 88 dtypes.string), 89 ]) 90 expected_out = self._sparse_tensor([[ 91 '11_X_batch1-FC2-F1', '11_X_batch1-FC2-F2', '333_X_batch1-FC2-F1', 92 '333_X_batch1-FC2-F2' 93 ], [ 94 '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2', 95 '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2' 96 ]]) 97 with self.test_session() as sess: 98 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 99 100 def test_sparse_cross_dense(self): 101 """Tests sparse and dense inputs.""" 102 op = sparse_ops._sparse_cross([ 103 self._sparse_tensor([['batch1-FC1-F1'], 104 ['batch2-FC1-F1', 'batch2-FC1-F2']]), 105 constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'], 106 ['batch2-FC2-F1', 'batch2-FC2-F2']], 107 dtypes.string), 108 ]) 109 expected_out = self._sparse_tensor( 110 [['batch1-FC1-F1_X_batch1-FC2-F1', 'batch1-FC1-F1_X_batch1-FC2-F2'], [ 111 'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2', 112 'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2' 113 ]]) 114 with self.test_session() as sess: 115 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 116 117 def test_integer_sparse_input(self): 118 """Tests mixed type sparse and dense inputs.""" 119 op = sparse_ops._sparse_cross([ 120 self._sparse_tensor([[11], [333, 5555]]), 121 constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'], 122 ['batch2-FC2-F1', 'batch2-FC2-F2']], 123 dtypes.string), 124 ]) 125 expected_out = self._sparse_tensor( 126 [['11_X_batch1-FC2-F1', '11_X_batch1-FC2-F2'], [ 127 '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', 128 '5555_X_batch2-FC2-F1', '5555_X_batch2-FC2-F2' 129 ]]) 130 with self.test_session() as sess: 131 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 132 133 def test_permutation_3x3x3(self): 134 """Tests 3x3x3 permutation.""" 135 op = sparse_ops._sparse_cross([ 136 self._sparse_tensor( 137 [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']]), 138 self._sparse_tensor( 139 [['batch1-FC2-F1', 'batch1-FC2-F2', 'batch1-FC2-F3']]), 140 self._sparse_tensor( 141 [['batch1-FC3-F1', 'batch1-FC3-F2', 'batch1-FC3-F3']]) 142 ]) 143 expected_out = self._sparse_tensor([[ 144 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1', 145 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2', 146 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F3', 147 'batch1-FC1-F1_X_batch1-FC2-F2_X_batch1-FC3-F1', 148 'batch1-FC1-F1_X_batch1-FC2-F2_X_batch1-FC3-F2', 149 'batch1-FC1-F1_X_batch1-FC2-F2_X_batch1-FC3-F3', 150 'batch1-FC1-F1_X_batch1-FC2-F3_X_batch1-FC3-F1', 151 'batch1-FC1-F1_X_batch1-FC2-F3_X_batch1-FC3-F2', 152 'batch1-FC1-F1_X_batch1-FC2-F3_X_batch1-FC3-F3', 153 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1', 154 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2', 155 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F3', 156 'batch1-FC1-F2_X_batch1-FC2-F2_X_batch1-FC3-F1', 157 'batch1-FC1-F2_X_batch1-FC2-F2_X_batch1-FC3-F2', 158 'batch1-FC1-F2_X_batch1-FC2-F2_X_batch1-FC3-F3', 159 'batch1-FC1-F2_X_batch1-FC2-F3_X_batch1-FC3-F1', 160 'batch1-FC1-F2_X_batch1-FC2-F3_X_batch1-FC3-F2', 161 'batch1-FC1-F2_X_batch1-FC2-F3_X_batch1-FC3-F3', 162 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1', 163 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2', 164 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F3', 165 'batch1-FC1-F3_X_batch1-FC2-F2_X_batch1-FC3-F1', 166 'batch1-FC1-F3_X_batch1-FC2-F2_X_batch1-FC3-F2', 167 'batch1-FC1-F3_X_batch1-FC2-F2_X_batch1-FC3-F3', 168 'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F1', 169 'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F2', 170 'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F3' 171 ]]) 172 with self.test_session() as sess: 173 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 174 175 def test_permutation_3x1x2(self): 176 """Tests 3x1x2 permutation.""" 177 op = sparse_ops._sparse_cross([ 178 self._sparse_tensor( 179 [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']]), 180 self._sparse_tensor([['batch1-FC2-F1']]), 181 self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']]) 182 ]) 183 expected_out = self._sparse_tensor([[ 184 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1', 185 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2', 186 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1', 187 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2', 188 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1', 189 'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2' 190 ]]) 191 with self.test_session() as sess: 192 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 193 194 def test_large_batch(self): 195 """Tests with large batch size to force multithreading.""" 196 batch_size = 5000 197 col1 = [] 198 col2 = [] 199 col3 = [] 200 for b in range(batch_size): 201 col1.append( 202 ['batch%d-FC1-F1' % b, 'batch%d-FC1-F2' % b, 'batch%d-FC1-F3' % b]) 203 col2.append(['batch%d-FC2-F1' % b]) 204 col3.append(['batch%d-FC3-F1' % b, 'batch%d-FC3-F2' % b]) 205 206 op = sparse_ops._sparse_cross([ 207 self._sparse_tensor(col1), self._sparse_tensor(col2), 208 self._sparse_tensor(col3) 209 ]) 210 211 col_out = [] 212 for b in range(batch_size): 213 col_out.append([ 214 'batch%d-FC1-F1_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b), 215 'batch%d-FC1-F1_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b), 216 'batch%d-FC1-F2_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b), 217 'batch%d-FC1-F2_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b), 218 'batch%d-FC1-F3_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b), 219 'batch%d-FC1-F3_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b) 220 ]) 221 222 expected_out = self._sparse_tensor(col_out) 223 with self.test_session() as sess: 224 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 225 226 def test_one_column_empty(self): 227 """Tests when one column is empty. 228 229 The crossed tensor should be empty. 230 """ 231 op = sparse_ops._sparse_cross([ 232 self._sparse_tensor([['batch1-FC1-F1', 'batch1-FC1-F2']]), 233 self._sparse_tensor([], 1), 234 self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']]) 235 ]) 236 with self.test_session() as sess: 237 self._assert_sparse_tensor_empty(sess.run(op)) 238 239 def test_some_columns_empty(self): 240 """Tests when more than one columns are empty. 241 242 Cross for the corresponding batch should be empty. 243 """ 244 op = sparse_ops._sparse_cross([ 245 self._sparse_tensor([['batch1-FC1-F1', 'batch1-FC1-F2']], 2), 246 self._sparse_tensor([['batch1-FC2-F1'], ['batch2-FC2-F1']], 2), 247 self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']], 2) 248 ]) 249 expected_out = self._sparse_tensor([[ 250 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1', 251 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2', 252 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1', 253 'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2' 254 ]], 2) 255 with self.test_session() as sess: 256 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 257 258 def test_all_columns_empty(self): 259 """Tests when all columns are empty. 260 261 The crossed tensor should be empty. 262 """ 263 op = sparse_ops._sparse_cross([ 264 self._sparse_tensor([]), self._sparse_tensor([]), 265 self._sparse_tensor([]) 266 ]) 267 with self.test_session() as sess: 268 self._assert_sparse_tensor_empty(sess.run(op)) 269 270 def test_hashed_zero_bucket_no_hash_key(self): 271 op = sparse_ops._sparse_cross_hashed( 272 [ 273 self._sparse_tensor([['batch1-FC1-F1']]), 274 self._sparse_tensor([['batch1-FC2-F1']]), 275 self._sparse_tensor([['batch1-FC3-F1']]) 276 ]) 277 # Check actual hashed output to prevent unintentional hashing changes. 278 expected_out = self._sparse_tensor([[1971693436396284976]]) 279 with self.test_session() as sess: 280 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 281 282 def test_hashed_zero_bucket(self): 283 op = sparse_ops._sparse_cross_hashed( 284 [ 285 self._sparse_tensor([['batch1-FC1-F1']]), 286 self._sparse_tensor([['batch1-FC2-F1']]), 287 self._sparse_tensor([['batch1-FC3-F1']]) 288 ], 289 hash_key=sparse_ops._DEFAULT_HASH_KEY + 1) 290 # Check actual hashed output to prevent unintentional hashing changes. 291 expected_out = self._sparse_tensor([[4847552627144134031]]) 292 with self.test_session() as sess: 293 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 294 295 # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed. 296 def test_hashed_no_hash_key(self): 297 op = sparse_ops._sparse_cross_hashed( 298 [ 299 self._sparse_tensor([['batch1-FC1-F1']]), 300 self._sparse_tensor([['batch1-FC2-F1']]), 301 self._sparse_tensor([['batch1-FC3-F1']]) 302 ], 303 num_buckets=100) 304 # Check actual hashed output to prevent unintentional hashing changes. 305 expected_out = self._sparse_tensor([[83]]) 306 with self.test_session() as sess: 307 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 308 309 def test_hashed_output(self): 310 op = sparse_ops._sparse_cross_hashed( 311 [ 312 self._sparse_tensor([['batch1-FC1-F1']]), 313 self._sparse_tensor([['batch1-FC2-F1']]), 314 self._sparse_tensor([['batch1-FC3-F1']]) 315 ], 316 num_buckets=100, 317 hash_key=sparse_ops._DEFAULT_HASH_KEY + 1) 318 # Check actual hashed output to prevent unintentional hashing changes. 319 expected_out = self._sparse_tensor([[31]]) 320 with self.test_session() as sess: 321 self._assert_sparse_tensor_equals(expected_out, sess.run(op)) 322 323 def test_hashed__has_no_collision(self): 324 """Tests that fingerprint concatenation has no collisions.""" 325 # Although the last 10 bits of 359 and 1024+359 are identical. 326 # As a result, all the crosses shouldn't collide. 327 t1 = constant_op.constant([[359], [359 + 1024]]) 328 t2 = constant_op.constant([list(range(10)), list(range(10))]) 329 cross = sparse_ops._sparse_cross_hashed( 330 [t2, t1], 331 num_buckets=1024, 332 hash_key=sparse_ops._DEFAULT_HASH_KEY + 1) 333 cross_dense = sparse_ops.sparse_tensor_to_dense(cross) 334 with session.Session(): 335 values = cross_dense.eval() 336 self.assertTrue(numpy.not_equal(values[0], values[1]).all()) 337 338 def test_hashed_3x1x2(self): 339 """Tests 3x1x2 permutation with hashed output.""" 340 op = sparse_ops._sparse_cross_hashed( 341 [ 342 self._sparse_tensor( 343 [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']]), 344 self._sparse_tensor([['batch1-FC2-F1']]), 345 self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']]) 346 ], 347 num_buckets=1000) 348 with self.test_session() as sess: 349 out = sess.run(op) 350 self.assertEqual(6, len(out.values)) 351 self.assertAllEqual([[0, i] for i in range(6)], out.indices) 352 self.assertTrue(all(x < 1000 and x >= 0 for x in out.values)) 353 all_values_are_different = len(out.values) == len(set(out.values)) 354 self.assertTrue(all_values_are_different) 355 356 def _assert_sparse_tensor_empty(self, sp): 357 self.assertEquals(0, sp.indices.size) 358 self.assertEquals(0, sp.values.size) 359 # TODO(zakaria): check if we can ignore the first dim of the shape. 360 self.assertEquals(0, sp.dense_shape[1]) 361 362 def _assert_sparse_tensor_equals(self, sp1, sp2): 363 self.assertAllEqual(sp1.indices.eval(), sp2.indices) 364 self.assertAllEqual(sp1.values.eval(), sp2.values) 365 self.assertAllEqual(sp1.dense_shape.eval(), sp2.dense_shape) 366 367 def _sparse_tensor(self, data, batch_size=-1): 368 """Generates a SparseTensor. 369 370 Args: 371 data: Should be a list of list of strings or int64. Each item of the outer 372 list represents a batch. Each item of the batch is a feature of a 373 specific feature column. 374 batch_size: optional batch size, especially for cases when data has no 375 entry for some batches. 376 377 Returns: 378 A SparseTensor. 379 """ 380 indices = [] 381 values = [] 382 max_col_count = 0 383 for batch, batch_ix in zip(data, range(len(data))): 384 for column, column_ix in zip(batch, range(len(batch))): 385 indices.append([batch_ix, column_ix]) 386 values.append(column) 387 max_col_count = max(max_col_count, column_ix + 1) 388 shape = [batch_size if batch_size != -1 else len(data), max_col_count] 389 value_type = (dtypes.string if not values or isinstance(values[0], str) else 390 dtypes.int64) 391 return sparse_tensor.SparseTensor( 392 constant_op.constant(indices, dtypes.int64, [len(indices), 2]), 393 constant_op.constant(values, value_type, [len(indices)]), 394 constant_op.constant(shape, dtypes.int64)) 395 396 397 if __name__ == '__main__': 398 test.main() 399