Home | History | Annotate | Download | only in kernel_tests
      1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Tests for sparse_cross_op."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import numpy
     22 
     23 from tensorflow.python.client import session
     24 from tensorflow.python.framework import constant_op
     25 from tensorflow.python.framework import dtypes
     26 from tensorflow.python.framework import sparse_tensor
     27 from tensorflow.python.ops import sparse_ops
     28 from tensorflow.python.platform import test
     29 
     30 
     31 class SparseCrossOpTest(test.TestCase):
     32 
     33   def test_simple(self):
     34     """Tests a simple scenario."""
     35     op = sparse_ops._sparse_cross([
     36         self._sparse_tensor([['batch1-FC1-F1'],
     37                              ['batch2-FC1-F1', 'batch2-FC1-F2']]),
     38         self._sparse_tensor([['batch1-FC2-F1'],
     39                              ['batch2-FC2-F1', 'batch2-FC2-F2']])
     40     ])
     41     expected_out = self._sparse_tensor([['batch1-FC1-F1_X_batch1-FC2-F1'], [
     42         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
     43         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     44     ]])
     45     with self.test_session() as sess:
     46       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
     47 
     48   def test_dense(self):
     49     """Tests only dense inputs."""
     50     op = sparse_ops._sparse_cross([
     51         constant_op.constant([['batch1-FC1-F1', 'batch1-FC1-F2'],
     52                               ['batch2-FC1-F1', 'batch2-FC1-F2']],
     53                              dtypes.string),
     54         constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
     55                               ['batch2-FC2-F1', 'batch2-FC2-F2']],
     56                              dtypes.string),
     57     ])
     58     expected_out = self._sparse_tensor([[
     59         'batch1-FC1-F1_X_batch1-FC2-F1', 'batch1-FC1-F1_X_batch1-FC2-F2',
     60         'batch1-FC1-F2_X_batch1-FC2-F1', 'batch1-FC1-F2_X_batch1-FC2-F2'
     61     ], [
     62         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
     63         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     64     ]])
     65     with self.test_session() as sess:
     66       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
     67 
     68   def test_integer_mixed_string_sparse(self):
     69     """Tests mixed type."""
     70     op = sparse_ops._sparse_cross([
     71         self._sparse_tensor([[11], [333, 55555]]),
     72         self._sparse_tensor([['batch1-FC2-F1'],
     73                              ['batch2-FC2-F1', 'batch2-FC2-F2']])
     74     ])
     75     expected_out = self._sparse_tensor([['11_X_batch1-FC2-F1'], [
     76         '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2', '55555_X_batch2-FC2-F1',
     77         '55555_X_batch2-FC2-F2'
     78     ]])
     79     with self.test_session() as sess:
     80       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
     81 
     82   def test_integer_mixed_string_dense(self):
     83     """Tests mixed dense inputs."""
     84     op = sparse_ops._sparse_cross([
     85         constant_op.constant([[11, 333], [55555, 999999]], dtypes.int64),
     86         constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
     87                               ['batch2-FC2-F1', 'batch2-FC2-F2']],
     88                              dtypes.string),
     89     ])
     90     expected_out = self._sparse_tensor([[
     91         '11_X_batch1-FC2-F1', '11_X_batch1-FC2-F2', '333_X_batch1-FC2-F1',
     92         '333_X_batch1-FC2-F2'
     93     ], [
     94         '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2',
     95         '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2'
     96     ]])
     97     with self.test_session() as sess:
     98       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
     99 
    100   def test_sparse_cross_dense(self):
    101     """Tests sparse and dense inputs."""
    102     op = sparse_ops._sparse_cross([
    103         self._sparse_tensor([['batch1-FC1-F1'],
    104                              ['batch2-FC1-F1', 'batch2-FC1-F2']]),
    105         constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
    106                               ['batch2-FC2-F1', 'batch2-FC2-F2']],
    107                              dtypes.string),
    108     ])
    109     expected_out = self._sparse_tensor(
    110         [['batch1-FC1-F1_X_batch1-FC2-F1', 'batch1-FC1-F1_X_batch1-FC2-F2'], [
    111             'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
    112             'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
    113         ]])
    114     with self.test_session() as sess:
    115       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    116 
    117   def test_integer_sparse_input(self):
    118     """Tests mixed type sparse and dense inputs."""
    119     op = sparse_ops._sparse_cross([
    120         self._sparse_tensor([[11], [333, 5555]]),
    121         constant_op.constant([['batch1-FC2-F1', 'batch1-FC2-F2'],
    122                               ['batch2-FC2-F1', 'batch2-FC2-F2']],
    123                              dtypes.string),
    124     ])
    125     expected_out = self._sparse_tensor(
    126         [['11_X_batch1-FC2-F1', '11_X_batch1-FC2-F2'], [
    127             '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2',
    128             '5555_X_batch2-FC2-F1', '5555_X_batch2-FC2-F2'
    129         ]])
    130     with self.test_session() as sess:
    131       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    132 
    133   def test_permutation_3x3x3(self):
    134     """Tests 3x3x3 permutation."""
    135     op = sparse_ops._sparse_cross([
    136         self._sparse_tensor(
    137             [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']]),
    138         self._sparse_tensor(
    139             [['batch1-FC2-F1', 'batch1-FC2-F2', 'batch1-FC2-F3']]),
    140         self._sparse_tensor(
    141             [['batch1-FC3-F1', 'batch1-FC3-F2', 'batch1-FC3-F3']])
    142     ])
    143     expected_out = self._sparse_tensor([[
    144         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1',
    145         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2',
    146         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F3',
    147         'batch1-FC1-F1_X_batch1-FC2-F2_X_batch1-FC3-F1',
    148         'batch1-FC1-F1_X_batch1-FC2-F2_X_batch1-FC3-F2',
    149         'batch1-FC1-F1_X_batch1-FC2-F2_X_batch1-FC3-F3',
    150         'batch1-FC1-F1_X_batch1-FC2-F3_X_batch1-FC3-F1',
    151         'batch1-FC1-F1_X_batch1-FC2-F3_X_batch1-FC3-F2',
    152         'batch1-FC1-F1_X_batch1-FC2-F3_X_batch1-FC3-F3',
    153         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
    154         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2',
    155         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F3',
    156         'batch1-FC1-F2_X_batch1-FC2-F2_X_batch1-FC3-F1',
    157         'batch1-FC1-F2_X_batch1-FC2-F2_X_batch1-FC3-F2',
    158         'batch1-FC1-F2_X_batch1-FC2-F2_X_batch1-FC3-F3',
    159         'batch1-FC1-F2_X_batch1-FC2-F3_X_batch1-FC3-F1',
    160         'batch1-FC1-F2_X_batch1-FC2-F3_X_batch1-FC3-F2',
    161         'batch1-FC1-F2_X_batch1-FC2-F3_X_batch1-FC3-F3',
    162         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1',
    163         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2',
    164         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F3',
    165         'batch1-FC1-F3_X_batch1-FC2-F2_X_batch1-FC3-F1',
    166         'batch1-FC1-F3_X_batch1-FC2-F2_X_batch1-FC3-F2',
    167         'batch1-FC1-F3_X_batch1-FC2-F2_X_batch1-FC3-F3',
    168         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F1',
    169         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F2',
    170         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F3'
    171     ]])
    172     with self.test_session() as sess:
    173       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    174 
    175   def test_permutation_3x1x2(self):
    176     """Tests 3x1x2 permutation."""
    177     op = sparse_ops._sparse_cross([
    178         self._sparse_tensor(
    179             [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']]),
    180         self._sparse_tensor([['batch1-FC2-F1']]),
    181         self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
    182     ])
    183     expected_out = self._sparse_tensor([[
    184         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1',
    185         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2',
    186         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
    187         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2',
    188         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1',
    189         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2'
    190     ]])
    191     with self.test_session() as sess:
    192       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    193 
    194   def test_large_batch(self):
    195     """Tests with large batch size to force multithreading."""
    196     batch_size = 5000
    197     col1 = []
    198     col2 = []
    199     col3 = []
    200     for b in range(batch_size):
    201       col1.append(
    202           ['batch%d-FC1-F1' % b, 'batch%d-FC1-F2' % b, 'batch%d-FC1-F3' % b])
    203       col2.append(['batch%d-FC2-F1' % b])
    204       col3.append(['batch%d-FC3-F1' % b, 'batch%d-FC3-F2' % b])
    205 
    206     op = sparse_ops._sparse_cross([
    207         self._sparse_tensor(col1), self._sparse_tensor(col2),
    208         self._sparse_tensor(col3)
    209     ])
    210 
    211     col_out = []
    212     for b in range(batch_size):
    213       col_out.append([
    214           'batch%d-FC1-F1_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
    215           'batch%d-FC1-F1_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b),
    216           'batch%d-FC1-F2_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
    217           'batch%d-FC1-F2_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b),
    218           'batch%d-FC1-F3_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
    219           'batch%d-FC1-F3_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b)
    220       ])
    221 
    222     expected_out = self._sparse_tensor(col_out)
    223     with self.test_session() as sess:
    224       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    225 
    226   def test_one_column_empty(self):
    227     """Tests when one column is empty.
    228 
    229     The crossed tensor should be empty.
    230     """
    231     op = sparse_ops._sparse_cross([
    232         self._sparse_tensor([['batch1-FC1-F1', 'batch1-FC1-F2']]),
    233         self._sparse_tensor([], 1),
    234         self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
    235     ])
    236     with self.test_session() as sess:
    237       self._assert_sparse_tensor_empty(sess.run(op))
    238 
    239   def test_some_columns_empty(self):
    240     """Tests when more than one columns are empty.
    241 
    242     Cross for the corresponding batch should be empty.
    243     """
    244     op = sparse_ops._sparse_cross([
    245         self._sparse_tensor([['batch1-FC1-F1', 'batch1-FC1-F2']], 2),
    246         self._sparse_tensor([['batch1-FC2-F1'], ['batch2-FC2-F1']], 2),
    247         self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']], 2)
    248     ])
    249     expected_out = self._sparse_tensor([[
    250         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1',
    251         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2',
    252         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
    253         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2'
    254     ]], 2)
    255     with self.test_session() as sess:
    256       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    257 
    258   def test_all_columns_empty(self):
    259     """Tests when all columns are empty.
    260 
    261     The crossed tensor should be empty.
    262     """
    263     op = sparse_ops._sparse_cross([
    264         self._sparse_tensor([]), self._sparse_tensor([]),
    265         self._sparse_tensor([])
    266     ])
    267     with self.test_session() as sess:
    268       self._assert_sparse_tensor_empty(sess.run(op))
    269 
    270   def test_hashed_zero_bucket_no_hash_key(self):
    271     op = sparse_ops._sparse_cross_hashed(
    272         [
    273             self._sparse_tensor([['batch1-FC1-F1']]),
    274             self._sparse_tensor([['batch1-FC2-F1']]),
    275             self._sparse_tensor([['batch1-FC3-F1']])
    276         ])
    277     # Check actual hashed output to prevent unintentional hashing changes.
    278     expected_out = self._sparse_tensor([[1971693436396284976]])
    279     with self.test_session() as sess:
    280       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    281 
    282   def test_hashed_zero_bucket(self):
    283     op = sparse_ops._sparse_cross_hashed(
    284         [
    285             self._sparse_tensor([['batch1-FC1-F1']]),
    286             self._sparse_tensor([['batch1-FC2-F1']]),
    287             self._sparse_tensor([['batch1-FC3-F1']])
    288         ],
    289         hash_key=sparse_ops._DEFAULT_HASH_KEY + 1)
    290     # Check actual hashed output to prevent unintentional hashing changes.
    291     expected_out = self._sparse_tensor([[4847552627144134031]])
    292     with self.test_session() as sess:
    293       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    294 
    295   # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed.
    296   def test_hashed_no_hash_key(self):
    297     op = sparse_ops._sparse_cross_hashed(
    298         [
    299             self._sparse_tensor([['batch1-FC1-F1']]),
    300             self._sparse_tensor([['batch1-FC2-F1']]),
    301             self._sparse_tensor([['batch1-FC3-F1']])
    302         ],
    303         num_buckets=100)
    304     # Check actual hashed output to prevent unintentional hashing changes.
    305     expected_out = self._sparse_tensor([[83]])
    306     with self.test_session() as sess:
    307       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    308 
    309   def test_hashed_output(self):
    310     op = sparse_ops._sparse_cross_hashed(
    311         [
    312             self._sparse_tensor([['batch1-FC1-F1']]),
    313             self._sparse_tensor([['batch1-FC2-F1']]),
    314             self._sparse_tensor([['batch1-FC3-F1']])
    315         ],
    316         num_buckets=100,
    317         hash_key=sparse_ops._DEFAULT_HASH_KEY + 1)
    318     # Check actual hashed output to prevent unintentional hashing changes.
    319     expected_out = self._sparse_tensor([[31]])
    320     with self.test_session() as sess:
    321       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
    322 
    323   def test_hashed__has_no_collision(self):
    324     """Tests that fingerprint concatenation has no collisions."""
    325     # Although the last 10 bits of 359 and 1024+359 are identical.
    326     # As a result, all the crosses shouldn't collide.
    327     t1 = constant_op.constant([[359], [359 + 1024]])
    328     t2 = constant_op.constant([list(range(10)), list(range(10))])
    329     cross = sparse_ops._sparse_cross_hashed(
    330         [t2, t1],
    331         num_buckets=1024,
    332         hash_key=sparse_ops._DEFAULT_HASH_KEY + 1)
    333     cross_dense = sparse_ops.sparse_tensor_to_dense(cross)
    334     with session.Session():
    335       values = cross_dense.eval()
    336       self.assertTrue(numpy.not_equal(values[0], values[1]).all())
    337 
    338   def test_hashed_3x1x2(self):
    339     """Tests 3x1x2 permutation with hashed output."""
    340     op = sparse_ops._sparse_cross_hashed(
    341         [
    342             self._sparse_tensor(
    343                 [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']]),
    344             self._sparse_tensor([['batch1-FC2-F1']]),
    345             self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
    346         ],
    347         num_buckets=1000)
    348     with self.test_session() as sess:
    349       out = sess.run(op)
    350       self.assertEqual(6, len(out.values))
    351       self.assertAllEqual([[0, i] for i in range(6)], out.indices)
    352       self.assertTrue(all(x < 1000 and x >= 0 for x in out.values))
    353       all_values_are_different = len(out.values) == len(set(out.values))
    354       self.assertTrue(all_values_are_different)
    355 
    356   def _assert_sparse_tensor_empty(self, sp):
    357     self.assertEquals(0, sp.indices.size)
    358     self.assertEquals(0, sp.values.size)
    359     # TODO(zakaria): check if we can ignore the first dim of the shape.
    360     self.assertEquals(0, sp.dense_shape[1])
    361 
    362   def _assert_sparse_tensor_equals(self, sp1, sp2):
    363     self.assertAllEqual(sp1.indices.eval(), sp2.indices)
    364     self.assertAllEqual(sp1.values.eval(), sp2.values)
    365     self.assertAllEqual(sp1.dense_shape.eval(), sp2.dense_shape)
    366 
    367   def _sparse_tensor(self, data, batch_size=-1):
    368     """Generates a SparseTensor.
    369 
    370     Args:
    371       data: Should be a list of list of strings or int64. Each item of the outer
    372           list represents a batch. Each item of the batch is a feature of a
    373           specific feature column.
    374       batch_size: optional batch size, especially for cases when data has no
    375           entry for some batches.
    376 
    377     Returns:
    378      A SparseTensor.
    379     """
    380     indices = []
    381     values = []
    382     max_col_count = 0
    383     for batch, batch_ix in zip(data, range(len(data))):
    384       for column, column_ix in zip(batch, range(len(batch))):
    385         indices.append([batch_ix, column_ix])
    386         values.append(column)
    387         max_col_count = max(max_col_count, column_ix + 1)
    388     shape = [batch_size if batch_size != -1 else len(data), max_col_count]
    389     value_type = (dtypes.string if not values or isinstance(values[0], str) else
    390                   dtypes.int64)
    391     return sparse_tensor.SparseTensor(
    392         constant_op.constant(indices, dtypes.int64, [len(indices), 2]),
    393         constant_op.constant(values, value_type, [len(indices)]),
    394         constant_op.constant(shape, dtypes.int64))
    395 
    396 
# When this file is executed directly as a script, hand control to the
# `test.main()` entry point of `tensorflow.python.platform.test`.
if __name__ == '__main__':
  test.main()
    399