# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.ops.ctc_ops.ctc_loss_op."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import ctc_ops
from tensorflow.python.ops import gradients_impl
from tensorflow.python.platform import test


def SimpleSparseTensorFrom(x):
  """Create a very simple SparseTensor with dimensions (batch, time).

  Args:
    x: a list of lists of type int

  Returns:
    A rank-2 `SparseTensor` whose indices, values, and dense shape are built
    from the entries of `x`.
  """
  x_ix = []
  x_val = []
  for batch_i, batch in enumerate(x):
    for time, val in enumerate(batch):
      x_ix.append([batch_i, time])
      x_val.append(val)
  x_shape = [len(x), np.asarray(x_ix).max(0)[1] + 1]
  x_ix = constant_op.constant(x_ix, dtypes.int64)
  x_val = constant_op.constant(x_val, dtypes.int32)
  x_shape = constant_op.constant(x_shape, dtypes.int64)

  return sparse_tensor.SparseTensor(x_ix, x_val, x_shape)
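
# Example (a sketch for illustration; not exercised by the tests below):
#   SimpleSparseTensorFrom([[0, 1], [2]]) returns a SparseTensor equivalent
#   to indices=[[0, 0], [0, 1], [1, 0]], values=[0, 1, 2],
#   dense_shape=[2, 2].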


class CTCLossTest(test.TestCase):

  def _testCTCLoss(self,
                   inputs,
                   seq_lens,
                   labels,
                   loss_truth,
                   grad_truth,
                   expected_err_re=None):
    self.assertEqual(len(inputs), len(grad_truth))

    inputs_t = constant_op.constant(inputs)

    with self.test_session(use_gpu=False) as sess:
      loss = ctc_ops.ctc_loss(
          inputs=inputs_t, labels=labels, sequence_length=seq_lens)
      grad = gradients_impl.gradients(loss, [inputs_t])[0]

      self.assertShapeEqual(loss_truth, loss)
      self.assertShapeEqual(grad_truth, grad)

      if expected_err_re is None:
        (tf_loss, tf_grad) = sess.run([loss, grad])
        self.assertAllClose(tf_loss, loss_truth, atol=1e-6)
        self.assertAllClose(tf_grad, grad_truth, atol=1e-6)
      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run([loss, grad])

  def testBasic(self):
    """Test two batch entries."""
    # Input and ground truth from Alex Graves' implementation.
    #
    #### Batch entry 0 #####
    # targets: 0 1 2 1 0
    # outputs:
    # 0 0.633766 0.221185 0.0917319 0.0129757 0.0142857 0.0260553
    # 1 0.111121 0.588392 0.278779 0.0055756 0.00569609 0.010436
    # 2 0.0357786 0.633813 0.321418 0.00249248 0.00272882 0.0037688
    # 3 0.0663296 0.643849 0.280111 0.00283995 0.0035545 0.00331533
    # 4 0.458235 0.396634 0.123377 0.00648837 0.00903441 0.00623107
    # alpha:
    # 0 -3.64753 -0.456075 -inf -inf -inf -inf -inf -inf -inf -inf -inf
    # 1 -inf -inf -inf -0.986437 -inf -inf -inf -inf -inf -inf -inf
    # 2 -inf -inf -inf -inf -inf -2.12145 -inf -inf -inf -inf -inf
    # 3 -inf -inf -inf -inf -inf -inf -inf -2.56174 -inf -inf -inf
    # 4 -inf -inf -inf -inf -inf -inf -inf -inf -inf -3.34211 -inf
    # beta:
    # 0 -inf -2.88604 -inf -inf -inf -inf -inf -inf -inf -inf -inf
    # 1 -inf -inf -inf -2.35568 -inf -inf -inf -inf -inf -inf -inf
    # 2 -inf -inf -inf -inf -inf -1.22066 -inf -inf -inf -inf -inf
    # 3 -inf -inf -inf -inf -inf -inf -inf -0.780373 -inf -inf -inf
    # 4 -inf -inf -inf -inf -inf -inf -inf -inf -inf 0 0
    # prob: -3.34211
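    # (Note added for clarity: `prob` above is the log-likelihood
    # log p(targets | inputs), so the loss ctc_loss reports for this entry
    # is its negation, 3.34211; see loss_truth below.)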
    # outputDerivs:
    # 0 -0.366234 0.221185 0.0917319 0.0129757 0.0142857 0.0260553
    # 1 0.111121 -0.411608 0.278779 0.0055756 0.00569609 0.010436
    # 2 0.0357786 0.633813 -0.678582 0.00249248 0.00272882 0.0037688
    # 3 0.0663296 -0.356151 0.280111 0.00283995 0.0035545 0.00331533
    # 4 -0.541765 0.396634 0.123377 0.00648837 0.00903441 0.00623107
    #
    #### Batch entry 1 #####
    #
    # targets: 0 1 1 0
    # outputs:
    # 0 0.30176 0.28562 0.0831517 0.0862751 0.0816851 0.161508
    # 1 0.24082 0.397533 0.0557226 0.0546814 0.0557528 0.19549
    # 2 0.230246 0.450868 0.0389607 0.038309 0.0391602 0.202456
    # 3 0.280884 0.429522 0.0326593 0.0339046 0.0326856 0.190345
    # 4 0.423286 0.315517 0.0338439 0.0393744 0.0339315 0.154046
    # alpha:
    # 0 -1.8232 -1.19812 -inf -inf -inf -inf -inf -inf -inf
    # 1 -inf -2.19315 -2.83037 -2.1206 -inf -inf -inf -inf -inf
    # 2 -inf -inf -inf -2.03268 -3.71783 -inf -inf -inf -inf
    # 3 -inf -inf -inf -inf -inf -4.56292 -inf -inf -inf
    # 4 -inf -inf -inf -inf -inf -inf -inf -5.42262 -inf
    # beta:
    # 0 -inf -4.2245 -inf -inf -inf -inf -inf -inf -inf
    # 1 -inf -inf -inf -3.30202 -inf -inf -inf -inf -inf
    # 2 -inf -inf -inf -inf -1.70479 -0.856738 -inf -inf -inf
    # 3 -inf -inf -inf -inf -inf -0.859706 -0.859706 -0.549337 -inf
    # 4 -inf -inf -inf -inf -inf -inf -inf 0 0
    # prob: -5.42262
    # outputDerivs:
    # 0 -0.69824 0.28562 0.0831517 0.0862751 0.0816851 0.161508
    # 1 0.24082 -0.602467 0.0557226 0.0546814 0.0557528 0.19549
    # 2 0.230246 0.450868 0.0389607 0.038309 0.0391602 -0.797544
    # 3 0.280884 -0.570478 0.0326593 0.0339046 0.0326856 0.190345
    # 4 -0.576714 0.315517 0.0338439 0.0393744 0.0339315 0.154046

    # max_time_steps == 7
    depth = 6

    # seq_len_0 == 5
    targets_0 = [0, 1, 2, 1, 0]
    loss_log_prob_0 = -3.34211
    # dimensions are time x depth
    input_prob_matrix_0 = np.asarray(
        [[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553],
         [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436],
         [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688],
         [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533],
         [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]],
        dtype=np.float32)
    input_log_prob_matrix_0 = np.log(input_prob_matrix_0)
    gradient_log_prob_0 = np.asarray(
        [[-0.366234, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553],
         [0.111121, -0.411608, 0.278779, 0.0055756, 0.00569609, 0.010436],
         [0.0357786, 0.633813, -0.678582, 0.00249248, 0.00272882, 0.0037688],
         [0.0663296, -0.356151, 0.280111, 0.00283995, 0.0035545, 0.00331533],
         [-0.541765, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]],
        dtype=np.float32)

    # seq_len_1 == 5
    targets_1 = [0, 1, 1, 0]
    loss_log_prob_1 = -5.42262
    # dimensions are time x depth
    input_prob_matrix_1 = np.asarray(
        [[0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508],
         [0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549],
         [0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456],
         [0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345],
         [0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046]],
        dtype=np.float32)
    input_log_prob_matrix_1 = np.log(input_prob_matrix_1)
    gradient_log_prob_1 = np.asarray(
        [[-0.69824, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508],
         [0.24082, -0.602467, 0.0557226, 0.0546814, 0.0557528, 0.19549],
         [0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, -0.797544],
         [0.280884, -0.570478, 0.0326593, 0.0339046, 0.0326856, 0.190345],
         [-0.576714, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046]],
        dtype=np.float32)
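    # Sanity check (a sketch added for illustration, not part of the
    # original ground truth): the CTC gradient w.r.t. the softmax
    # activations is y[t][k] minus a posterior over label occupancies
    # (Graves et al., 2006); both distributions sum to one, so each time
    # step's gradient row should sum to approximately zero.
    for grad in (gradient_log_prob_0, gradient_log_prob_1):
      self.assertAllClose(grad.sum(axis=1), np.zeros(5), atol=1e-5)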

    # len max_time_steps array of 2 x depth matrices
    inputs = [
        np.vstack(
            [input_log_prob_matrix_0[t, :], input_log_prob_matrix_1[t, :]])
        for t in range(5)
    ] + 2 * [np.nan * np.ones((2, depth), np.float32)]

    # convert inputs into a [max_time x batch_size x depth] tensor
    inputs = np.asarray(inputs, dtype=np.float32)
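    # Shape check (a sketch added for clarity): inputs is time-major, i.e.
    # (max_time_steps, batch_size, depth) == (7, 2, 6).
    self.assertEqual(inputs.shape, (7, 2, 6))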

    # len batch_size array of label vectors
    labels = SimpleSparseTensorFrom([targets_0, targets_1])

    # batch_size length vector of sequence_lengths
    seq_lens = np.array([5, 5], dtype=np.int32)

    # output: batch_size length vector of negative log probabilities
    loss_truth = np.array([-loss_log_prob_0, -loss_log_prob_1], np.float32)

    # output: len max_time_steps array of 2 x depth matrices
    grad_truth = [
        np.vstack([gradient_log_prob_0[t, :], gradient_log_prob_1[t, :]])
        for t in range(5)
    ] + 2 * [np.zeros((2, depth), np.float32)]

    # convert grad_truth into a [max_time x batch_size x depth] tensor
    grad_truth = np.asarray(grad_truth, dtype=np.float32)
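    # (Note added for clarity: the two padded time steps hold NaN in
    # `inputs` but zeros in `grad_truth`; frames past sequence_length must
    # be ignored by the op, so a NaN leaking into the loss or gradient
    # would fail the comparison below.)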

    self._testCTCLoss(inputs, seq_lens, labels, loss_truth, grad_truth)

  def test_time_major(self):
    """Test the time_major parameter.

    Transposing the inputs and setting time_major=False should yield the
    same loss as the time-major default.
    """
    # [max_time x batch_size x depth] tensor
    inputs = np.random.randn(2, 2, 3).astype(np.float32)
    labels = SimpleSparseTensorFrom([[0, 1], [1, 0]])
    seq_lens = np.array([2, 2], dtype=np.int32)

    inputs_t = constant_op.constant(inputs)

    # Transpose to a [batch_size x max_time x depth] tensor.
    inputs_t_transposed = constant_op.constant(inputs.transpose(1, 0, 2))

    with self.test_session(use_gpu=False) as sess:
      loss = ctc_ops.ctc_loss(
          inputs=inputs_t, labels=labels, sequence_length=seq_lens)
      loss_transposed = ctc_ops.ctc_loss(
          inputs=inputs_t_transposed,
          labels=labels,
          sequence_length=seq_lens,
          time_major=False)

      (tf_loss, tf_loss_transposed) = sess.run([loss, loss_transposed])
      self.assertAllEqual(tf_loss, tf_loss_transposed)

  def testInvalidSecondGradient(self):
    inputs = np.random.randn(2, 2, 3).astype(np.float32)
    inputs_t = constant_op.constant(inputs)
    labels = SimpleSparseTensorFrom([[0, 1], [1, 0]])
    seq_lens = np.array([2, 2], dtype=np.int32)
    v = [1.0]

    with self.test_session(use_gpu=False):
      loss = ctc_ops.ctc_loss(
          inputs=inputs_t, labels=labels, sequence_length=seq_lens)
      # Taking the second gradient should fail, since it is not
      # yet supported.
      with self.assertRaisesRegexp(LookupError, "explicitly disabled"):
        _ = gradients_impl._hessian_vector_product(loss, [inputs_t], v)

  def testEmptyBatch(self):
    inputs = constant_op.constant([], dtype=dtypes.float32, shape=(1, 0, 2))
    sequence_lengths = constant_op.constant([], dtype=dtypes.int32)
    labels = sparse_tensor.SparseTensor(
        indices=constant_op.constant([], shape=(0, 2), dtype=dtypes.int64),
        values=constant_op.constant([], shape=(0,), dtype=dtypes.int32),
        dense_shape=[5, 5])

    with self.test_session(use_gpu=False) as sess:
      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   "batch_size must not be 0"):
        sess.run(ctc_ops.ctc_loss(labels, inputs, sequence_lengths))


if __name__ == "__main__":
  test.main()