# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Block GRU module."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.contrib.rnn.python.kernel_tests import benchmarking
from tensorflow.contrib.rnn.python.ops import gru_ops
from tensorflow.python.client import session
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import rnn
from tensorflow.python.ops import rnn_cell
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import gradient_descent


class GRUBlockCellTest(test.TestCase):
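  """Tests GRUBlockCell, including parity checks against the basic GRUCell."""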

  def testNoneDimsWithDynamicRNN(self):
    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
      batch_size = 4
      cell_size = 5
      input_size = 6
      num_steps = 7

      cell = gru_ops.GRUBlockCell(cell_size)

      x = array_ops.placeholder(dtypes.float32, shape=(None, None, input_size))
      _, output = rnn.dynamic_rnn(
          cell, x, time_major=True, dtype=dtypes.float32)
      sess.run(variables.global_variables_initializer())
      feed = {}
      feed[x] = np.random.randn(num_steps, batch_size, input_size)
      sess.run(output, feed)

  def testBlockGRUToGRUCellSingleStep(self):
    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
      batch_size = 4
      cell_size = 5
      input_size = 6

      seed = 1994
      initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed)

      # Inputs
      x = array_ops.zeros([batch_size, input_size])
      h = array_ops.zeros([batch_size, cell_size])

      # Values for the inputs.
      x_value = np.random.rand(batch_size, input_size)
      h_value = np.random.rand(batch_size, cell_size)

      # Output from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        output = rnn_cell.GRUCell(cell_size)(x, h)
        sess.run([variables.global_variables_initializer()])
        basic_res = sess.run([output], {x: x_value, h: h_value})

      # Output from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        output = gru_ops.GRUBlockCell(cell_size)(x, h)
        sess.run([variables.global_variables_initializer()])
        block_res = sess.run([output], {x: x_value, h: h_value})

      self.assertEqual(len(block_res), len(basic_res))
      for block, basic in zip(block_res, basic_res):
        self.assertAllClose(block, basic)

  def testBlockGRUToGRUCellMultiStep(self):
    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
      batch_size = 2
      cell_size = 3
      input_size = 3
      time_steps = 4

      # Random initializers.
      seed = 1994
      initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed)
      np.random.seed(seed)

      # Inputs
      concat_x = array_ops.placeholder(
          dtypes.float32, shape=(time_steps, batch_size, input_size))
      h = array_ops.zeros([batch_size, cell_size])

      # Values for the inputs.
      x_values = np.random.rand(time_steps, batch_size, input_size)
      h_value = np.random.rand(batch_size, cell_size)

      # Output from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        cell = gru_ops.GRUBlockCell(cell_size)
        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        feeds = {concat_x: x_values, h: h_value}
        sess.run([variables.global_variables_initializer()])
        block_res = sess.run([outputs_dynamic, state_dynamic], feeds)

      # Output from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        cell = rnn_cell.GRUCell(cell_size)
        outputs_dynamic, state_dynamic = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        feeds = {concat_x: x_values, h: h_value}
        sess.run([variables.global_variables_initializer()])
        basic_res = sess.run([outputs_dynamic, state_dynamic], feeds)

      # Check the lengths of the outputs_dynamic, and states.
      self.assertEqual(len(block_res), len(basic_res))
      self.assertEqual(len(block_res[0]), len(basic_res[0]))
      self.assertEqual(len(block_res[1]), len(basic_res[1]))

      # Check the outputs_dynamic values.
      for block_output, basic_output in zip(block_res[0], basic_res[0]):
        self.assertAllClose(block_output, basic_output)

      # Check the state_dynamic value.
      self.assertAllClose(block_res[1], basic_res[1])

  def testDerivativeOfBlockGRUToGRUCellSingleStep(self):
    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
      batch_size = 2
      cell_size = 3
      input_size = 4

      seed = 1994
      initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed)
      np.random.seed(seed)

      # Inputs
      x = array_ops.zeros([batch_size, input_size])
      h = array_ops.zeros([batch_size, cell_size])

      # Values for the inputs.
      x_value = np.random.rand(batch_size, input_size)
      h_value = np.random.rand(batch_size, cell_size)

      # Gradients from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        output = gru_ops.GRUBlockCell(cell_size)(x, h)
        sess.run([variables.global_variables_initializer()])

        all_variables = variables.global_variables()[0:4]
        [w_ru, b_ru, w_c, b_c] = all_variables

        d_new_h_wrt_x = gradients_impl.gradients([output], x)
        d_new_h_wrt_h = gradients_impl.gradients([output], h)
        d_new_h_wrt_w_ru = gradients_impl.gradients([output], w_ru)
        d_new_h_wrt_w_c = gradients_impl.gradients([output], w_c)
        d_new_h_wrt_b_ru = gradients_impl.gradients([output], b_ru)
        d_new_h_wrt_b_c = gradients_impl.gradients([output], b_c)

        d_block_res = sess.run([
            d_new_h_wrt_x, d_new_h_wrt_h, d_new_h_wrt_w_ru, d_new_h_wrt_w_c,
            d_new_h_wrt_b_ru, d_new_h_wrt_b_c
        ], {x: x_value,
            h: h_value})

      # Gradients from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        output = rnn_cell.GRUCell(cell_size)(x, h)
        sess.run([variables.global_variables_initializer()])

        all_variables = variables.global_variables()[4:8]
        [w_ru, b_ru, w_c, b_c] = all_variables

        d_new_h_wrt_x = gradients_impl.gradients([output], x)
        d_new_h_wrt_h = gradients_impl.gradients([output], h)
        d_new_h_wrt_w_ru = gradients_impl.gradients([output], w_ru)
        d_new_h_wrt_w_c = gradients_impl.gradients([output], w_c)
        d_new_h_wrt_b_ru = gradients_impl.gradients([output], b_ru)
        d_new_h_wrt_b_c = gradients_impl.gradients([output], b_c)

        d_basic_res = sess.run([
            d_new_h_wrt_x, d_new_h_wrt_h, d_new_h_wrt_w_ru, d_new_h_wrt_w_c,
            d_new_h_wrt_b_ru, d_new_h_wrt_b_c
        ], {x: x_value,
            h: h_value})

      # Check lengths of derivative results.
      self.assertEqual(len(d_block_res), len(d_basic_res))
      # Check the value of every derivative result.
      for block, basic in zip(d_block_res, d_basic_res):
        self.assertAllClose(block, basic)

  def testDerivativeOfBlockGRUToGRUCellMultiSteps(self):
    batch_size = 2
    cell_size = 3
    input_size = 4
    time_steps = 2
    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
      # Random initializers.
      seed = 1994
      initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed)
      np.random.seed(seed)

      # Inputs
      concat_x = array_ops.placeholder(
          dtypes.float32, shape=(time_steps, batch_size, input_size))
      h = array_ops.zeros([batch_size, cell_size])

      # Values for the inputs.
      x_values = np.random.rand(time_steps, batch_size, input_size)
      h_value = np.random.rand(batch_size, cell_size)
      feeds = {concat_x: x_values, h: h_value}

      # Gradients from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        cell = gru_ops.GRUBlockCell(cell_size)

        outputs_dynamic, _ = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        grad_output_wrt_x = gradients_impl.gradients([outputs_dynamic[0]],
                                                     concat_x)
        grad_output_wrt_h = gradients_impl.gradients([outputs_dynamic[0]], h)

        sess.run([variables.global_variables_initializer()])
        block_grad_res_x, block_grad_res_h = sess.run(
            [grad_output_wrt_x, grad_output_wrt_h], feeds)

      # Gradients from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        cell = rnn_cell.GRUCell(cell_size)

        outputs_dynamic, _ = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        grad_output_wrt_x = gradients_impl.gradients([outputs_dynamic[0]],
                                                     concat_x)
        grad_output_wrt_h = gradients_impl.gradients([outputs_dynamic[0]], h)

        sess.run([variables.global_variables_initializer()])
        basic_grad_res_x, basic_grad_res_h = sess.run(
            [grad_output_wrt_x, grad_output_wrt_h], feeds)

    # Check the derivatives of the outputs w.r.t. x.
    self.assertEqual(len(block_grad_res_x), len(basic_grad_res_x))
    for block, basic in zip(block_grad_res_x, basic_grad_res_x):
      self.assertAllClose(block, basic)

    # Check the derivatives of the outputs w.r.t. h.
    self.assertEqual(len(block_grad_res_h), len(basic_grad_res_h))
    for block, basic in zip(block_grad_res_h, basic_grad_res_h):
      self.assertAllClose(block, basic)

  def testGradient(self):
    with self.test_session(use_gpu=True, graph=ops.Graph()) as sess:
      batch_size = 1
      cell_size = 3
      input_size = 2

      # Inputs
      x = array_ops.zeros([batch_size, input_size])
      h = array_ops.zeros([batch_size, cell_size])
      output = gru_ops.GRUBlockCell(cell_size)(x, h)

      sess.run([variables.global_variables_initializer()])

      all_variables = variables.global_variables()

      [w_ru, b_ru, w_c, b_c] = all_variables[:4]

      error_x = gradient_checker.compute_gradient_error(
          x, (batch_size, input_size), output[0], (batch_size, cell_size))
      error_h = gradient_checker.compute_gradient_error(h,
                                                        (batch_size, cell_size),
                                                        output[0],
                                                        (batch_size, cell_size))
      error_w_ru = gradient_checker.compute_gradient_error(
          w_ru, (input_size + cell_size, 2 * cell_size), output[0],
          (batch_size, cell_size))
      error_w_c = gradient_checker.compute_gradient_error(
          w_c, (input_size + cell_size, cell_size), output[0],
          (batch_size, cell_size))
      error_b_ru = gradient_checker.compute_gradient_error(
          b_ru, (2 * cell_size,), output[0], (batch_size, cell_size))
      error_b_c = gradient_checker.compute_gradient_error(
          b_c, (cell_size,), output[0], (batch_size, cell_size))

    eps = 1e-4
    self.assertLess(error_x, eps)
    self.assertLess(error_h, eps)
    self.assertLess(error_w_ru, eps)
    self.assertLess(error_w_c, eps)
    self.assertLess(error_b_ru, eps)
    self.assertLess(error_b_c, eps)


#### Benchmarking GRUBlockCell vs GRUCell.

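# Each helper below builds the same graph with both cell implementations,
# times `iters` runs of the relevant op with benchmarking.seconds_per_run,
# prints one CSV row matching the headers printed by BenchmarkGRUBlock, and
# returns the pair (basic_time, block_time). The reported percentage is
# (basic_time - block_time) * 100 / basic_time, i.e. how much wall time the
# block kernel saves relative to the basic GRUCell. (Benchmarks defined on
# test.Benchmark subclasses are typically run by passing the --benchmarks
# flag to the test binary; the exact invocation depends on the build setup.)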

def training_gru_block_vs_gru_cell(batch_size,
                                   cell_size,
                                   input_size,
                                   time_steps,
                                   use_gpu=False,
                                   iters=30):
  """Benchmark training speed of GRUBlockCell vs. GRUCell."""
  ops.reset_default_graph()
  with session.Session(graph=ops.Graph()) as sess:
    # Specify the device to be used.
    with benchmarking.device(use_gpu):

      # Random initializers.
      seed = 1994
      initializer = init_ops.random_uniform_initializer(-1, 1, seed=seed)
      np.random.seed(seed)

      # Inputs
      concat_x = vs.get_variable("concat_x",
                                 [time_steps, batch_size, input_size])
      h = vs.get_variable("h", [batch_size, cell_size])
      y = vs.get_variable("y", [time_steps, batch_size, cell_size])

      # Output from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        cell = rnn_cell.GRUCell(cell_size)

        outputs_dynamic, _ = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        sess.run([variables.global_variables_initializer()])
        cost = math_ops.reduce_mean(math_ops.square(outputs_dynamic - y))
        learning_rate = 0.01
        optimizer = gradient_descent.GradientDescentOptimizer(
            learning_rate).minimize(cost)

        # time for a training step.
        basic_time_training = benchmarking.seconds_per_run(
            optimizer, sess, iters)
      # Output from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        cell = gru_ops.GRUBlockCell(cell_size)

        outputs_dynamic, _ = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        sess.run([variables.global_variables_initializer()])
        cost = math_ops.reduce_mean(math_ops.square(outputs_dynamic - y))
        learning_rate = 0.01
        optimizer = gradient_descent.GradientDescentOptimizer(
            learning_rate).minimize(cost)

        # time for a training step.
        block_time_training = benchmarking.seconds_per_run(
            optimizer, sess, iters)

    performance_training = (
        basic_time_training - block_time_training) * 100 / basic_time_training

    print(",".join([
        str(batch_size), str(cell_size), str(input_size), str(time_steps), str(
            use_gpu), str(basic_time_training), str(block_time_training), str(
                performance_training)
    ]))

    return basic_time_training, block_time_training


def inference_gru_block_vs_gru_cell(batch_size,
                                    cell_size,
                                    input_size,
                                    time_steps,
                                    use_gpu=False,
                                    iters=30):
  """Benchmark inference speed of GRUBlockCell vs. GRUCell."""
  ops.reset_default_graph()
  with session.Session(graph=ops.Graph()) as sess:
    with benchmarking.device(use_gpu):

      # Random initializers.
      seed = 1994
      initializer = init_ops.random_uniform_initializer(-1, 1, seed=seed)
      np.random.seed(seed)

      # Inputs
      concat_x = vs.get_variable("concat_x",
                                 [time_steps, batch_size, input_size])
      h = vs.get_variable("h", [batch_size, cell_size])

      # Output from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        cell = rnn_cell.GRUCell(cell_size)
        outputs_dynamic, _ = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        sess.run([variables.global_variables_initializer()])
        basic_time_inference = benchmarking.seconds_per_run(
            outputs_dynamic, sess, iters)

      # Output from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        cell = gru_ops.GRUBlockCell(cell_size)
        outputs_dynamic, _ = rnn.dynamic_rnn(
            cell,
            inputs=concat_x,
            initial_state=h,
            time_major=True,
            dtype=dtypes.float32)
        sess.run([variables.global_variables_initializer()])
        block_time_inference = benchmarking.seconds_per_run(
            outputs_dynamic, sess, iters)

    performance_inference = (basic_time_inference - block_time_inference
                            ) * 100 / basic_time_inference
    print(",".join([
        str(batch_size), str(cell_size), str(input_size), str(time_steps), str(
            use_gpu), str(basic_time_inference), str(block_time_inference), str(
                performance_inference)
    ]))

    return basic_time_inference, block_time_inference


def single_bprop_step_gru_block_vs_gru_cell(batch_size,
                                            cell_size,
                                            input_size,
                                            use_gpu=False,
                                            iters=30):
  """Benchmark single bprop step speed of GRUBlockCell vs. GRUCell."""
  ops.reset_default_graph()
  with session.Session(graph=ops.Graph()) as sess:
    with benchmarking.device(use_gpu):
      initializer = init_ops.random_uniform_initializer(-1, 1, seed=1989)
      # Inputs
      x = vs.get_variable("x", [batch_size, input_size])
      h = vs.get_variable("h", [batch_size, cell_size])

      # Output from the basic GRU cell implementation.
      with vs.variable_scope("basic", initializer=initializer):
        output = rnn_cell.GRUCell(cell_size)(array_ops.identity(x),
                                             array_ops.identity(h))
        sess.run([variables.global_variables_initializer()])
        grad_output_wrt_input = gradients_impl.gradients([output], h)
        basic_time_bprop = benchmarking.seconds_per_run(grad_output_wrt_input,
                                                        sess, iters)

      # Output from the block GRU cell implementation.
      with vs.variable_scope("block", initializer=initializer):
        output = gru_ops.GRUBlockCell(cell_size)(array_ops.identity(x),
                                                 array_ops.identity(h))
        sess.run([variables.global_variables_initializer()])
        grad_output_wrt_input = gradients_impl.gradients([output], h)
        block_time_bprop = benchmarking.seconds_per_run(grad_output_wrt_input,
                                                        sess, iters)

  performance_bprop = (
      basic_time_bprop - block_time_bprop) * 100 / basic_time_bprop

  print(",".join([
      str(batch_size), str(cell_size), str(input_size), str(use_gpu), str(
          basic_time_bprop), str(block_time_bprop), str(performance_bprop)
  ]))

  return basic_time_bprop, block_time_bprop


class BenchmarkGRUBlock(test.Benchmark):
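  """Benchmarks comparing GRUBlockCell and GRUCell wall time."""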

  def benchmarkTrainingBlockGRUVsGRUCell(self):
    print("Comparison GRUBlockCell vs GRUCell")
    print("--------------------------------------------------------------")
    print("Training speed GRUBlockCell vs GRUCell")
    print("batch_size, cell_size, input_size, time_steps, GPU, "
          "basic_time_training, block_time_training, performance_training[%]")
    iters = 10

    for config in benchmarking.dict_product({
        "use_gpu": [True, False],
        "batch_size": [1, 32, 128],
        "cell_size": [128, 512],
        "input_size": [128, 512],
        "time_steps": [50]
    }):
      basic_time, block_time = training_gru_block_vs_gru_cell(
          config["batch_size"], config["cell_size"], config["input_size"],
          config["time_steps"], config["use_gpu"], iters)
      self.report_benchmark(
          name="GRUCell_training_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" %
          (config["batch_size"], config["cell_size"], config["input_size"],
           config["time_steps"], config["use_gpu"]),
          iters=iters,
          wall_time=basic_time)
      self.report_benchmark(
          name="GRUBlockCell_training_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" %
          (config["batch_size"], config["cell_size"], config["input_size"],
           config["time_steps"], config["use_gpu"]),
          iters=iters,
          wall_time=block_time)

  def benchmarkInferenceBlockGRUVsGRUCell(self):
    print("--------------------------------------------------------------")
    print("Inference speed GRUBlockCell vs GRUCell")
    print(
        "batch_size, cell_size, input_size, time_steps, GPU, "
        "basic_time_inference, block_time_inference, performance_inference[%]")
    iters = 10
    for config in benchmarking.dict_product({
        "use_gpu": [True, False],
        "batch_size": [1, 32, 128],
        "cell_size": [128, 512],
        "input_size": [128, 512],
        "time_steps": [50]
    }):
      basic_time, block_time = inference_gru_block_vs_gru_cell(
          config["batch_size"], config["cell_size"], config["input_size"],
          config["time_steps"], config["use_gpu"], iters)
      self.report_benchmark(
          name="GRUCell_inference_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" %
          (config["batch_size"], config["cell_size"], config["input_size"],
           config["time_steps"], config["use_gpu"]),
          iters=iters,
          wall_time=basic_time)
      self.report_benchmark(
          name="GRUBlockCell_inference_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" %
          (config["batch_size"], config["cell_size"], config["input_size"],
           config["time_steps"], config["use_gpu"]),
          iters=iters,
          wall_time=block_time)

  def benchmarkSingleBpropStepBlockGRUVsGRUCell(self):
    print("--------------------------------------------------------------")
    print("Single bprop step speed GRUBlockCell vs GRUCell")
    print("batch_size, cell_size, input_size, GPU, basic_time, "
          "block_time, performance_bprop[%]")
    iters = 10
    for config in benchmarking.dict_product({
        "use_gpu": [True, False],
        "batch_size": [1, 32, 128],
        "cell_size": [128, 512],
        "input_size": [128, 512]
    }):
      basic_time, block_time = single_bprop_step_gru_block_vs_gru_cell(
          config["batch_size"], config["cell_size"], config["input_size"],
          config["use_gpu"], iters)
      self.report_benchmark(
          name="GRUCell_Bprop_single_step_time_BS%i_CS%i_IS%i_gpu_%s" %
          (config["batch_size"], config["cell_size"], config["input_size"],
           config["use_gpu"]),
          iters=iters,
          wall_time=basic_time)
      self.report_benchmark(
          name="GRUBlockCell_Bprop_single_step_time_BS%i_CS%i_IS%i_gpu_%s" %
          (config["batch_size"], config["cell_size"], config["input_size"],
           config["use_gpu"]),
          iters=iters,
          wall_time=block_time)

    print("--------------------------------------------------------------")


if __name__ == "__main__":
  test.main()