Home | History | Annotate | Download | only in grappler
      1 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Tests for Grappler LayoutOptimizer."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import numpy as np
     22 
     23 from tensorflow.core.protobuf import config_pb2
     24 from tensorflow.core.protobuf import device_properties_pb2
     25 from tensorflow.core.protobuf import rewriter_config_pb2
     26 from tensorflow.core.protobuf import saver_pb2
     27 from tensorflow.python.client import session
     28 from tensorflow.python.framework import constant_op
     29 from tensorflow.python.framework import dtypes
     30 from tensorflow.python.framework import ops
     31 from tensorflow.python.framework import random_seed
     32 from tensorflow.python.grappler import cluster as gcluster
     33 from tensorflow.python.grappler import tf_optimizer
     34 from tensorflow.python.layers import convolutional as conv_layers
     35 from tensorflow.python.ops import array_ops
     36 from tensorflow.python.ops import functional_ops
     37 from tensorflow.python.ops import gen_array_ops
     38 from tensorflow.python.ops import gen_math_ops
     39 from tensorflow.python.ops import gen_nn_ops
     40 from tensorflow.python.ops import math_ops
     41 from tensorflow.python.ops import nn
     42 from tensorflow.python.ops import random_ops
     43 from tensorflow.python.ops import state_ops
     44 from tensorflow.python.ops import variables
     45 from tensorflow.python.platform import test
     46 from tensorflow.python.training import gradient_descent
     47 from tensorflow.python.training import saver as saver_lib
     48 
     49 
     50 def _weight(shape):
     51   """Generates a weight of a given shape."""
     52   return random_ops.truncated_normal(shape, seed=0, stddev=0.1)
     53 
     54 
     55 def _bias(shape):
     56   """Generates a bias of a given shape."""
     57   return constant_op.constant(0.1, shape=shape)
     58 
     59 
     60 def _conv2d(x, w):
     61   """Returns a 2d convolution layer with full stride."""
     62   return nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
     63 
     64 
     65 def _max_pool_2x2(x):
     66   """Downsamples a feature map by 2X."""
     67   return nn.max_pool(
     68       x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
     69 
     70 
     71 # Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py
     72 def _two_layer_model(x):
     73   x_image = array_ops.reshape(x, [-1, 28, 28, 1])
     74   w_conv1 = _weight([5, 5, 1, 32])
     75   b_conv1 = _bias([32])
     76   h_conv1 = nn.relu(_conv2d(x_image, w_conv1) + b_conv1)
     77   h_pool1 = _max_pool_2x2(h_conv1)
     78   w_conv2 = _weight([5, 5, 32, 64])
     79   b_conv2 = _bias([64])
     80   h_conv2 = nn.relu(_conv2d(h_pool1, w_conv2) + b_conv2)
     81   h_pool2 = _max_pool_2x2(h_conv2)
     82   return h_pool2
     83 
     84 
     85 def _model_with_second_port():
     86   random_seed.set_random_seed(0)
     87   x = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
     88   scale = constant_op.constant(0.1, shape=[4])
     89   offset = constant_op.constant(0.3, shape=[4])
     90   y, mean, _ = nn.fused_batch_norm(x, scale, offset)
     91   mul = math_ops.add(y, mean)
     92   output = array_ops.identity(mul)
     93   return output
     94 
     95 
     96 def _model_with_branch(x):
     97   x_image = array_ops.reshape(x, [-1, 28, 28, 1])
     98   w_conv1 = _weight([5, 5, 1, 32])
     99   w_conv2 = _weight([5, 5, 1, 32])
    100   c_conv1 = _conv2d(x_image, w_conv1)
    101   c_conv2 = _conv2d(x_image, w_conv2)
    102   add = math_ops.add(c_conv1, c_conv2)
    103   return add
    104 
    105 
    106 def _model_with_vec_and_4d(x):
    107   x_image = array_ops.reshape(x, [-1, 28, 28, 1])
    108   w_conv1 = _weight([5, 5, 1, 32])
    109   c_conv1 = _conv2d(x_image, w_conv1)
    110   vector = constant_op.constant(6.4, shape=[32])
    111   add = math_ops.add(c_conv1, vector)
    112   return add
    113 
    114 
    115 def _loop():
    116   random_seed.set_random_seed(0)
    117   x1 = random_ops.truncated_normal([1, 784], seed=0)
    118   x2 = random_ops.truncated_normal([1, 784], seed=0)
    119   x3 = random_ops.truncated_normal([1, 784], seed=0)
    120   x4 = random_ops.truncated_normal([1, 784], seed=0)
    121   elems = (x1, x2, x3, x4)
    122   outputs = functional_ops.map_fn(_two_layer_model, elems, dtype=dtypes.float32)
    123   return outputs
    124 
    125 
    126 def _loop_with_branch():
    127   random_seed.set_random_seed(0)
    128   x1 = random_ops.truncated_normal([1, 784], seed=0)
    129   x2 = random_ops.truncated_normal([1, 784], seed=0)
    130   x3 = random_ops.truncated_normal([1, 784], seed=0)
    131   x4 = random_ops.truncated_normal([1, 784], seed=0)
    132   elems = (x1, x2, x3, x4)
    133   outputs = functional_ops.map_fn(
    134       _model_with_branch, elems, dtype=dtypes.float32)
    135   return outputs
    136 
    137 
    138 def _loop_with_vec_and_4d():
    139   random_seed.set_random_seed(0)
    140   x1 = random_ops.truncated_normal([1, 784], seed=0)
    141   x2 = random_ops.truncated_normal([1, 784], seed=0)
    142   x3 = random_ops.truncated_normal([1, 784], seed=0)
    143   x4 = random_ops.truncated_normal([1, 784], seed=0)
    144   elems = (x1, x2, x3, x4)
    145   outputs = functional_ops.map_fn(
    146       _model_with_vec_and_4d, elems, dtype=dtypes.float32)
    147   return outputs
    148 
    149 
    150 def _get_config(layout_optimizer=True):
    151   if layout_optimizer:
    152     rewrite_options = rewriter_config_pb2.RewriterConfig(
    153         layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
    154   else:
    155     rewrite_options = rewriter_config_pb2.RewriterConfig(
    156         layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF)
    157   graph_options = config_pb2.GraphOptions(
    158       rewrite_options=rewrite_options, build_cost_model=1)
    159   config = config_pb2.ConfigProto(graph_options=graph_options)
    160   config.graph_options.optimizer_options.opt_level = -1
    161   return config
    162 
    163 
    164 def _simple_metagraph(depthwise=False):
    165   random_seed.set_random_seed(0)
    166   x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0))
    167   conv = conv_layers.separable_conv2d if depthwise else conv_layers.conv2d
    168   y = conv(x, 32, [3, 3])
    169   z = conv(y, 32, [3, 3])
    170   optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
    171   loss = math_ops.reduce_mean(z)
    172   train_op = optimizer.minimize(loss)
    173   graph = ops.get_default_graph()
    174   graph.add_to_collection('train_op', train_op)
    175   meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def())
    176   return meta_graph
    177 
    178 
    179 def _get_cluster():
    180   named_device = device_properties_pb2.NamedDevice()
    181   named_device.name = '/GPU:0'
    182   named_device.properties.type = 'GPU'
    183   named_device.properties.num_cores = 24
    184   named_device.properties.frequency = 1000
    185   named_device.properties.environment['architecture'] = '4'
    186   cluster = gcluster.Cluster(devices=[named_device])
    187   return cluster
    188 
    189 
    190 def _is_transpose(node):
    191   return node.endswith('TransposeNHWCToNCHW-LayoutOptimizer') or node.endswith(
    192       'TransposeNCHWToNHWC-LayoutOptimizer')
    193 
    194 
    195 def _is_permute(node):
    196   return node.endswith('VecPermuteNHWCToNCHW-LayoutOptimizer') or node.endswith(
    197       'VecPermuteNCHWToNHWC-LayoutOptimizer')
    198 
    199 
    200 class LayoutOptimizerTest(test.TestCase):
    201   """Tests the Grappler layout optimizer."""
    202 
    203   def _assert_trans_nchw_to_nhwc(self, name, nodes):
    204     self.assertIn(name + '-TransposeNCHWToNHWC-LayoutOptimizer', nodes)
    205 
    206   def _assert_trans_nhwc_to_nchw(self, name, nodes):
    207     self.assertIn(name + '-TransposeNHWCToNCHW-LayoutOptimizer', nodes)
    208 
    209   def _assert_map_nhwc_to_nchw(self, name, nodes):
    210     self.assertIn(name + '-DimMapNHWCToNCHW-LayoutOptimizer', nodes)
    211 
    212   def _assert_vec_nchw_to_nhwc(self, name, nodes):
    213     self.assertIn(name + '-VecPermuteNCHWToNHWC-LayoutOptimizer', nodes)
    214 
    215   def _assert_vec_nhwc_to_nchw(self, name, nodes):
    216     self.assertIn(name + '-VecPermuteNHWCToNCHW-LayoutOptimizer', nodes)
    217 
  def _train(self, checkpoint_path, layout_optimizer=False, restore=False):
    """Trains a small two-conv-layer model for two steps.

    Args:
      checkpoint_path: Path used to save or restore model variables.
      layout_optimizer: Whether the Grappler layout optimizer is enabled in
        the session config.
      restore: If True, restores variables from `checkpoint_path` before
        training and returns their values afterwards; otherwise initializes
        fresh variables and saves a checkpoint at the end.

    Returns:
      A list of evaluated values of all global variables when `restore` is
      True; otherwise None (the checkpoint is written as a side effect).
    """
    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with session.Session(
        config=_get_config(layout_optimizer), graph=graph) as sess:
      batch = 2
      height = 6
      width = 7
      input_channels = 3
      shape = [batch, height, width, input_channels]
      image = array_ops.placeholder(dtype='float32', shape=shape)
      conv1 = conv_layers.conv2d(image, 32, [3, 3])
      conv2 = conv_layers.conv2d(conv1, 32, [3, 3])
      optimizer = gradient_descent.GradientDescentOptimizer(0.01)
      loss = math_ops.reduce_mean(conv2)
      train_op = optimizer.minimize(loss)
      saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

      if restore:
        saver.restore(sess, checkpoint_path)
      else:
        sess.run(variables.global_variables_initializer())

      # Fixed numpy seed: the save run and the restore run must feed
      # identical inputs for their variable values to be comparable.
      np.random.seed(0)
      for _ in range(2):
        image_val = np.random.rand(*shape).astype(np.float32)
        sess.run([loss, train_op], feed_dict={image: image_val})

      if restore:
        all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        all_vars_values = [var.eval(session=sess) for var in all_vars]
        return all_vars_values
      else:
        saver.save(sess, checkpoint_path)
    252 
    253   def testTwoConvLayers(self):
    254     if test.is_gpu_available(cuda_only=True):
    255       random_seed.set_random_seed(0)
    256       x = random_ops.truncated_normal([1, 784], seed=0)
    257       output = _two_layer_model(x)
    258 
    259       with session.Session(config=_get_config(False)) as sess:
    260         output_val_ref = sess.run(output)
    261 
    262       with session.Session(config=_get_config()) as sess:
    263         metadata = config_pb2.RunMetadata()
    264         output_val = sess.run(output, run_metadata=metadata)
    265 
    266       nodes = []
    267       num_transposes = 0
    268       for node in metadata.cost_graph.node:
    269         if _is_transpose(node.name):
    270           num_transposes += 1
    271         nodes.append(node.name)
    272 
    273       # Four transposes were initially added in the Expand phase of
    274       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    275       expected_num_transposes = 2
    276       self.assertEqual(expected_num_transposes, num_transposes)
    277       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    278       self._assert_trans_nchw_to_nhwc('Relu_1-0-0', nodes)
    279 
    280       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    281 
    282   def testSplitWithNonConstAxis(self):
    283     if test.is_gpu_available(cuda_only=True):
    284       random_seed.set_random_seed(0)
    285       x = random_ops.truncated_normal([1, 784], seed=0)
    286       conv = _two_layer_model(x)
    287       dim = array_ops.placeholder(dtype='int32')
    288       split = array_ops.split(conv, 2, axis=dim)
    289       scale = constant_op.constant(0.1, shape=[32])
    290       offset = constant_op.constant(0.3, shape=[32])
    291       bn0 = nn.fused_batch_norm(split[0], scale, offset)
    292       bn1 = nn.fused_batch_norm(split[1], scale, offset)
    293       add = bn0[0] + bn1[0]
    294       output = array_ops.identity(add)
    295 
    296       with session.Session(config=_get_config(False)) as sess:
    297         output_val_ref = sess.run(output, feed_dict={dim: 3})
    298 
    299       with session.Session(config=_get_config()) as sess:
    300         metadata = config_pb2.RunMetadata()
    301         output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
    302 
    303       nodes = []
    304       num_transposes = 0
    305       for node in metadata.cost_graph.node:
    306         if _is_transpose(node.name):
    307           num_transposes += 1
    308         nodes.append(node.name)
    309 
    310       expected_num_transposes = 2
    311       self.assertEqual(expected_num_transposes, num_transposes)
    312       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    313       self._assert_trans_nchw_to_nhwc('add_2-0-0', nodes)
    314       self._assert_map_nhwc_to_nchw('split-0', nodes)
    315       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    316 
    317   def testSplitVWithNonConstAxis(self):
    318     if test.is_gpu_available(cuda_only=True):
    319       random_seed.set_random_seed(0)
    320       x = random_ops.truncated_normal([1, 784], seed=0)
    321       conv = _two_layer_model(x)
    322       dim = array_ops.placeholder(dtype='int32')
    323       sizes = constant_op.constant([50, 10, 4], shape=[3])
    324       split = gen_array_ops._split_v(
    325           value=conv, size_splits=sizes, axis=dim, num_split=3)
    326       output = math_ops.reduce_sum(split[0])
    327 
    328       with session.Session(config=_get_config(False)) as sess:
    329         output_val_ref = sess.run(output, feed_dict={dim: 3})
    330 
    331       with session.Session(config=_get_config()) as sess:
    332         metadata = config_pb2.RunMetadata()
    333         output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
    334 
    335       nodes = []
    336       num_transposes = 0
    337       for node in metadata.cost_graph.node:
    338         if _is_transpose(node.name):
    339           num_transposes += 1
    340         nodes.append(node.name)
    341 
    342       # Four transposes were initially added in the Expand phase of
    343       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    344       expected_num_transposes = 2
    345       self.assertEqual(expected_num_transposes, num_transposes)
    346       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    347       self._assert_trans_nchw_to_nhwc('SplitV-0-0', nodes)
    348       self._assert_map_nhwc_to_nchw('SplitV-2', nodes)
    349       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    350 
    351   def testPadWithConstPaddings(self):
    352     if test.is_gpu_available(cuda_only=True):
    353       random_seed.set_random_seed(0)
    354       x = random_ops.truncated_normal([1, 784], seed=0)
    355       conv = _two_layer_model(x)
    356       paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
    357       paddings = constant_op.constant(
    358           paddings_val, dtype='int32', name='PaddingsConst')
    359       pad = array_ops.pad(conv, paddings)
    360       output = array_ops.identity(pad)
    361 
    362       with session.Session(config=_get_config(False)) as sess:
    363         output_val_ref = sess.run(output)
    364 
    365       with session.Session(config=_get_config()) as sess:
    366         metadata = config_pb2.RunMetadata()
    367         output_val = sess.run(output, run_metadata=metadata)
    368 
    369       nodes = []
    370       num_transposes = 0
    371       for node in metadata.cost_graph.node:
    372         if _is_transpose(node.name):
    373           num_transposes += 1
    374         nodes.append(node.name)
    375 
    376       # Four transposes were initially added in the Expand phase of
    377       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    378       expected_num_transposes = 2
    379       self.assertEqual(expected_num_transposes, num_transposes)
    380       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    381       self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
    382       self.assertIn('Pad-1-LayoutOptimizer', nodes)
    383       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    384 
    385   def testReduceSum(self):
    386     if test.is_gpu_available(cuda_only=True):
    387       random_seed.set_random_seed(0)
    388       x = random_ops.truncated_normal([1, 784], seed=0)
    389       conv = _two_layer_model(x)
    390       reduce_sum = math_ops.reduce_sum(conv)
    391       output = array_ops.identity(reduce_sum)
    392 
    393       with session.Session(config=_get_config(False)) as sess:
    394         output_val_ref = sess.run(output)
    395 
    396       with session.Session(config=_get_config()) as sess:
    397         metadata = config_pb2.RunMetadata()
    398         output_val = sess.run(output, run_metadata=metadata)
    399 
    400       nodes = []
    401       num_transposes = 0
    402       for node in metadata.cost_graph.node:
    403         if _is_transpose(node.name):
    404           num_transposes += 1
    405         nodes.append(node.name)
    406 
    407       # Three transposes were initially added in the Expand phase of
    408       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    409       expected_num_transposes = 1
    410       self.assertEqual(expected_num_transposes, num_transposes)
    411       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    412       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    413 
    414   def testCast(self):
    415     if test.is_gpu_available(cuda_only=True):
    416       random_seed.set_random_seed(0)
    417       x = random_ops.truncated_normal([1, 784], seed=0)
    418       conv = _two_layer_model(x)
    419       cast = math_ops.cast(conv, dtype='bool')
    420       output = array_ops.identity(cast)
    421 
    422       with session.Session(config=_get_config(False)) as sess:
    423         output_val_ref = sess.run(output)
    424 
    425       with session.Session(config=_get_config()) as sess:
    426         metadata = config_pb2.RunMetadata()
    427         output_val = sess.run(output, run_metadata=metadata)
    428 
    429       nodes = []
    430       num_transposes = 0
    431       for node in metadata.cost_graph.node:
    432         if _is_transpose(node.name):
    433           num_transposes += 1
    434         nodes.append(node.name)
    435 
    436       # Four transposes were initially added in the Expand phase of
    437       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    438       expected_num_transposes = 2
    439       self.assertEqual(expected_num_transposes, num_transposes)
    440       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    441       self._assert_trans_nchw_to_nhwc('Cast-0-0', nodes)
    442       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    443 
    444   def testSqueeze(self):
    445     if test.is_gpu_available(cuda_only=True):
    446       random_seed.set_random_seed(0)
    447       x = random_ops.truncated_normal([1, 784], seed=0)
    448       conv = _two_layer_model(x)
    449       reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2])
    450       squeeze = array_ops.squeeze(reduce_sum)
    451       output = array_ops.identity(squeeze)
    452 
    453       with session.Session(config=_get_config(False)) as sess:
    454         output_val_ref = sess.run(output)
    455 
    456       with session.Session(config=_get_config()) as sess:
    457         metadata = config_pb2.RunMetadata()
    458         output_val = sess.run(output, run_metadata=metadata)
    459 
    460       nodes = []
    461       num_transposes = 0
    462       for node in metadata.cost_graph.node:
    463         if _is_transpose(node.name):
    464           num_transposes += 1
    465         nodes.append(node.name)
    466 
    467       # Three transposes were initially added in the Expand phase of
    468       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    469       expected_num_transposes = 1
    470       self.assertEqual(expected_num_transposes, num_transposes)
    471       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    472       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    473 
    474   def testSqueezeAlongHW(self):
    475     if test.is_gpu_available(cuda_only=True):
    476       random_seed.set_random_seed(0)
    477       x = random_ops.truncated_normal([1, 784], seed=0)
    478       conv = _two_layer_model(x)
    479       reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keep_dims=True)
    480       squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2])
    481       output = array_ops.identity(squeeze)
    482 
    483       with session.Session(config=_get_config(False)) as sess:
    484         output_val_ref = sess.run(output)
    485 
    486       with session.Session(config=_get_config()) as sess:
    487         metadata = config_pb2.RunMetadata()
    488         output_val = sess.run(output, run_metadata=metadata)
    489 
    490       nodes = []
    491       num_transposes = 0
    492       for node in metadata.cost_graph.node:
    493         if _is_transpose(node.name):
    494           num_transposes += 1
    495         nodes.append(node.name)
    496 
    497       # Three transposes were initially added in the Expand phase of
    498       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    499       expected_num_transposes = 1
    500       self.assertEqual(expected_num_transposes, num_transposes)
    501       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    502       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    503 
    504   def testSqueezeAlongNHW(self):
    505     if test.is_gpu_available(cuda_only=True):
    506       random_seed.set_random_seed(0)
    507       x = random_ops.truncated_normal([1, 784], seed=0)
    508       conv = _two_layer_model(x)
    509       reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keep_dims=True)
    510       squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2])
    511       output = array_ops.identity(squeeze)
    512 
    513       with session.Session(config=_get_config(False)) as sess:
    514         output_val_ref = sess.run(output)
    515 
    516       with session.Session(config=_get_config()) as sess:
    517         metadata = config_pb2.RunMetadata()
    518         output_val = sess.run(output, run_metadata=metadata)
    519 
    520       nodes = []
    521       num_transposes = 0
    522       for node in metadata.cost_graph.node:
    523         if _is_transpose(node.name):
    524           num_transposes += 1
    525         nodes.append(node.name)
    526 
    527       # Three transposes were initially added in the Expand phase of
    528       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    529       expected_num_transposes = 1
    530       self.assertEqual(expected_num_transposes, num_transposes)
    531       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    532       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    533 
    534   def testReduceSumAlongHWC(self):
    535     if test.is_gpu_available(cuda_only=True):
    536       random_seed.set_random_seed(0)
    537       x = random_ops.truncated_normal([1, 784], seed=0)
    538       conv = _two_layer_model(x)
    539       reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2, 3])
    540       output = array_ops.identity(reduce_sum)
    541 
    542       with session.Session(config=_get_config(False)) as sess:
    543         output_val_ref = sess.run(output)
    544 
    545       with session.Session(config=_get_config()) as sess:
    546         metadata = config_pb2.RunMetadata()
    547         output_val = sess.run(output, run_metadata=metadata)
    548 
    549       nodes = []
    550       num_transposes = 0
    551       for node in metadata.cost_graph.node:
    552         if _is_transpose(node.name):
    553           num_transposes += 1
    554         nodes.append(node.name)
    555 
    556       # Three transposes were initially added in the Expand phase of
    557       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    558       expected_num_transposes = 1
    559       self.assertEqual(expected_num_transposes, num_transposes)
    560       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    561       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    562 
    563   def testReduceSumAlongNHW(self):
    564     if test.is_gpu_available(cuda_only=True):
    565       random_seed.set_random_seed(0)
    566       x = random_ops.truncated_normal([1, 784], seed=0)
    567       conv = _two_layer_model(x)
    568       reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2])
    569       output = array_ops.identity(reduce_sum)
    570 
    571       with session.Session(config=_get_config(False)) as sess:
    572         output_val_ref = sess.run(output)
    573 
    574       with session.Session(config=_get_config()) as sess:
    575         metadata = config_pb2.RunMetadata()
    576         output_val = sess.run(output, run_metadata=metadata)
    577 
    578       nodes = []
    579       num_transposes = 0
    580       for node in metadata.cost_graph.node:
    581         if _is_transpose(node.name):
    582           num_transposes += 1
    583         nodes.append(node.name)
    584 
    585       # Three transposes were initially added in the Expand phase of
    586       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    587       expected_num_transposes = 1
    588       self.assertEqual(expected_num_transposes, num_transposes)
    589       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    590       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    591 
    592   def testReduceSumAlongC(self):
    593     if test.is_gpu_available(cuda_only=True):
    594       random_seed.set_random_seed(0)
    595       x = random_ops.truncated_normal([1, 784], seed=0)
    596       conv = _two_layer_model(x)
    597       reduce_sum = math_ops.reduce_sum(conv, axis=[3])
    598       output = array_ops.identity(reduce_sum)
    599 
    600       with session.Session(config=_get_config(False)) as sess:
    601         output_val_ref = sess.run(output)
    602 
    603       with session.Session(config=_get_config()) as sess:
    604         metadata = config_pb2.RunMetadata()
    605         output_val = sess.run(output, run_metadata=metadata)
    606 
    607       nodes = []
    608       num_transposes = 0
    609       for node in metadata.cost_graph.node:
    610         if _is_transpose(node.name):
    611           num_transposes += 1
    612         nodes.append(node.name)
    613 
    614       # Three transposes were initially added in the Expand phase of
    615       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    616       expected_num_transposes = 1
    617       self.assertEqual(expected_num_transposes, num_transposes)
    618       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    619       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    620 
    621   def testReduceSumAlongCKeepDims(self):
    622     if test.is_gpu_available(cuda_only=True):
    623       random_seed.set_random_seed(0)
    624       x = random_ops.truncated_normal([1, 784], seed=0)
    625       conv = _two_layer_model(x)
    626       reduce_sum = math_ops.reduce_sum(conv, axis=[3], keep_dims=True)
    627       output = array_ops.identity(reduce_sum)
    628 
    629       with session.Session(config=_get_config(False)) as sess:
    630         output_val_ref = sess.run(output)
    631 
    632       with session.Session(config=_get_config()) as sess:
    633         metadata = config_pb2.RunMetadata()
    634         output_val = sess.run(output, run_metadata=metadata)
    635 
    636       nodes = []
    637       num_transposes = 0
    638       for node in metadata.cost_graph.node:
    639         if _is_transpose(node.name):
    640           num_transposes += 1
    641         nodes.append(node.name)
    642 
    643       # Four transposes were initially added in the Expand phase of
    644       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    645       expected_num_transposes = 2
    646       self.assertEqual(expected_num_transposes, num_transposes)
    647       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    648       self._assert_trans_nchw_to_nhwc('Sum-0-0', nodes)
    649       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    650 
    651   def testReduceSumAlongHKeepDims(self):
    652     if test.is_gpu_available(cuda_only=True):
    653       random_seed.set_random_seed(0)
    654       x = random_ops.truncated_normal([1, 784], seed=0)
    655       conv = _two_layer_model(x)
    656       reduce_sum = math_ops.reduce_sum(conv, axis=[2], keep_dims=True)
    657       output = array_ops.identity(reduce_sum)
    658 
    659       with session.Session(config=_get_config(False)) as sess:
    660         output_val_ref = sess.run(output)
    661 
    662       with session.Session(config=_get_config()) as sess:
    663         metadata = config_pb2.RunMetadata()
    664         output_val = sess.run(output, run_metadata=metadata)
    665 
    666       nodes = []
    667       num_transposes = 0
    668       for node in metadata.cost_graph.node:
    669         if _is_transpose(node.name):
    670           num_transposes += 1
    671         nodes.append(node.name)
    672 
    673       # Four transposes were initially added in the Expand phase of
    674       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    675       expected_num_transposes = 2
    676       self.assertEqual(expected_num_transposes, num_transposes)
    677       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    678       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    679 
    680   def testReduceSumAlongWCKeepDims(self):
    681     if test.is_gpu_available(cuda_only=True):
    682       random_seed.set_random_seed(0)
    683       x = random_ops.truncated_normal([1, 784], seed=0)
    684       conv = _two_layer_model(x)
    685       reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keep_dims=True)
    686       output = array_ops.identity(reduce_sum)
    687 
    688       with session.Session(config=_get_config(False)) as sess:
    689         output_val_ref = sess.run(output)
    690 
    691       with session.Session(config=_get_config()) as sess:
    692         metadata = config_pb2.RunMetadata()
    693         output_val = sess.run(output, run_metadata=metadata)
    694 
    695       nodes = []
    696       num_transposes = 0
    697       for node in metadata.cost_graph.node:
    698         if _is_transpose(node.name):
    699           num_transposes += 1
    700         nodes.append(node.name)
    701 
    702       # Four transposes were initially added in the Expand phase of
    703       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    704       expected_num_transposes = 2
    705       self.assertEqual(expected_num_transposes, num_transposes)
    706       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    707       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    708 
    709   def testConcatWithControlDependency(self):
    710     if test.is_gpu_available(cuda_only=True):
    711       random_seed.set_random_seed(0)
    712       x = random_ops.truncated_normal([1, 784], seed=0)
    713       conv = _two_layer_model(x)
    714       axis = constant_op.constant(3)
    715       var = variables.Variable(3)
    716       assign = state_ops.assign(var, 6)
    717       with ops.control_dependencies([assign]):
    718         concat = array_ops.concat([conv, conv], axis)
    719       output = array_ops.identity(concat)
    720 
    721       with session.Session(config=_get_config(False)) as sess:
    722         output_val_ref = sess.run(output)
    723 
    724       with session.Session(config=_get_config()) as sess:
    725         metadata = config_pb2.RunMetadata()
    726         output_val = sess.run(output, run_metadata=metadata)
    727 
    728       nodes = []
    729       num_transposes = 0
    730       for node in metadata.cost_graph.node:
    731         if _is_transpose(node.name):
    732           num_transposes += 1
    733         nodes.append(node.name)
    734 
    735       # Four transposes were initially added in the Expand phase of
    736       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    737       expected_num_transposes = 2
    738       self.assertEqual(expected_num_transposes, num_transposes)
    739       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    740       self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
    741       self.assertIn('concat-2-LayoutOptimizer', nodes)
    742       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    743 
    744   def testFill(self):
    745     if test.is_gpu_available(cuda_only=True):
    746       random_seed.set_random_seed(0)
    747       x = array_ops.placeholder(dtype='float32')
    748       conv = _two_layer_model(x)
    749       shape = array_ops.shape(conv)
    750       scalar = array_ops.constant(5.7)
    751       fill = array_ops.fill(shape, scalar)
    752       output = array_ops.identity(fill)
    753 
    754       x_val = [3.4] * 784
    755       with session.Session(config=_get_config(False)) as sess:
    756         output_val_ref = sess.run(output, feed_dict={x: x_val})
    757 
    758       with session.Session(config=_get_config()) as sess:
    759         metadata = config_pb2.RunMetadata()
    760         output_val = sess.run(
    761             output, run_metadata=metadata, feed_dict={
    762                 x: x_val
    763             })
    764 
    765       nodes = []
    766       num_transposes = 0
    767       num_vec_permute = 0
    768       for node in metadata.cost_graph.node:
    769         if _is_transpose(node.name):
    770           num_transposes += 1
    771         if _is_permute(node.name):
    772           num_vec_permute += 1
    773         nodes.append(node.name)
    774 
    775       # Four transposes were initially added in the Expand phase of
    776       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    777       expected_num_transposes = 2
    778       self.assertEqual(expected_num_transposes, num_transposes)
    779       # Two vector permute nodes were initially added in the Expand phase of
    780       # LayoutOptimizer; they cancelled out each other in the Collapse phase.
    781       expected_vec_permute = 0
    782       self.assertEqual(expected_vec_permute, num_vec_permute)
    783       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    784       self._assert_trans_nchw_to_nhwc('Fill-0-0', nodes)
    785       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    786 
    787   def testTile(self):
    788     if test.is_gpu_available(cuda_only=True):
    789       random_seed.set_random_seed(0)
    790       x = random_ops.truncated_normal([1, 784], seed=0)
    791       conv = _two_layer_model(x)
    792       multiple = array_ops.placeholder(dtype='int32')
    793       tile = array_ops.tile(conv, multiple)
    794       output = array_ops.identity(tile)
    795 
    796       multiple_val = [2, 3, 4, 1]
    797       with session.Session(config=_get_config(False)) as sess:
    798         output_val_ref = sess.run(output, feed_dict={multiple: multiple_val})
    799 
    800       with session.Session(config=_get_config()) as sess:
    801         metadata = config_pb2.RunMetadata()
    802         output_val = sess.run(
    803             output, run_metadata=metadata, feed_dict={
    804                 multiple: multiple_val
    805             })
    806 
    807       nodes = []
    808       num_transposes = 0
    809       for node in metadata.cost_graph.node:
    810         if _is_transpose(node.name):
    811           num_transposes += 1
    812         nodes.append(node.name)
    813 
    814       # Four transposes were initially added in the Expand phase of
    815       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    816       expected_num_transposes = 2
    817       self.assertEqual(expected_num_transposes, num_transposes)
    818       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    819       self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes)
    820       self._assert_vec_nhwc_to_nchw('Tile-1', nodes)
    821       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    822 
    823   def testReverseWithConstDims(self):
    824     if test.is_gpu_available(cuda_only=True):
    825       random_seed.set_random_seed(0)
    826       x = random_ops.truncated_normal([1, 784], seed=0)
    827       conv = _two_layer_model(x)
    828       dims = constant_op.constant([3, 1], name='DimsConst')
    829       reverse = array_ops.reverse(conv, dims)
    830       output = array_ops.identity(reverse)
    831 
    832       with session.Session(config=_get_config(False)) as sess:
    833         output_val_ref = sess.run(output)
    834 
    835       with session.Session(config=_get_config()) as sess:
    836         metadata = config_pb2.RunMetadata()
    837         output_val = sess.run(output, run_metadata=metadata)
    838 
    839       nodes = []
    840       num_transposes = 0
    841       for node in metadata.cost_graph.node:
    842         if _is_transpose(node.name):
    843           num_transposes += 1
    844         nodes.append(node.name)
    845 
    846       # Four transposes were initially added in the Expand phase of
    847       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    848       expected_num_transposes = 2
    849       self.assertEqual(expected_num_transposes, num_transposes)
    850       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    851       self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
    852       self.assertIn('ReverseV2-1-LayoutOptimizer', nodes)
    853       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    854 
    855   def testReverseWithNonConstDims(self):
    856     if test.is_gpu_available(cuda_only=True):
    857       random_seed.set_random_seed(0)
    858       x = random_ops.truncated_normal([1, 784], seed=0)
    859       conv = _two_layer_model(x)
    860       dims = array_ops.placeholder(dtype='int32')
    861       reverse = array_ops.reverse(conv, dims)
    862       output = array_ops.identity(reverse)
    863 
    864       dims_val = [2, 3]
    865       with session.Session(config=_get_config(False)) as sess:
    866         output_val_ref = sess.run(output, feed_dict={dims: dims_val})
    867 
    868       with session.Session(config=_get_config()) as sess:
    869         metadata = config_pb2.RunMetadata()
    870         output_val = sess.run(
    871             output, run_metadata=metadata, feed_dict={
    872                 dims: dims_val
    873             })
    874 
    875       nodes = []
    876       num_transposes = 0
    877       for node in metadata.cost_graph.node:
    878         if _is_transpose(node.name):
    879           num_transposes += 1
    880         nodes.append(node.name)
    881 
    882       # Four transposes were initially added in the Expand phase of
    883       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    884       expected_num_transposes = 2
    885       self.assertEqual(expected_num_transposes, num_transposes)
    886       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    887       self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
    888       self._assert_map_nhwc_to_nchw('ReverseV2-1', nodes)
    889       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    890 
    891   def testSelectOp(self):
    892     if test.is_gpu_available(cuda_only=True):
    893       random_seed.set_random_seed(0)
    894       x = random_ops.truncated_normal([1, 784], seed=0)
    895       conv = _two_layer_model(x)
    896       add = math_ops.add(conv, conv)
    897       mean = math_ops.reduce_mean(conv)
    898       condition = math_ops.less(conv, mean)
    899       select = gen_math_ops._select(condition, conv, add)
    900       output = array_ops.identity(select)
    901 
    902       with session.Session(config=_get_config(False)) as sess:
    903         output_val_ref = sess.run(output)
    904 
    905       with session.Session(config=_get_config()) as sess:
    906         metadata = config_pb2.RunMetadata()
    907         output_val = sess.run(output, run_metadata=metadata)
    908 
    909       nodes = []
    910       num_transposes = 0
    911       for node in metadata.cost_graph.node:
    912         if _is_transpose(node.name):
    913           num_transposes += 1
    914         nodes.append(node.name)
    915 
    916       expected_num_transposes = 2
    917       self.assertEqual(expected_num_transposes, num_transposes)
    918       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    919       self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
    920       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    921 
    922   def testSelectOpConditionUnknownShape(self):
    923     if test.is_gpu_available(cuda_only=True):
    924       random_seed.set_random_seed(0)
    925       x = random_ops.truncated_normal([1, 784], seed=0)
    926       conv = _two_layer_model(x)
    927       add = math_ops.add(conv, conv)
    928       condition = array_ops.placeholder(dtype='bool')
    929       select = gen_math_ops._select(condition, conv, add)
    930       output = array_ops.identity(select)
    931 
    932       condition_val = np.zeros((1, 7, 7, 64))
    933       with session.Session(config=_get_config(False)) as sess:
    934         output_val_ref = sess.run(output, feed_dict={condition: condition_val})
    935 
    936       with session.Session(config=_get_config()) as sess:
    937         metadata = config_pb2.RunMetadata()
    938         output_val = sess.run(
    939             output, run_metadata=metadata, feed_dict={condition: condition_val})
    940 
    941       nodes = []
    942       num_transposes = 0
    943       for node in metadata.cost_graph.node:
    944         if _is_transpose(node.name):
    945           num_transposes += 1
    946         nodes.append(node.name)
    947 
    948       expected_num_transposes = 3
    949       self.assertEqual(expected_num_transposes, num_transposes)
    950       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    951       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    952 
    953   def testSelectOpScalarCondition(self):
    954     if test.is_gpu_available(cuda_only=True):
    955       random_seed.set_random_seed(0)
    956       x = random_ops.truncated_normal([1, 784], seed=0)
    957       conv = _two_layer_model(x)
    958       add = math_ops.add(conv, conv)
    959       condition = constant_op.constant(True)
    960       select = gen_math_ops._select(condition, conv, add)
    961       output = array_ops.identity(select)
    962 
    963       with session.Session(config=_get_config(False)) as sess:
    964         output_val_ref = sess.run(output)
    965 
    966       with session.Session(config=_get_config()) as sess:
    967         metadata = config_pb2.RunMetadata()
    968         output_val = sess.run(output, run_metadata=metadata)
    969 
    970       nodes = []
    971       num_transposes = 0
    972       for node in metadata.cost_graph.node:
    973         if _is_transpose(node.name):
    974           num_transposes += 1
    975         nodes.append(node.name)
    976 
    977       expected_num_transposes = 2
    978       self.assertEqual(expected_num_transposes, num_transposes)
    979       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
    980       self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
    981       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
    982 
    983   def testPadWithNonConstPaddings(self):
    984     if test.is_gpu_available(cuda_only=True):
    985       random_seed.set_random_seed(0)
    986       x = random_ops.truncated_normal([1, 784], seed=0)
    987       conv = _two_layer_model(x)
    988       paddings = array_ops.placeholder(dtype='int32')
    989       pad = array_ops.pad(conv, paddings)
    990       output = array_ops.identity(pad)
    991 
    992       paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
    993       with session.Session(config=_get_config(False)) as sess:
    994         output_val_ref = sess.run(output, feed_dict={paddings: paddings_val})
    995 
    996       with session.Session(config=_get_config()) as sess:
    997         metadata = config_pb2.RunMetadata()
    998         output_val = sess.run(
    999             output, run_metadata=metadata, feed_dict={
   1000                 paddings: paddings_val
   1001             })
   1002 
   1003       nodes = []
   1004       num_transposes = 0
   1005       for node in metadata.cost_graph.node:
   1006         if _is_transpose(node.name):
   1007           num_transposes += 1
   1008         nodes.append(node.name)
   1009 
   1010       # Four transposes were initially added in the Expand phase of
   1011       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
   1012       expected_num_transposes = 2
   1013       self.assertEqual(expected_num_transposes, num_transposes)
   1014       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1015       self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
   1016       self._assert_vec_nhwc_to_nchw('Pad-1', nodes)
   1017       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1018 
   1019   def testMaxPoolV2(self):
   1020     if test.is_gpu_available(cuda_only=True):
   1021       random_seed.set_random_seed(0)
   1022       x = random_ops.truncated_normal([1, 784], seed=0)
   1023       conv = _two_layer_model(x)
   1024       ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
   1025       strides = array_ops.placeholder(dtype='int32', shape=[4])
   1026       max_pool = gen_nn_ops._max_pool_v2(conv, ksize, strides, 'VALID')
   1027       output = array_ops.identity(max_pool)
   1028 
   1029       strides_val = [1, 3, 2, 1]
   1030       with session.Session(config=_get_config(False)) as sess:
   1031         output_val_ref = sess.run(output, feed_dict={strides: strides_val})
   1032 
   1033       with session.Session(config=_get_config()) as sess:
   1034         metadata = config_pb2.RunMetadata()
   1035         output_val = sess.run(
   1036             output, run_metadata=metadata, feed_dict={
   1037                 strides: strides_val
   1038             })
   1039 
   1040       nodes = []
   1041       num_transposes = 0
   1042       for node in metadata.cost_graph.node:
   1043         if _is_transpose(node.name):
   1044           num_transposes += 1
   1045         nodes.append(node.name)
   1046 
   1047       expected_num_transposes = 2
   1048       self.assertEqual(expected_num_transposes, num_transposes)
   1049       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1050       self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', nodes)
   1051       self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', nodes)
   1052       self.assertIn('MaxPoolV2-1-LayoutOptimizer', nodes)
   1053       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1054 
   1055   def testMaxPoolGradV2(self):
   1056     if test.is_gpu_available(cuda_only=True):
   1057       random_seed.set_random_seed(0)
   1058       x = random_ops.truncated_normal([1, 784], seed=0)
   1059       conv = _two_layer_model(x)
   1060       ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
   1061       strides = array_ops.placeholder(dtype='int32', shape=[4])
   1062       max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
   1063                                                   strides, 'VALID')
   1064       output = array_ops.identity(max_pool_grad)
   1065 
   1066       strides_val = [1, 3, 2, 1]
   1067       with session.Session(config=_get_config(False)) as sess:
   1068         output_val_ref = sess.run(output, feed_dict={strides: strides_val})
   1069 
   1070       with session.Session(config=_get_config()) as sess:
   1071         metadata = config_pb2.RunMetadata()
   1072         output_val = sess.run(
   1073             output, run_metadata=metadata, feed_dict={
   1074                 strides: strides_val
   1075             })
   1076 
   1077       nodes = []
   1078       num_transposes = 0
   1079       for node in metadata.cost_graph.node:
   1080         if _is_transpose(node.name):
   1081           num_transposes += 1
   1082         nodes.append(node.name)
   1083 
   1084       expected_num_transposes = 2
   1085       self.assertEqual(expected_num_transposes, num_transposes)
   1086       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1087       self._assert_trans_nchw_to_nhwc('MaxPoolGradV2-0-0', nodes)
   1088       self._assert_vec_nhwc_to_nchw('MaxPoolGradV2-4', nodes)
   1089       self.assertIn('MaxPoolGradV2-3-LayoutOptimizer', nodes)
   1090       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1091 
   1092   def testSliceWithNonConstAxis(self):
   1093     if test.is_gpu_available(cuda_only=True):
   1094       random_seed.set_random_seed(0)
   1095       x = random_ops.truncated_normal([1, 784], seed=0)
   1096       conv = _two_layer_model(x)
   1097       size = array_ops.placeholder(dtype='int32')
   1098       s = array_ops.slice(conv, [0, 0, 0, 0], size)
   1099       output = array_ops.identity(s)
   1100 
   1101       size_val = [1, 2, 3, 4]
   1102       with session.Session(config=_get_config(False)) as sess:
   1103         output_val_ref = sess.run(output, feed_dict={size: size_val})
   1104 
   1105       with session.Session(config=_get_config()) as sess:
   1106         metadata = config_pb2.RunMetadata()
   1107         output_val = sess.run(
   1108             output, run_metadata=metadata, feed_dict={
   1109                 size: size_val
   1110             })
   1111 
   1112       nodes = []
   1113       num_transposes = 0
   1114       for node in metadata.cost_graph.node:
   1115         if _is_transpose(node.name):
   1116           num_transposes += 1
   1117         nodes.append(node.name)
   1118 
   1119       # Four transposes were initially added in the Expand phase of
   1120       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
   1121       expected_num_transposes = 2
   1122       self.assertEqual(expected_num_transposes, num_transposes)
   1123       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1124       self._assert_trans_nchw_to_nhwc('Slice-0-0', nodes)
   1125       self._assert_vec_nhwc_to_nchw('Slice-2', nodes)
   1126       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1127 
   1128   def testStridedSliceWithNonConstAxis(self):
   1129     if test.is_gpu_available(cuda_only=True):
   1130       random_seed.set_random_seed(0)
   1131       x = random_ops.truncated_normal([1, 784], seed=0)
   1132       conv = _two_layer_model(x)
   1133       end = array_ops.placeholder(dtype='int32')
   1134       s = array_ops.strided_slice(conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1])
   1135       output = array_ops.identity(s)
   1136 
   1137       end_val = [1, 2, 3, 4]
   1138       with session.Session(config=_get_config(False)) as sess:
   1139         output_val_ref = sess.run(output, feed_dict={end: end_val})
   1140 
   1141       with session.Session(config=_get_config()) as sess:
   1142         metadata = config_pb2.RunMetadata()
   1143         output_val = sess.run(
   1144             output, run_metadata=metadata, feed_dict={
   1145                 end: end_val
   1146             })
   1147 
   1148       nodes = []
   1149       num_transposes = 0
   1150       for node in metadata.cost_graph.node:
   1151         if _is_transpose(node.name):
   1152           num_transposes += 1
   1153         nodes.append(node.name)
   1154 
   1155       # Four transposes were initially added in the Expand phase of
   1156       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
   1157       expected_num_transposes = 2
   1158       self.assertEqual(expected_num_transposes, num_transposes)
   1159       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1160       self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes)
   1161       self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes)
   1162       self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
   1163       self.assertIn('StridedSlice-3-LayoutOptimizer', nodes)
   1164       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1165 
   1166   def testStridedSliceWithMask1011(self):
   1167     if test.is_gpu_available(cuda_only=True):
   1168       random_seed.set_random_seed(0)
   1169       x = random_ops.truncated_normal([1, 784], seed=0)
   1170       conv = _two_layer_model(x)
   1171       # This will generate a StridedSlice op with begin mask and
   1172       # end mask 11(1011).
   1173       s = conv[:, :, 1:-1, :]
   1174       output = array_ops.identity(s)
   1175 
   1176       with session.Session(config=_get_config(False)) as sess:
   1177         output_val_ref = sess.run(output)
   1178 
   1179       with session.Session(config=_get_config()) as sess:
   1180         metadata = config_pb2.RunMetadata()
   1181         output_val = sess.run(output, run_metadata=metadata)
   1182 
   1183       nodes = []
   1184       num_transposes = 0
   1185       for node in metadata.cost_graph.node:
   1186         if _is_transpose(node.name):
   1187           num_transposes += 1
   1188         nodes.append(node.name)
   1189 
   1190       # Four transposes were initially added in the Expand phase of
   1191       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
   1192       expected_num_transposes = 2
   1193       self.assertEqual(expected_num_transposes, num_transposes)
   1194       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1195       self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
   1196       self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
   1197       self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
   1198       self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
   1199       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1200 
   1201   def testStridedSliceWithMask0111(self):
   1202     if test.is_gpu_available(cuda_only=True):
   1203       random_seed.set_random_seed(0)
   1204       x = random_ops.truncated_normal([1, 784], seed=0)
   1205       conv = _two_layer_model(x)
   1206       # This will generate a StridedSlice op with begin mask and
   1207       # end mask 7(0111).
   1208       s = conv[:, :, :, 1:-1]
   1209       output = array_ops.identity(s)
   1210 
   1211       with session.Session(config=_get_config(False)) as sess:
   1212         output_val_ref = sess.run(output)
   1213 
   1214       with session.Session(config=_get_config()) as sess:
   1215         metadata = config_pb2.RunMetadata()
   1216         output_val = sess.run(output, run_metadata=metadata)
   1217 
   1218       nodes = []
   1219       num_transposes = 0
   1220       for node in metadata.cost_graph.node:
   1221         if _is_transpose(node.name):
   1222           num_transposes += 1
   1223         nodes.append(node.name)
   1224 
   1225       # Four transposes were initially added in the Expand phase of
   1226       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
   1227       expected_num_transposes = 2
   1228       self.assertEqual(expected_num_transposes, num_transposes)
   1229       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1230       self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
   1231       self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
   1232       self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
   1233       self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
   1234       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1235 
   1236   def testStridedSliceGradWithNonConstAxis(self):
   1237     if test.is_gpu_available(cuda_only=True):
   1238       random_seed.set_random_seed(0)
   1239       x = random_ops.truncated_normal([1, 784], seed=0)
   1240       conv = _two_layer_model(x)
   1241       end = array_ops.placeholder(dtype='int32')
   1242       shape = array_ops.shape(conv)
   1243       end_val = [1, 2, 3, 4]
   1244       s = array_ops.strided_slice(
   1245           conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1])
   1246       s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end,
   1247                                             [1, 2, 3, 1], s)
   1248       output = array_ops.identity(s_grad)
   1249 
   1250       with session.Session(config=_get_config(False)) as sess:
   1251         output_val_ref = sess.run(output, feed_dict={end: end_val})
   1252 
   1253       with session.Session(config=_get_config()) as sess:
   1254         metadata = config_pb2.RunMetadata()
   1255         output_val = sess.run(
   1256             output, run_metadata=metadata, feed_dict={
   1257                 end: end_val
   1258             })
   1259 
   1260       nodes = []
   1261       num_transposes = 0
   1262       for node in metadata.cost_graph.node:
   1263         if _is_transpose(node.name):
   1264           num_transposes += 1
   1265         nodes.append(node.name)
   1266 
   1267       # Four transposes were initially added in the Expand phase of
   1268       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
   1269       expected_num_transposes = 2
   1270       self.assertEqual(expected_num_transposes, num_transposes)
   1271       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1272       self._assert_trans_nchw_to_nhwc('StridedSliceGrad-0-0', nodes)
   1273       self._assert_vec_nhwc_to_nchw('StridedSliceGrad-2', nodes)
   1274       self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
   1275       self.assertIn('StridedSlice-2-LayoutOptimizer', nodes)
   1276       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1277 
   1278   def testShapeN(self):
   1279     if test.is_gpu_available(cuda_only=True):
   1280       x = array_ops.placeholder(dtype='float32')
   1281       conv = _two_layer_model(x)
   1282       shapen = array_ops.shape_n([conv, conv])
   1283       output = math_ops.add(shapen[0], shapen[1])
   1284 
   1285       x_val = [1.7] * 784
   1286       with session.Session(config=_get_config(False)) as sess:
   1287         output_val_ref = sess.run(output, feed_dict={x: x_val})
   1288 
   1289       with session.Session(config=_get_config()) as sess:
   1290         metadata = config_pb2.RunMetadata()
   1291         output_val = sess.run(
   1292             output, run_metadata=metadata, feed_dict={
   1293                 x: x_val
   1294             })
   1295 
   1296       nodes = []
   1297       num_transposes = 0
   1298       for node in metadata.cost_graph.node:
   1299         if _is_transpose(node.name):
   1300           num_transposes += 1
   1301         nodes.append(node.name)
   1302 
   1303       expected_num_transposes = 1
   1304       self.assertEqual(expected_num_transposes, num_transposes)
   1305       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1306       self._assert_vec_nchw_to_nhwc('ShapeN-0-0', nodes)
   1307       self.assertAllEqual(output_val_ref, output_val)
   1308 
   1309   def testShapeNFollowedByNotConvertibleNodeReshape(self):
   1310     if test.is_gpu_available(cuda_only=True):
   1311       x = array_ops.placeholder(dtype='float32')
   1312       conv = _two_layer_model(x)
   1313       conv_reshape = array_ops.reshape(conv, [1, 1, 1, -1])
   1314       shapen = array_ops.shape_n([conv, conv_reshape])
   1315       shape = array_ops.identity(shapen[1])
   1316       ones = array_ops.ones(shape)
   1317       output = math_ops.add_n([conv_reshape, ones])
   1318 
   1319       x_val = [1.7] * 784
   1320       with session.Session(config=_get_config(False)) as sess:
   1321         output_val_ref = sess.run(output, feed_dict={x: x_val})
   1322 
   1323       with session.Session(config=_get_config()) as sess:
   1324         metadata = config_pb2.RunMetadata()
   1325         output_val = sess.run(
   1326             output, run_metadata=metadata, feed_dict={x: x_val})
   1327 
   1328       nodes = []
   1329       num_transposes = 0
   1330       for node in metadata.cost_graph.node:
   1331         if _is_transpose(node.name):
   1332           num_transposes += 1
   1333         nodes.append(node.name)
   1334 
   1335       expected_num_transposes = 2
   1336       self.assertEqual(expected_num_transposes, num_transposes)
   1337       self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
   1338       self.assertAllEqual(output_val_ref, output_val)
   1339 
   1340   def testLoop(self):
   1341     if test.is_gpu_available(cuda_only=True):
   1342       output = _loop()
   1343 
   1344       with session.Session(config=_get_config(False)) as sess:
   1345         output_val_ref = sess.run(output)
   1346 
   1347       with session.Session(config=_get_config()) as sess:
   1348         metadata = config_pb2.RunMetadata()
   1349         output_val = sess.run(output, run_metadata=metadata)
   1350 
   1351       nodes = []
   1352       num_transposes = 0
   1353       for node in metadata.cost_graph.node:
   1354         if _is_transpose(node.name):
   1355           num_transposes += 1
   1356         nodes.append(node.name)
   1357 
   1358       # Four transposes were initially added in the Expand phase of
   1359       # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
   1360       expected_num_transposes = 2
   1361       self.assertEqual(expected_num_transposes, num_transposes)
   1362       self.assertEqual(expected_num_transposes, num_transposes)
   1363       self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
   1364       self._assert_trans_nchw_to_nhwc('map/while/MaxPool_1-0-2', nodes)
   1365       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1366 
   1367   def testLoopWithBranch(self):
   1368     if test.is_gpu_available(cuda_only=True):
   1369       output = _loop_with_branch()
   1370 
   1371       with session.Session(config=_get_config(False)) as sess:
   1372         output_val_ref = sess.run(output)
   1373 
   1374       with session.Session(config=_get_config()) as sess:
   1375         metadata = config_pb2.RunMetadata()
   1376         output_val = sess.run(output, run_metadata=metadata)
   1377 
   1378       nodes = []
   1379       num_transposes = 0
   1380       for node in metadata.cost_graph.node:
   1381         if _is_transpose(node.name):
   1382           num_transposes += 1
   1383         nodes.append(node.name)
   1384 
   1385       expected_num_transposes = 3
   1386       self.assertEqual(expected_num_transposes, num_transposes)
   1387       self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
   1388       self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
   1389       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1390 
   1391   def testLoopWithVecAnd4D(self):
   1392     if test.is_gpu_available(cuda_only=True):
   1393       output = _loop_with_vec_and_4d()
   1394 
   1395       with session.Session(config=_get_config(False)) as sess:
   1396         output_val_ref = sess.run(output)
   1397 
   1398       with session.Session(config=_get_config()) as sess:
   1399         metadata = config_pb2.RunMetadata()
   1400         output_val = sess.run(output, run_metadata=metadata)
   1401 
   1402       nodes = []
   1403       num_transposes = 0
   1404       for node in metadata.cost_graph.node:
   1405         if _is_transpose(node.name):
   1406           num_transposes += 1
   1407         nodes.append(node.name)
   1408 
   1409       expected_num_transposes = 2
   1410       self.assertEqual(expected_num_transposes, num_transposes)
   1411       self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
   1412       self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
   1413       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1414 
   1415   def testBinaryOpSecondPort(self):
   1416     if test.is_gpu_available(cuda_only=True):
   1417       output = _model_with_second_port()
   1418 
   1419       with session.Session(config=_get_config(False)) as sess:
   1420         output_val_ref = sess.run(output)
   1421 
   1422       with session.Session(config=_get_config()) as sess:
   1423         metadata = config_pb2.RunMetadata()
   1424         output_val = sess.run(output, run_metadata=metadata)
   1425 
   1426       nodes = []
   1427       num_transposes = 0
   1428       for node in metadata.cost_graph.node:
   1429         if _is_transpose(node.name):
   1430           num_transposes += 1
   1431         nodes.append(node.name)
   1432 
   1433       expected_num_transposes = 2
   1434       self.assertEqual(expected_num_transposes, num_transposes)
   1435       self._assert_trans_nhwc_to_nchw('FusedBatchNorm-0', nodes)
   1436       self._assert_trans_nchw_to_nhwc('Add-0-0', nodes)
   1437       self.assertAllClose(output_val_ref, output_val, atol=1e-3)
   1438 
   1439   def testGradient(self):
   1440     meta_graph = _simple_metagraph()
   1441     rewrite_options = rewriter_config_pb2.RewriterConfig(
   1442         layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
   1443     optimized_graph = tf_optimizer.OptimizeGraph(
   1444         rewrite_options, meta_graph, cluster=_get_cluster())
   1445 
   1446     found = 0
   1447     for node in optimized_graph.node:
   1448       if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']:
   1449         found += 1
   1450         self.assertEqual(node.attr['data_format'].s, b'NCHW')
   1451     self.assertEqual(found, 5)
   1452 
   1453   def testDepthwise(self):
   1454     meta_graph = _simple_metagraph(depthwise=True)
   1455     rewrite_options = rewriter_config_pb2.RewriterConfig(
   1456         layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
   1457     optimized_graph = tf_optimizer.OptimizeGraph(
   1458         rewrite_options, meta_graph, cluster=_get_cluster())
   1459 
   1460     found = 0
   1461     for node in optimized_graph.node:
   1462       if node.op in [
   1463           'DepthwiseConv2dNative', 'DepthwiseConv2dNativeBackpropFilter',
   1464           'DepthwiseConv2dNativeBackpropInput'
   1465       ]:
   1466         found += 1
   1467         self.assertEqual(node.attr['data_format'].s, b'NCHW')
   1468     self.assertEqual(found, 6)
   1469 
   1470   def testCheckpointCompatibility(self):
   1471     if not test.is_gpu_available(cuda_only=True):
   1472       self.skipTest('GPU required')
   1473 
   1474     checkpoint_path = self.get_temp_dir()
   1475     self._train(checkpoint_path)
   1476     vars_expected = self._train(checkpoint_path, restore=True)
   1477     vars_layout_optimized = self._train(
   1478         checkpoint_path, restore=True, layout_optimizer=True)
   1479 
   1480     for var_expected, var_layout_optimized in zip(vars_expected,
   1481                                                   vars_layout_optimized):
   1482       self.assertAllClose(var_expected, var_layout_optimized, atol=1e-6)
   1483 
   1484 
# Standard TensorFlow test entry point: runs all test cases in this module.
if __name__ == '__main__':
  test.main()
   1487