# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Grappler LayoutOptimizer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import device_properties_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.core.protobuf import saver_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.grappler import cluster as gcluster
from tensorflow.python.grappler import tf_optimizer
from tensorflow.python.layers import convolutional as conv_layers
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import functional_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import gradient_descent
from tensorflow.python.training import saver as saver_lib


def _weight(shape):
  """Generates a weight of a given shape."""
  return random_ops.truncated_normal(shape, seed=0, stddev=0.1)


def _bias(shape):
  """Generates a bias of a given shape."""
  return constant_op.constant(0.1, shape=shape)


def _conv2d(x, w):
  """Returns a 2d convolution layer with full stride."""
  return nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')


def _max_pool_2x2(x):
  """Downsamples a feature map by 2X."""
  return nn.max_pool(
      x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
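

# All of the model builders below construct NHWC ([batch, height, width,
# channels]) graphs, TensorFlow's default layout. On a GPU the layout
# optimizer is expected to rewrite the convolutions to NCHW and to insert
# conversion nodes at the graph boundaries; the tests in this file count and
# locate those inserted nodes.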


# Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py
def _two_layer_model(x):
  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
  w_conv1 = _weight([5, 5, 1, 32])
  b_conv1 = _bias([32])
  h_conv1 = nn.relu(_conv2d(x_image, w_conv1) + b_conv1)
  h_pool1 = _max_pool_2x2(h_conv1)
  w_conv2 = _weight([5, 5, 32, 64])
  b_conv2 = _bias([64])
  h_conv2 = nn.relu(_conv2d(h_pool1, w_conv2) + b_conv2)
  h_pool2 = _max_pool_2x2(h_conv2)
  return h_pool2


def _model_with_second_port():
  random_seed.set_random_seed(0)
  x = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
  scale = constant_op.constant(0.1, shape=[4])
  offset = constant_op.constant(0.3, shape=[4])
  y, mean, _ = nn.fused_batch_norm(x, scale, offset)
  # The add consumes the second output port (mean) of FusedBatchNorm.
  add = math_ops.add(y, mean)
  output = array_ops.identity(add)
  return output


def _model_with_branch(x):
  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
  w_conv1 = _weight([5, 5, 1, 32])
  w_conv2 = _weight([5, 5, 1, 32])
  c_conv1 = _conv2d(x_image, w_conv1)
  c_conv2 = _conv2d(x_image, w_conv2)
  add = math_ops.add(c_conv1, c_conv2)
  return add


def _model_with_vec_and_4d(x):
  x_image = array_ops.reshape(x, [-1, 28, 28, 1])
  w_conv1 = _weight([5, 5, 1, 32])
  c_conv1 = _conv2d(x_image, w_conv1)
  vector = constant_op.constant(6.4, shape=[32])
  add = math_ops.add(c_conv1, vector)
  return add


def _loop():
  random_seed.set_random_seed(0)
  x1 = random_ops.truncated_normal([1, 784], seed=0)
  x2 = random_ops.truncated_normal([1, 784], seed=0)
  x3 = random_ops.truncated_normal([1, 784], seed=0)
  x4 = random_ops.truncated_normal([1, 784], seed=0)
  elems = (x1, x2, x3, x4)
  outputs = functional_ops.map_fn(
      _two_layer_model, elems, dtype=dtypes.float32)
  return outputs


def _loop_with_branch():
  random_seed.set_random_seed(0)
  x1 = random_ops.truncated_normal([1, 784], seed=0)
  x2 = random_ops.truncated_normal([1, 784], seed=0)
  x3 = random_ops.truncated_normal([1, 784], seed=0)
  x4 = random_ops.truncated_normal([1, 784], seed=0)
  elems = (x1, x2, x3, x4)
  outputs = functional_ops.map_fn(
      _model_with_branch, elems, dtype=dtypes.float32)
  return outputs


def _loop_with_vec_and_4d():
  random_seed.set_random_seed(0)
  x1 = random_ops.truncated_normal([1, 784], seed=0)
  x2 = random_ops.truncated_normal([1, 784], seed=0)
  x3 = random_ops.truncated_normal([1, 784], seed=0)
  x4 = random_ops.truncated_normal([1, 784], seed=0)
  elems = (x1, x2, x3, x4)
  outputs = functional_ops.map_fn(
      _model_with_vec_and_4d, elems, dtype=dtypes.float32)
  return outputs


def _get_config(layout_optimizer=True):
  if layout_optimizer:
    rewrite_options = rewriter_config_pb2.RewriterConfig(
        layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
  else:
    rewrite_options = rewriter_config_pb2.RewriterConfig(
        layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF)
  # build_cost_model=1 makes Session.run() fill in RunMetadata.cost_graph,
  # which the tests inspect for the node names the optimizer produced.
  graph_options = config_pb2.GraphOptions(
      rewrite_options=rewrite_options, build_cost_model=1)
  config = config_pb2.ConfigProto(graph_options=graph_options)
  # Disable the pre-Grappler graph optimizations (opt_level L0).
  config.graph_options.optimizer_options.opt_level = -1
  return config


def _simple_metagraph(depthwise=False):
  random_seed.set_random_seed(0)
  x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0))
  conv = conv_layers.separable_conv2d if depthwise else conv_layers.conv2d
  y = conv(x, 32, [3, 3])
  z = conv(y, 32, [3, 3])
  optimizer = gradient_descent.GradientDescentOptimizer(1e-4)
  loss = math_ops.reduce_mean(z)
  train_op = optimizer.minimize(loss)
  graph = ops.get_default_graph()
  graph.add_to_collection('train_op', train_op)
  meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def())
  return meta_graph


def _get_cluster():
  named_device = device_properties_pb2.NamedDevice()
  named_device.name = '/GPU:0'
  named_device.properties.type = 'GPU'
  named_device.properties.num_cores = 24
  named_device.properties.frequency = 1000
  named_device.properties.environment['architecture'] = '4'
  cluster = gcluster.Cluster(devices=[named_device])
  return cluster
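

# Nodes added by the layout optimizer carry a '-LayoutOptimizer' suffix.
# Three kinds of conversion nodes appear in these tests: Transpose nodes for
# 4-D activations, VecPermute nodes for 4-element shape-like vectors (e.g.
# paddings, sizes, and strides), and DimMap nodes for dimension indices such
# as a split or reverse axis. The predicates and assertion helpers below
# match these names.


# A minimal sketch of the measurement pattern most tests in this file follow;
# the tests inline the pattern rather than call this helper, so it is
# illustrative only. It runs a graph with the optimizer off to get a
# reference result, reruns it with the optimizer on, and counts the Transpose
# nodes the optimizer inserted, using _is_transpose (defined below).
def _run_and_count_transposes(output, feed_dict=None):
  with session.Session(config=_get_config(False)) as sess:
    reference_val = sess.run(output, feed_dict=feed_dict)
  with session.Session(config=_get_config()) as sess:
    metadata = config_pb2.RunMetadata()
    optimized_val = sess.run(
        output, run_metadata=metadata, feed_dict=feed_dict)
  num_transposes = sum(
      1 for node in metadata.cost_graph.node if _is_transpose(node.name))
  return reference_val, optimized_val, num_transposes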


def _is_transpose(node):
  return node.endswith('TransposeNHWCToNCHW-LayoutOptimizer') or node.endswith(
      'TransposeNCHWToNHWC-LayoutOptimizer')


def _is_permute(node):
  return node.endswith('VecPermuteNHWCToNCHW-LayoutOptimizer') or node.endswith(
      'VecPermuteNCHWToNHWC-LayoutOptimizer')


class LayoutOptimizerTest(test.TestCase):
  """Tests the Grappler layout optimizer.

  Most tests build an NHWC graph, run it once with the layout optimizer
  disabled to obtain a reference output, run it again with the optimizer
  enabled, and then verify both the numerical output and the conversion
  nodes recorded in the cost graph.
  """

  def _assert_trans_nchw_to_nhwc(self, name, nodes):
    self.assertIn(name + '-TransposeNCHWToNHWC-LayoutOptimizer', nodes)

  def _assert_trans_nhwc_to_nchw(self, name, nodes):
    self.assertIn(name + '-TransposeNHWCToNCHW-LayoutOptimizer', nodes)

  def _assert_map_nhwc_to_nchw(self, name, nodes):
    self.assertIn(name + '-DimMapNHWCToNCHW-LayoutOptimizer', nodes)

  def _assert_vec_nchw_to_nhwc(self, name, nodes):
    self.assertIn(name + '-VecPermuteNCHWToNHWC-LayoutOptimizer', nodes)

  def _assert_vec_nhwc_to_nchw(self, name, nodes):
    self.assertIn(name + '-VecPermuteNHWCToNCHW-LayoutOptimizer', nodes)
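
  # _train builds a small two-layer conv model and either saves or restores a
  # V2 checkpoint. testCheckpointCompatibility uses it to check that a
  # checkpoint written without the layout optimizer can be restored and
  # trained with the optimizer enabled, yielding the same variable values.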
  def _train(self, checkpoint_path, layout_optimizer=False, restore=False):
    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with session.Session(
        config=_get_config(layout_optimizer), graph=graph) as sess:
      batch = 2
      height = 6
      width = 7
      input_channels = 3
      shape = [batch, height, width, input_channels]
      image = array_ops.placeholder(dtype='float32', shape=shape)
      conv1 = conv_layers.conv2d(image, 32, [3, 3])
      conv2 = conv_layers.conv2d(conv1, 32, [3, 3])
      optimizer = gradient_descent.GradientDescentOptimizer(0.01)
      loss = math_ops.reduce_mean(conv2)
      train_op = optimizer.minimize(loss)
      saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

      if restore:
        saver.restore(sess, checkpoint_path)
      else:
        sess.run(variables.global_variables_initializer())

      np.random.seed(0)
      for _ in range(2):
        image_val = np.random.rand(*shape).astype(np.float32)
        sess.run([loss, train_op], feed_dict={image: image_val})

      if restore:
        all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        all_vars_values = [var.eval(session=sess) for var in all_vars]
        return all_vars_values
      else:
        saver.save(sess, checkpoint_path)

  def testTwoConvLayers(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      output = _two_layer_model(x)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      # (Two inverse transposes placed back to back are a no-op and removed.)
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Relu_1-0-0', nodes)

      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSplitWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dim = array_ops.placeholder(dtype='int32')
      split = array_ops.split(conv, 2, axis=dim)
      scale = constant_op.constant(0.1, shape=[32])
      offset = constant_op.constant(0.3, shape=[32])
      bn0 = nn.fused_batch_norm(split[0], scale, offset)
      bn1 = nn.fused_batch_norm(split[1], scale, offset)
      add = bn0[0] + bn1[0]
      output = array_ops.identity(add)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={dim: 3})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('add_2-0-0', nodes)
      self._assert_map_nhwc_to_nchw('split-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSplitVWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dim = array_ops.placeholder(dtype='int32')
      sizes = constant_op.constant([50, 10, 4], shape=[3])
      split = gen_array_ops._split_v(
          value=conv, size_splits=sizes, axis=dim, num_split=3)
      output = math_ops.reduce_sum(split[0])

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={dim: 3})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('SplitV-0-0', nodes)
      self._assert_map_nhwc_to_nchw('SplitV-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testPadWithConstPaddings(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
      paddings = constant_op.constant(
          paddings_val, dtype='int32', name='PaddingsConst')
      pad = array_ops.pad(conv, paddings)
      output = array_ops.identity(pad)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
      self.assertIn('Pad-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReduceSum(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv)
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testCast(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      cast = math_ops.cast(conv, dtype='bool')
      output = array_ops.identity(cast)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Cast-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSqueeze(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2])
      squeeze = array_ops.squeeze(reduce_sum)
      output = array_ops.identity(squeeze)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSqueezeAlongHW(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keep_dims=True)
      squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2])
      output = array_ops.identity(squeeze)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSqueezeAlongNHW(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keep_dims=True)
      squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2])
      output = array_ops.identity(squeeze)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReduceSumAlongHWC(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2, 3])
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReduceSumAlongNHW(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2])
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReduceSumAlongC(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[3])
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Three transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReduceSumAlongCKeepDims(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[3], keep_dims=True)
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Sum-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReduceSumAlongWKeepDims(self):
    # Note: axis 2 of an NHWC tensor is the width dimension.
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[2], keep_dims=True)
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReduceSumAlongWCKeepDims(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keep_dims=True)
      output = array_ops.identity(reduce_sum)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testConcatWithControlDependency(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      axis = constant_op.constant(3)
      var = variables.Variable(3)
      assign = state_ops.assign(var, 6)
      with ops.control_dependencies([assign]):
        concat = array_ops.concat([conv, conv], axis)
      output = array_ops.identity(concat)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
      self.assertIn('concat-2-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testFill(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = array_ops.placeholder(dtype='float32')
      conv = _two_layer_model(x)
      shape = array_ops.shape(conv)
      scalar = array_ops.constant(5.7)
      fill = array_ops.fill(shape, scalar)
      output = array_ops.identity(fill)

      x_val = [3.4] * 784
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={x: x_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={x: x_val})

      nodes = []
      num_transposes = 0
      num_vec_permute = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        if _is_permute(node.name):
          num_vec_permute += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      # Two vector permute nodes were initially added in the Expand phase of
      # LayoutOptimizer; they cancel each other out in the Collapse phase.
      expected_vec_permute = 0
      self.assertEqual(expected_vec_permute, num_vec_permute)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Fill-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testTile(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      multiple = array_ops.placeholder(dtype='int32')
      tile = array_ops.tile(conv, multiple)
      output = array_ops.identity(tile)

      multiple_val = [2, 3, 4, 1]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={multiple: multiple_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={multiple: multiple_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Tile-1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReverseWithConstDims(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dims = constant_op.constant([3, 1], name='DimsConst')
      reverse = array_ops.reverse(conv, dims)
      output = array_ops.identity(reverse)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
      self.assertIn('ReverseV2-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testReverseWithNonConstDims(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dims = array_ops.placeholder(dtype='int32')
      reverse = array_ops.reverse(conv, dims)
      output = array_ops.identity(reverse)

      dims_val = [2, 3]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={dims: dims_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={dims: dims_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
      self._assert_map_nhwc_to_nchw('ReverseV2-1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSelectOp(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      mean = math_ops.reduce_mean(conv)
      condition = math_ops.less(conv, mean)
      select = gen_math_ops._select(condition, conv, add)
      output = array_ops.identity(select)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSelectOpConditionUnknownShape(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      condition = array_ops.placeholder(dtype='bool')
      select = gen_math_ops._select(condition, conv, add)
      output = array_ops.identity(select)

      condition_val = np.zeros((1, 7, 7, 64))
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={condition: condition_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={condition: condition_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 3
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSelectOpScalarCondition(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      condition = constant_op.constant(True)
      select = gen_math_ops._select(condition, conv, add)
      output = array_ops.identity(select)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testPadWithNonConstPaddings(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      paddings = array_ops.placeholder(dtype='int32')
      pad = array_ops.pad(conv, paddings)
      output = array_ops.identity(pad)

      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={paddings: paddings_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={paddings: paddings_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Pad-1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testMaxPoolV2(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
      strides = array_ops.placeholder(dtype='int32', shape=[4])
      max_pool = gen_nn_ops._max_pool_v2(conv, ksize, strides, 'VALID')
      output = array_ops.identity(max_pool)

      strides_val = [1, 3, 2, 1]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={strides: strides_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={strides: strides_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', nodes)
      self.assertIn('MaxPoolV2-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
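
  # MaxPoolV2 and MaxPoolGradV2 take ksize and strides as tensor inputs
  # rather than attributes, so those inputs must be converted as well: a
  # constant input shows up as a new '-LayoutOptimizer' node, while a fed
  # (non-constant) input gets a VecPermute node.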
  def testMaxPoolGradV2(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
      strides = array_ops.placeholder(dtype='int32', shape=[4])
      max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
                                                  strides, 'VALID')
      output = array_ops.identity(max_pool_grad)

      strides_val = [1, 3, 2, 1]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={strides: strides_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={strides: strides_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('MaxPoolGradV2-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('MaxPoolGradV2-4', nodes)
      self.assertIn('MaxPoolGradV2-3-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testSliceWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      size = array_ops.placeholder(dtype='int32')
      s = array_ops.slice(conv, [0, 0, 0, 0], size)
      output = array_ops.identity(s)

      size_val = [1, 2, 3, 4]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={size: size_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={size: size_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Slice-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Slice-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testStridedSliceWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      end = array_ops.placeholder(dtype='int32')
      s = array_ops.strided_slice(
          conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1])
      output = array_ops.identity(s)

      end_val = [1, 2, 3, 4]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={end: end_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={end: end_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes)
      self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
      self.assertIn('StridedSlice-3-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testStridedSliceWithMask1011(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      # This will generate a StridedSlice op with begin mask and end mask of
      # 11 (binary 1011).
      s = conv[:, :, 1:-1, :]
      output = array_ops.identity(s)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
      self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
      self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
      self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testStridedSliceWithMask0111(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      # This will generate a StridedSlice op with begin mask and end mask of
      # 7 (binary 0111).
      s = conv[:, :, :, 1:-1]
      output = array_ops.identity(s)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
      self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
      self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
      self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testStridedSliceGradWithNonConstAxis(self):
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      end = array_ops.placeholder(dtype='int32')
      shape = array_ops.shape(conv)
      end_val = [1, 2, 3, 4]
      s = array_ops.strided_slice(
          conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1])
      s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end,
                                            [1, 2, 3, 1], s)
      output = array_ops.identity(s_grad)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={end: end_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={end: end_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('StridedSliceGrad-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('StridedSliceGrad-2', nodes)
      self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
      self.assertIn('StridedSlice-2-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
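
  # Shape outputs of converted nodes are produced in NCHW order, so instead
  # of a full Transpose the optimizer inserts a VecPermute on the shape
  # vector to restore NHWC order for downstream consumers.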
  def testShapeN(self):
    if test.is_gpu_available(cuda_only=True):
      x = array_ops.placeholder(dtype='float32')
      conv = _two_layer_model(x)
      shapen = array_ops.shape_n([conv, conv])
      output = math_ops.add(shapen[0], shapen[1])

      x_val = [1.7] * 784
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={x: x_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={x: x_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 1
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_vec_nchw_to_nhwc('ShapeN-0-0', nodes)
      self.assertAllEqual(output_val_ref, output_val)

  def testShapeNFollowedByNotConvertibleNodeReshape(self):
    if test.is_gpu_available(cuda_only=True):
      x = array_ops.placeholder(dtype='float32')
      conv = _two_layer_model(x)
      conv_reshape = array_ops.reshape(conv, [1, 1, 1, -1])
      shapen = array_ops.shape_n([conv, conv_reshape])
      shape = array_ops.identity(shapen[1])
      ones = array_ops.ones(shape)
      output = math_ops.add_n([conv_reshape, ones])

      x_val = [1.7] * 784
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={x: x_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={x: x_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllEqual(output_val_ref, output_val)

  def testLoop(self):
    if test.is_gpu_available(cuda_only=True):
      output = _loop()

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('map/while/MaxPool_1-0-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testLoopWithBranch(self):
    if test.is_gpu_available(cuda_only=True):
      output = _loop_with_branch()

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 3
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testLoopWithVecAnd4D(self):
    if test.is_gpu_available(cuda_only=True):
      output = _loop_with_vec_and_4d()

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  def testBinaryOpSecondPort(self):
    if test.is_gpu_available(cuda_only=True):
      output = _model_with_second_port()

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('FusedBatchNorm-0', nodes)
      self._assert_trans_nchw_to_nhwc('Add-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
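
  # testGradient and testDepthwise invoke Grappler offline through
  # tf_optimizer.OptimizeGraph with a simulated GPU cluster (_get_cluster)
  # instead of running a session, and verify the data_format attribute of
  # every rewritten convolution op.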
  def testGradient(self):
    meta_graph = _simple_metagraph()
    rewrite_options = rewriter_config_pb2.RewriterConfig(
        layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
    optimized_graph = tf_optimizer.OptimizeGraph(
        rewrite_options, meta_graph, cluster=_get_cluster())

    found = 0
    for node in optimized_graph.node:
      if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']:
        found += 1
        self.assertEqual(node.attr['data_format'].s, b'NCHW')
    self.assertEqual(found, 5)

  def testDepthwise(self):
    meta_graph = _simple_metagraph(depthwise=True)
    rewrite_options = rewriter_config_pb2.RewriterConfig(
        layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
    optimized_graph = tf_optimizer.OptimizeGraph(
        rewrite_options, meta_graph, cluster=_get_cluster())

    found = 0
    for node in optimized_graph.node:
      if node.op in [
          'DepthwiseConv2dNative', 'DepthwiseConv2dNativeBackpropFilter',
          'DepthwiseConv2dNativeBackpropInput'
      ]:
        found += 1
        self.assertEqual(node.attr['data_format'].s, b'NCHW')
    self.assertEqual(found, 6)

  def testCheckpointCompatibility(self):
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest('GPU required')

    checkpoint_path = self.get_temp_dir()
    self._train(checkpoint_path)
    vars_expected = self._train(checkpoint_path, restore=True)
    vars_layout_optimized = self._train(
        checkpoint_path, restore=True, layout_optimizer=True)

    for var_expected, var_layout_optimized in zip(vars_expected,
                                                  vars_layout_optimized):
      self.assertAllClose(var_expected, var_layout_optimized, atol=1e-6)


if __name__ == '__main__':
  test.main()