# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimizers."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.contrib.layers.python.layers import optimizers as optimizers_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import gradient_descent


def _setup_model():
  x = array_ops.placeholder(dtypes.float32, [])
  var = variable_scope.get_variable(
      "test", [], initializer=init_ops.constant_initializer(10))
  loss = math_ops.abs(var * x)
  global_step = variable_scope.get_variable(
      "global_step", [],
      trainable=False,
      dtype=dtypes.int64,
      initializer=init_ops.constant_initializer(
          0, dtype=dtypes.int64))
  return x, var, loss, global_step


def _no_op_learning_rate_decay_fn(lr, global_step):
  assert lr is not None
  assert global_step is not None
  return lr


class OptimizersTest(test.TestCase):

  def testSGDOptimizer(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1),
        lambda lr: gradient_descent.GradientDescentOptimizer(learning_rate=lr)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g:
        with self.test_session(graph=g) as session:
          x, var, loss, global_step = _setup_model()
          train = optimizers_lib.optimize_loss(
              loss, global_step, learning_rate=0.1, optimizer=optimizer)
          variables.global_variables_initializer().run()
          session.run(train, feed_dict={x: 5})
          var_value, global_step_value = session.run([var, global_step])
          self.assertEqual(var_value, 9.5)
          self.assertEqual(global_step_value, 1)

  def testNoLrCallable(self):

    def optimizer_fn():
      return gradient_descent.GradientDescentOptimizer(learning_rate=0.1)

    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        train = optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=None, optimizer=optimizer_fn)
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        var_value, global_step_value = session.run([var, global_step])
        self.assertEqual(var_value, 9.5)
        self.assertEqual(global_step_value, 1)

  def testWrongOptimizer(self):
    optimizers = ["blah", variables.Variable, object(), lambda x: None]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g:
        with self.test_session(graph=g):
          _, _, loss, global_step = _setup_model()
          with self.assertRaises(ValueError):
            optimizers_lib.optimize_loss(
                loss, global_step, learning_rate=0.1, optimizer=optimizer)

  def testBadSummaries(self):
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      _, _, loss, global_step = _setup_model()
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=0.1, optimizer="SGD",
            summaries=["loss", "bad_summary"])

  def testInvalidLoss(self):
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      _, _, _, global_step = _setup_model()
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            None, global_step, learning_rate=0.1, optimizer="SGD")
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            [[1.0]], global_step, learning_rate=0.1, optimizer="SGD")

  def testInvalidGlobalStep(self):
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      x = array_ops.placeholder(dtypes.float32, [])
      var = variable_scope.get_variable(
          "test", [], initializer=init_ops.constant_initializer(10))
      loss = math_ops.abs(var * x)
      with self.assertRaises(AttributeError):
        optimizers_lib.optimize_loss(
            loss,
            global_step=constant_op.constant(
                43, dtype=dtypes.int64),
            learning_rate=0.1,
            optimizer="SGD")
      with self.assertRaises(TypeError):
        optimizers_lib.optimize_loss(
            loss,
            global_step=variable_scope.get_variable(
                "global_step", [],
                trainable=False,
                dtype=dtypes.float64,
                initializer=init_ops.constant_initializer(
                    0.0, dtype=dtypes.float64)),
            learning_rate=0.1,
            optimizer="SGD")
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            loss,
            global_step=variable_scope.get_variable(
                "global_step", [1],
                trainable=False,
                dtype=dtypes.int64,
                initializer=init_ops.constant_initializer(
                    [0], dtype=dtypes.int64)),
            learning_rate=0.1,
            optimizer="SGD")

  def testInvalidLearningRate(self):
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      _, _, loss, global_step = _setup_model()
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=-0.1, optimizer="SGD")

  def testGradientNoise(self):
    random_seed.set_random_seed(42)
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          gradient_noise_scale=10.0)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      # Due to randomness the following number may change if graph is different.
      self.assertAlmostEqual(var_value, 9.86912, 4)
      self.assertEqual(global_step_value, 1)

  def testGradientNoiseWithClipping(self):
    random_seed.set_random_seed(42)
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          gradient_noise_scale=10.0,
          clip_gradients=10.0)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      self.assertAlmostEqual(var_value, 9.86912, 4)
      self.assertEqual(global_step_value, 1)

  def testGradientClip(self):
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          clip_gradients=0.1)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      self.assertAlmostEqual(var_value, 9.98999, 4)
      self.assertEqual(global_step_value, 1)

  def testAdaptiveGradientClip(self):
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      clip_gradients = optimizers_lib.adaptive_clipping_fn()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          clip_gradients=clip_gradients)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      self.assertAlmostEqual(var_value, 9.8916, 4)
      self.assertEqual(global_step_value, 1)
      var_count = 0
      for var in variables.global_variables():
        if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
          var_count += 1
      self.assertEqual(2, var_count)

  def testGradientMultiply(self):
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          gradient_multipliers={var: 7.})
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      # var(0) = 10, x = 5, var(0)/dx = 5,
      # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx
      self.assertAlmostEqual(var_value, 6.5, 4)
      self.assertEqual(global_step_value, 1)

  def testIgnoreVariablesWithNoGradients(self):
    _, _, loss, global_step = _setup_model()

    unused_variable = variable_scope.get_variable("ignore_me", [])

    optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_noise_scale=10.0,
        gradient_multipliers={unused_variable: 1.},
        clip_gradients=10.0)

  def testNoGlobalStep(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x = array_ops.placeholder(dtypes.float32, [])
        var = variable_scope.get_variable(
            "test", [], initializer=init_ops.constant_initializer(10))
        loss = math_ops.abs(var * x)
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step=None,
            learning_rate=0.1,
            optimizer=optimizer,
            update_ops=[update_op])
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())

  def testNoGlobalStepWithDecay(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g):
        x = array_ops.placeholder(dtypes.float32, [])
        var = variable_scope.get_variable(
            "test", [], initializer=init_ops.constant_initializer(10))
        loss = math_ops.abs(var * x)
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        with self.assertRaisesRegexp(
            ValueError, "global_step is required for learning_rate_decay_fn"):
          optimizers_lib.optimize_loss(
              loss,
              global_step=None,
              learning_rate=0.1,
              learning_rate_decay_fn=_no_op_learning_rate_decay_fn,
              optimizer=optimizer,
              update_ops=[update_op])

  def testNoGlobalStepArg(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step=None,
            learning_rate=0.1,
            optimizer=optimizer,
            update_ops=[update_op])
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())
        self.assertEqual(1, global_step.eval())

  def testUpdateOp(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step,
            learning_rate=0.1,
            optimizer=optimizer,
            update_ops=[update_op])
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())
        self.assertEqual(1, global_step.eval())

  def testUpdateOpNoIncrementGlobalStep(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step,
            learning_rate=0.1,
            optimizer=optimizer,
            update_ops=[update_op],
            increment_global_step=False)
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())
        self.assertEqual(0, global_step.eval())

  def testUpdateOpWithNoOpDecay(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step,
            learning_rate=0.1,
            learning_rate_decay_fn=_no_op_learning_rate_decay_fn,
            optimizer=optimizer,
            update_ops=[update_op])
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())
        self.assertEqual(1, global_step.eval())

  def testUpdateOpFromCollection(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
        train = optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=0.1, optimizer=optimizer)
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        var_value, update_var_value, global_step_value = session.run(
            [var, update_var, global_step])
        self.assertEqual(var_value, 9.5)
        self.assertEqual(update_var_value, 20)
        self.assertEqual(global_step_value, 1)


class AdaptiveClipping(test.TestCase):

  def testAverages(self):
    with self.test_session() as session:
      scale = 2.
      grad = array_ops.ones([3, 4]) * scale
      log_norm = np.log(np.sqrt(scale**2 * grad.get_shape().num_elements()))
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.5)(grads_and_vars)

      var_dict = {}
      for var in variables.global_variables():
        if var.name.startswith("AdaptiveMaxNorm"):
          var_dict[var.name.split(":")[0]] = var
      self.assertEqual(2, len(var_dict))
      moving_mean = var_dict["AdaptiveMaxNorm/mean"]
      moving_sq_mean = var_dict["AdaptiveMaxNorm/sq_mean"]
      variables.global_variables_initializer().run()
      mean, sq_mean = session.run([moving_mean, moving_sq_mean])
      self.assertEqual([0], mean)
      self.assertEqual([0], sq_mean)
      for i in range(20):
        mean, sq_mean, _ = session.run(
            [moving_mean, moving_sq_mean, grads_and_vars[0][0]])
        if i == 0:
          self.assertLess(mean, 0.9 * log_norm)
          self.assertLess(sq_mean, 0.9 * log_norm**2)

      self.assertAlmostEqual(float(mean), log_norm, places=4)
      self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4)

  def testClip(self):
    with self.test_session() as session:
      spike = 1000.
      multiplier = array_ops.placeholder(dtypes.float32, [], "multiplier")
      step = array_ops.placeholder(dtypes.int32, [], "step")

      grad = array_ops.ones([3, 4]) * multiplier
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.9, global_step=step)(grads_and_vars)

      variables.global_variables_initializer().run()

      def run(scale, i):
        return session.run(grads_and_vars[0][0],
                           feed_dict={multiplier: scale,
                                      step: i})

      for i in range(20):
        scale = [1., -2.][i % 2]
        clipped_grad = run(scale, i)
        if i > 3:
          self.assertAllClose(np.ones(clipped_grad.shape) * scale, clipped_grad)

      # assert that the spike will have low influence.
      clipped_grad = run(spike, 20)
      self.assertTrue((clipped_grad < 25.).all())

      # assert that a repeated spike will converge to this new value.
      for i in range(10):
        clipped_grad = run(spike, i + 21)

      self.assertAllClose(np.ones(clipped_grad.shape) * spike, clipped_grad)


if __name__ == "__main__":
  test.main()