# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for optimizers."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.contrib.layers.python.layers import optimizers as optimizers_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import gradient_descent


def _setup_model():
  x = array_ops.placeholder(dtypes.float32, [])
  var = variable_scope.get_variable(
      "test", [], initializer=init_ops.constant_initializer(10))
  loss = math_ops.abs(var * x)
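  # With var initialized to 10 and the tests feeding x = 5, the product
  # var * x is positive, so d(loss)/d(var) = x = 5.  A single SGD step with
  # learning_rate=0.1 therefore moves var from 10 to 9.5, which is the value
  # most tests below assert.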
  global_step = variable_scope.get_variable(
      "global_step", [],
      trainable=False,
      dtype=dtypes.int64,
      initializer=init_ops.constant_initializer(
          0, dtype=dtypes.int64))
  return x, var, loss, global_step


def _no_op_learning_rate_decay_fn(lr, global_step):
  assert lr is not None
  assert global_step is not None
  return lr


class OptimizersTest(test.TestCase):

  def testSGDOptimizer(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1),
        lambda lr: gradient_descent.GradientDescentOptimizer(learning_rate=lr)
    ]
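    # optimize_loss accepts the optimizer as a name string, an Optimizer
    # class, an Optimizer instance, or a callable taking the learning rate;
    # all four forms should produce the same single SGD step.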
    for optimizer in optimizers:
      with ops.Graph().as_default() as g:
        with self.test_session(graph=g) as session:
          x, var, loss, global_step = _setup_model()
          train = optimizers_lib.optimize_loss(
              loss, global_step, learning_rate=0.1, optimizer=optimizer)
          variables.global_variables_initializer().run()
          session.run(train, feed_dict={x: 5})
          var_value, global_step_value = session.run([var, global_step])
          self.assertEqual(var_value, 9.5)
          self.assertEqual(global_step_value, 1)

  def testNoLrCallable(self):

    def optimizer_fn():
      return gradient_descent.GradientDescentOptimizer(learning_rate=0.1)

    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        train = optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=None, optimizer=optimizer_fn)
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        var_value, global_step_value = session.run([var, global_step])
        self.assertEqual(var_value, 9.5)
        self.assertEqual(global_step_value, 1)

  def testWrongOptimizer(self):
    optimizers = ["blah", variables.Variable, object(), lambda x: None]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g:
        with self.test_session(graph=g):
          _, _, loss, global_step = _setup_model()
          with self.assertRaises(ValueError):
            optimizers_lib.optimize_loss(
                loss, global_step, learning_rate=0.1, optimizer=optimizer)

  def testBadSummaries(self):
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      _, _, loss, global_step = _setup_model()
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=0.1, optimizer="SGD",
            summaries=["loss", "bad_summary"])

  def testInvalidLoss(self):
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      _, _, _, global_step = _setup_model()
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            None, global_step, learning_rate=0.1, optimizer="SGD")
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            [[1.0]], global_step, learning_rate=0.1, optimizer="SGD")

  def testInvalidGlobalStep(self):
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      x = array_ops.placeholder(dtypes.float32, [])
      var = variable_scope.get_variable(
          "test", [], initializer=init_ops.constant_initializer(10))
      loss = math_ops.abs(var * x)
      with self.assertRaises(AttributeError):
        optimizers_lib.optimize_loss(
            loss,
            global_step=constant_op.constant(
                43, dtype=dtypes.int64),
            learning_rate=0.1,
            optimizer="SGD")
      with self.assertRaises(TypeError):
        optimizers_lib.optimize_loss(
            loss,
            global_step=variable_scope.get_variable(
                "global_step", [],
                trainable=False,
                dtype=dtypes.float64,
                initializer=init_ops.constant_initializer(
                    0.0, dtype=dtypes.float64)),
            learning_rate=0.1,
            optimizer="SGD")
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            loss,
            global_step=variable_scope.get_variable(
                "global_step", [1],
                trainable=False,
                dtype=dtypes.int64,
                initializer=init_ops.constant_initializer(
                    [0], dtype=dtypes.int64)),
            learning_rate=0.1,
            optimizer="SGD")

  def testInvalidLearningRate(self):
    with ops.Graph().as_default() as g, self.test_session(graph=g):
      _, _, loss, global_step = _setup_model()
      with self.assertRaises(ValueError):
        optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=-0.1, optimizer="SGD")

  def testGradientNoise(self):
    random_seed.set_random_seed(42)
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          gradient_noise_scale=10.0)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      # Due to randomness, the following value may change if the graph changes.
      self.assertAlmostEqual(var_value, 9.86912, 4)
      self.assertEqual(global_step_value, 1)

  def testGradientNoiseWithClipping(self):
    random_seed.set_random_seed(42)
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          gradient_noise_scale=10.0,
          clip_gradients=10.0)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      self.assertAlmostEqual(var_value, 9.86912, 4)
      self.assertEqual(global_step_value, 1)

  def testGradientClip(self):
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          clip_gradients=0.1)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
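      # The raw gradient of 5 is clipped by global norm to 0.1, so the SGD
      # update is 0.1 * 0.1 = 0.01 and var ends up at approximately 9.99.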
      self.assertAlmostEqual(var_value, 9.98999, 4)
      self.assertEqual(global_step_value, 1)

  def testAdaptiveGradientClip(self):
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      clip_gradients = optimizers_lib.adaptive_clipping_fn()
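      # adaptive_clipping_fn returns a callable over (gradient, variable)
      # pairs, so it can be passed directly as clip_gradients.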
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          clip_gradients=clip_gradients)
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      self.assertAlmostEqual(var_value, 9.8916, 4)
      self.assertEqual(global_step_value, 1)
      var_count = 0
      for var in variables.global_variables():
        if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"):
          var_count += 1
      self.assertEqual(2, var_count)

  def testGradientMultiply(self):
    with self.test_session() as session:
      x, var, loss, global_step = _setup_model()
      train = optimizers_lib.optimize_loss(
          loss,
          global_step,
          learning_rate=0.1,
          optimizer="SGD",
          gradient_multipliers={var: 7.})
      variables.global_variables_initializer().run()
      session.run(train, feed_dict={x: 5})
      var_value, global_step_value = session.run([var, global_step])
      # var(0) = 10, x = 5, d(loss)/d(var) = 5,
      # var(1) = var(0) - learning_rate * gradient_multiplier * d(loss)/d(var)
      #        = 10 - 0.1 * 7 * 5 = 6.5
      self.assertAlmostEqual(var_value, 6.5, 4)
      self.assertEqual(global_step_value, 1)

  def testIgnoreVariablesWithNoGradients(self):
    _, _, loss, global_step = _setup_model()

    unused_variable = variable_scope.get_variable("ignore_me", [])

    optimizers_lib.optimize_loss(
        loss,
        global_step,
        learning_rate=0.1,
        optimizer="SGD",
        gradient_noise_scale=10.0,
        gradient_multipliers={unused_variable: 1.},
        clip_gradients=10.0)

  def testNoGlobalStep(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x = array_ops.placeholder(dtypes.float32, [])
        var = variable_scope.get_variable(
            "test", [], initializer=init_ops.constant_initializer(10))
        loss = math_ops.abs(var * x)
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
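        # Even without a global step, the supplied update_ops should be run
        # together with the training op, so update_var moves to 20 below.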
        train = optimizers_lib.optimize_loss(
            loss,
            global_step=None,
            learning_rate=0.1,
            optimizer=optimizer,
            update_ops=[update_op])
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())

  def testNoGlobalStepWithDecay(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g):
        x = array_ops.placeholder(dtypes.float32, [])
        var = variable_scope.get_variable(
            "test", [], initializer=init_ops.constant_initializer(10))
        loss = math_ops.abs(var * x)
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        with self.assertRaisesRegexp(
            ValueError, "global_step is required for learning_rate_decay_fn"):
          optimizers_lib.optimize_loss(
              loss,
              global_step=None,
              learning_rate=0.1,
              learning_rate_decay_fn=_no_op_learning_rate_decay_fn,
              optimizer=optimizer,
              update_ops=[update_op])

  def testNoGlobalStepArg(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step=None,
            learning_rate=0.1,
            optimizer=optimizer,
            update_ops=[update_op])
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())
        self.assertEqual(1, global_step.eval())

  def testUpdateOp(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step,
            learning_rate=0.1,
            optimizer=optimizer,
            update_ops=[update_op])
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())
        self.assertEqual(1, global_step.eval())

  def testUpdateOpNoIncrementGlobalStep(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step,
            learning_rate=0.1,
            optimizer=optimizer,
            update_ops=[update_op],
            increment_global_step=False)
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())
        self.assertEqual(0, global_step.eval())

  def testUpdateOpWithNoOpDecay(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        train = optimizers_lib.optimize_loss(
            loss,
            global_step,
            learning_rate=0.1,
            learning_rate_decay_fn=_no_op_learning_rate_decay_fn,
            optimizer=optimizer,
            update_ops=[update_op])
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        self.assertEqual(9.5, var.eval())
        self.assertEqual(20, update_var.eval())
        self.assertEqual(1, global_step.eval())

  def testUpdateOpFromCollection(self):
    optimizers = [
        "SGD", gradient_descent.GradientDescentOptimizer,
        gradient_descent.GradientDescentOptimizer(learning_rate=0.1)
    ]
    for optimizer in optimizers:
      with ops.Graph().as_default() as g, self.test_session(graph=g) as session:
        x, var, loss, global_step = _setup_model()
        update_var = variable_scope.get_variable(
            "update", [], initializer=init_ops.constant_initializer(10))
        update_op = state_ops.assign(update_var, 20)
        ops.add_to_collection(ops.GraphKeys.UPDATE_OPS, update_op)
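        # Update ops registered in the UPDATE_OPS collection should be picked
        # up by optimize_loss without being passed explicitly.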
        train = optimizers_lib.optimize_loss(
            loss, global_step, learning_rate=0.1, optimizer=optimizer)
        variables.global_variables_initializer().run()
        session.run(train, feed_dict={x: 5})
        var_value, update_var_value, global_step_value = session.run(
            [var, update_var, global_step])
        self.assertEqual(var_value, 9.5)
        self.assertEqual(update_var_value, 20)
        self.assertEqual(global_step_value, 1)


class AdaptiveClipping(test.TestCase):

  def testAverages(self):
    with self.test_session() as session:
      scale = 2.
      grad = array_ops.ones([3, 4]) * scale
      log_norm = np.log(np.sqrt(scale**2 * grad.get_shape().num_elements()))
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.5)(grads_and_vars)
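      # adaptive_clipping_fn tracks exponential moving averages of the log
      # gradient norm and its square in two "AdaptiveMaxNorm" variables; the
      # loop below checks that they converge to log_norm and log_norm**2.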

      var_dict = {}
      for var in variables.global_variables():
        if var.name.startswith("AdaptiveMaxNorm"):
          var_dict[var.name.split(":")[0]] = var
      self.assertEqual(2, len(var_dict))
      moving_mean = var_dict["AdaptiveMaxNorm/mean"]
      moving_sq_mean = var_dict["AdaptiveMaxNorm/sq_mean"]
      variables.global_variables_initializer().run()
      mean, sq_mean = session.run([moving_mean, moving_sq_mean])
      self.assertEqual([0], mean)
      self.assertEqual([0], sq_mean)
      for i in range(20):
        mean, sq_mean, _ = session.run(
            [moving_mean, moving_sq_mean, grads_and_vars[0][0]])
        if i == 0:
          self.assertLess(mean, 0.9 * log_norm)
          self.assertLess(sq_mean, 0.9 * log_norm**2)

      self.assertAlmostEqual(float(mean), log_norm, places=4)
      self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4)

  def testClip(self):
    with self.test_session() as session:
      spike = 1000.
      multiplier = array_ops.placeholder(dtypes.float32, [], "multiplier")
      step = array_ops.placeholder(dtypes.int32, [], "step")

      grad = array_ops.ones([3, 4]) * multiplier
      grads_and_vars = [(grad, grad)]
      grads_and_vars = optimizers_lib.adaptive_clipping_fn(
          decay=0.9, global_step=step)(grads_and_vars)
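      # After a short warm-up, gradients whose norm matches the recent history
      # should pass through unclipped, while a sudden spike gets clipped hard.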

      variables.global_variables_initializer().run()

      def run(scale, i):
        return session.run(grads_and_vars[0][0],
                           feed_dict={multiplier: scale,
                                      step: i})

      for i in range(20):
        scale = [1., -2.][i % 2]
        clipped_grad = run(scale, i)
        if i > 3:
          self.assertAllClose(np.ones(clipped_grad.shape) * scale, clipped_grad)

      # Assert that a single spike is heavily clipped and has little influence.
      clipped_grad = run(spike, 20)
      self.assertTrue((clipped_grad < 25.).all())

      # Assert that when the spike repeats, the clipping adapts and the
      # gradient converges to the new (spike) value.
      for i in range(10):
        clipped_grad = run(spike, i + 21)

      self.assertAllClose(np.ones(clipped_grad.shape) * spike, clipped_grad)


if __name__ == "__main__":
  test.main()