# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for AdagradDA operations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import adagrad_da


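# The expected values asserted in the tests below all follow from the
# closed-form AdagradDA update described in the first test. A minimal NumPy
# sketch of that update for a single dense step; `_adagrad_da_reference` is
# an illustrative helper introduced here for documentation only, not part of
# the TensorFlow API and not used by the tests.
def _adagrad_da_reference(grad, lr, step=1, k=0.1, l1=0.0, l2=0.0):
  """Illustrative closed-form AdagradDA value after `step` identical steps."""
  g = grad * step          # gradient accumulator
  gg = grad * grad * step  # gradient squared accumulator
  return (np.sign(-g) * lr * np.maximum(np.abs(g) - l1 * step, 0.0) /
          (l2 * step * lr + np.sqrt(k + gg)))
# e.g. _adagrad_da_reference(np.array([0.1, 0.2]), 3.0) is approximately
# [-0.904534, -1.603567], matching the first test's assertions.

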
class AdagradDAOptimizerTest(test.TestCase):

  def doTestAdagradDAwithoutRegularizationBasic1(self, use_resource=False):
    for dtype in [dtypes.float64, dtypes.float32]:
      with self.test_session() as sess:
        global_step = variables.Variable(0, dtype=dtypes.int64)
        if use_resource:
          var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
          var1 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
        else:
          var0 = variables.Variable([0.0, 0.0], dtype=dtype)
          var1 = variables.Variable([0.0, 0.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllClose([0.0, 0.0], v0_val)
        self.assertAllClose([0.0, 0.0], v1_val)

        # Run a step of AdagradDA
        update.run()

        v0_val, v1_val = sess.run([var0, var1])
        # Let g be the gradient accumulator, gg the gradient squared
        # accumulator, T the global step, lr the learning rate, and k the
        # initial gradient squared accumulator value.
        # w = \dfrac{sign(-g)*lr*|g - l1*T|_{+}}{l2*T*lr + \sqrt{k+gg}}
        # For the first element:
        # sign(-0.1)*3.0*(0.1 - 0)/(0 + sqrt(0.1 + 0.1*0.1)) = -0.904534;
        # the remaining elements follow the same formula (see below).
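        # By the same formula: sign(-0.2)*3.0*(0.2 - 0)/sqrt(0.1 + 0.2*0.2)
        # = -1.603567, sign(-0.01)*3.0*0.01/sqrt(0.1 + 0.01*0.01) = -0.094821,
        # and sign(-0.02)*3.0*0.02/sqrt(0.1 + 0.02*0.02) = -0.189358.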
        self.assertAllCloseAccordingToType(
            np.array([-0.904534, -1.603567]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.094821, -0.189358]), v1_val)

  def testAdagradDAWithoutRegularizationBasic1(self):
    self.doTestAdagradDAwithoutRegularizationBasic1()

  def testResourceAdagradDAWithoutRegularizationBasic1(self):
    self.doTestAdagradDAwithoutRegularizationBasic1(use_resource=True)

  def testMinimizeSparseResourceVariable(self):
    for dtype in [dtypes.float32, dtypes.float64]:
      with self.test_session():
        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
        global_step = resource_variable_ops.ResourceVariable(
            0, dtype=dtypes.int64)
        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
        loss = pred * pred
        sgd_op = adagrad_da.AdagradDAOptimizer(
            1.0, global_step).minimize(loss)
        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
        # Run 1 step of AdagradDA
        sgd_op.run()
        # Validate updated params
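        # With pred = 1*4 + 2*5 = 14, the gradient of pred*pred w.r.t. var0 is
        # 2*14*[4, 5] = [112, 140]. After one step, w = -1.0*g/sqrt(0.1 + g*g),
        # which is within 0.01 of -1 for both elements, hence the loose rtol.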
        self.assertAllCloseAccordingToType(
            [[-1, -1]], var0.eval(), rtol=0.01)

  def testAdagradDAwithoutRegularizationBasic2(self):
    for dtype in [dtypes.float64, dtypes.float32]:
      with self.test_session() as sess:
        global_step = variables.Variable(0, dtype=dtypes.int64)
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run a step of AdagradDA
        update.run()

        v0_val, v1_val = sess.run([var0, var1])
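        # The dual-averaging update is computed from the accumulated gradients
        # alone, so the result matches Basic1 even though var0 and var1 start
        # from nonzero values here.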
        self.assertAllCloseAccordingToType(
            np.array([-0.904534, -1.603567]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.094821, -0.189358]), v1_val)

  def testAdagradDAWithL1(self):
    for dtype in [dtypes.float64, dtypes.float32]:
      with self.test_session() as sess:
        global_step = variables.Variable(0, dtype=dtypes.int64)
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run a step of AdagradDA
        update.run()

        v0_val, v1_val = sess.run([var0, var1])
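        # With l1 = 0.001 the accumulated gradient is shrunk before the update,
        # e.g. for the first element: -3.0*(0.1 - 0.001)/sqrt(0.1 + 0.1*0.1)
        # = -0.895489.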
        self.assertAllCloseAccordingToType(
            np.array([-0.895489, -1.59555]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.085339, -0.17989]), v1_val)

  def testAdagradDAWithL1_L2(self):
    for dtype in [dtypes.float64, dtypes.float32]:
      with self.test_session() as sess:
        global_step = variables.Variable(0, dtype=dtypes.int64)
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run a step of AdagradDA
        update.run()

        v0_val, v1_val = sess.run([var0, var1])
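        # With l2 = 2.0 the denominator gains an l2*T*lr term, e.g. for the
        # first element: -3.0*(0.1 - 0.001)/(2.0*1*3.0 + sqrt(0.1 + 0.1*0.1))
        # = -0.046907.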
        self.assertAllCloseAccordingToType(
            np.array([-0.046907, -0.093659]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.004275, -0.009023]), v1_val)


if __name__ == "__main__":
  test.main()