# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for AdagradDA operations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import adagrad_da


class AdagradDAOptimizerTest(test.TestCase):

  def doTestAdagradDAwithoutRegularizationBasic1(self, use_resource=False):
    for dtype in [dtypes.float64, dtypes.float32]:
      with self.test_session() as sess:
        global_step = variables.Variable(0, dtype=dtypes.int64)
        if use_resource:
          var0 = resource_variable_ops.ResourceVariable(
              [0.0, 0.0], dtype=dtype)
          var1 = resource_variable_ops.ResourceVariable(
              [0.0, 0.0], dtype=dtype)
        else:
          var0 = variables.Variable([0.0, 0.0], dtype=dtype)
          var1 = variables.Variable([0.0, 0.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllClose([0.0, 0.0], v0_val)
        self.assertAllClose([0.0, 0.0], v1_val)

        # Run a step of AdagradDA
        update.run()

        v0_val, v1_val = sess.run([var0, var1])
        # Let g be the gradient accumulator, gg the gradient squared
        # accumulator, T the global step, lr the learning rate, and k the
        # initial gradient squared accumulator value.
        # w = \dfrac{sign(-g)*lr*|g - l1*T|_{+}}{l2*T*lr + \sqrt{k+gg}}
        # For the first entry of var0:
        #   sign(-0.1)*3.0*(0.1 - 0)/(0 + sqrt(0.1 + 0.1*0.1)) = -0.904534
        # and similarly for the others (spelled out below).
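        # A rough hand-derivation of the remaining expected entries, plugging
        # the same quantities into the formula above (the assertions below
        # only check these to a few decimal places):
        #   var0[1]: sign(-0.2)*3.0*0.2/sqrt(0.1 + 0.2*0.2)     = -1.603567
        #   var1[0]: sign(-0.01)*3.0*0.01/sqrt(0.1 + 0.01*0.01) = -0.094821
        #   var1[1]: sign(-0.02)*3.0*0.02/sqrt(0.1 + 0.02*0.02) = -0.189358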
        self.assertAllCloseAccordingToType(
            np.array([-0.904534, -1.603567]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.094821, -0.189358]), v1_val)

  def testAdagradDAWithoutRegularizationBasic1(self):
    self.doTestAdagradDAwithoutRegularizationBasic1()

  def testResourceAdagradDAWithoutRegularizationBasic1(self):
    self.doTestAdagradDAwithoutRegularizationBasic1(use_resource=True)

  def testMinimizeSparseResourceVariable(self):
    for dtype in [dtypes.float32, dtypes.float64]:
      with self.test_session():
        var0 = resource_variable_ops.ResourceVariable(
            [[1.0, 2.0]], dtype=dtype)
        global_step = resource_variable_ops.ResourceVariable(
            0, dtype=dtypes.int64)
        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
        loss = pred * pred
        sgd_op = adagrad_da.AdagradDAOptimizer(
            1.0, global_step).minimize(loss)
        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
        # Run 1 step of AdagradDA
        sgd_op.run()
        # Validate updated params
        self.assertAllCloseAccordingToType(
            [[-1, -1]], var0.eval(), rtol=0.01)

  def testAdagradDAwithoutRegularizationBasic2(self):
    for dtype in [dtypes.float64, dtypes.float32]:
      with self.test_session() as sess:
        global_step = variables.Variable(0, dtype=dtypes.int64)
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run a step of AdagradDA
        update.run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-0.904534, -1.603567]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.094821, -0.189358]), v1_val)

  def testAdagradDAWithL1(self):
    for dtype in [dtypes.float64, dtypes.float32]:
      with self.test_session() as sess:
        global_step = variables.Variable(0, dtype=dtypes.int64)
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run a step of AdagradDA
        update.run()

        v0_val, v1_val = sess.run([var0, var1])
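        # With l1 = 0.001 and T = 1, each gradient magnitude is shrunk by
        # l1*T = 0.001 before the update (see the formula in
        # doTestAdagradDAwithoutRegularizationBasic1). A rough hand-derivation
        # of the expected values, checked only to a few decimal places below:
        #   var0[0]: -3.0*(0.1 - 0.001)/sqrt(0.1 + 0.01)    = -0.895489
        #   var0[1]: -3.0*(0.2 - 0.001)/sqrt(0.1 + 0.04)    = -1.59555
        #   var1[0]: -3.0*(0.01 - 0.001)/sqrt(0.1 + 0.0001) = -0.085339
        #   var1[1]: -3.0*(0.02 - 0.001)/sqrt(0.1 + 0.0004) = -0.17989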
        self.assertAllCloseAccordingToType(
            np.array([-0.895489, -1.59555]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.085339, -0.17989]), v1_val)

  def testAdagradDAWithL1_L2(self):
    for dtype in [dtypes.float64, dtypes.float32]:
      with self.test_session() as sess:
        global_step = variables.Variable(0, dtype=dtypes.int64)
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = adagrad_da.AdagradDAOptimizer(
            3.0,
            global_step,
            initial_gradient_squared_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0)
        update = opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]), global_step=global_step)
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run a step of AdagradDA
        update.run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-0.046907, -0.093659]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.004275, -0.009023]), v1_val)


if __name__ == "__main__":
  test.main()