# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for Ftrl operations."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import adagrad
from tensorflow.python.training import ftrl
from tensorflow.python.training import gradient_descent


class FtrlOptimizerTest(test.TestCase):

  def doTestFtrlwithoutRegularization(self, use_resource=False):
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session() as sess:
        if use_resource:
          var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
          var1 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
        else:
          var0 = variables.Variable([0.0, 0.0], dtype=dtype)
          var1 = variables.Variable([0.0, 0.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllClose([0.0, 0.0], v0_val)
        self.assertAllClose([0.0, 0.0], v1_val)

        # Run 3 steps FTRL
        for _ in range(3):
          update.run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-2.60260963, -4.29698515]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.28432083, -0.56694895]), v1_val)

  def testFtrlWithoutRegularization(self):
    self.doTestFtrlwithoutRegularization(use_resource=False)

  def testResourceFtrlWithoutRegularization(self):
    self.doTestFtrlwithoutRegularization(use_resource=True)

  def testFtrlwithoutRegularization2(self):
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session() as sess:
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.0,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run 3 steps FTRL
        for _ in range(3):
          update.run()
        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-2.55607247, -3.98729396]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.28232238, -0.56096673]), v1_val)

  def testMinimizeSparseResourceVariable(self):
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      with self.test_session():
        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                      dtype=dtype)
        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
        loss = pred * pred
        sgd_op = ftrl.FtrlOptimizer(1.0).minimize(loss)
        variables.global_variables_initializer().run()
        # Fetch params to validate initial values
        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
        # Run 1 step of sgd
        sgd_op.run()
        # Validate updated params
        self.assertAllCloseAccordingToType([[0, 1]], var0.eval(), atol=0.01)

  def testFtrlWithL1(self):
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session() as sess:
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=0.0)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run 10 steps FTRL
        for _ in range(10):
          update.run()
        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-7.66718769, -10.91273689]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.93460727, -1.86147261]), v1_val)

  def testFtrlWithL1_L2(self):
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session() as sess:
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run 10 steps FTRL
        for _ in range(10):
          update.run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-0.24059935, -0.46829352]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.02406147, -0.04830509]), v1_val)

  def testFtrlWithL1_L2_L2Shrinkage(self):
    """Test the new FTRL op with support for l2 shrinkage.

    The addition of this parameter, which places a constant pressure on
    weights towards the origin, causes the gradient descent trajectory to
    differ. The weights will tend to have smaller magnitudes with this
    parameter set.
    """
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session() as sess:
        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
        var1 = variables.Variable([4.0, 3.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

        opt = ftrl.FtrlOptimizer(
            3.0,
            initial_accumulator_value=0.1,
            l1_regularization_strength=0.001,
            l2_regularization_strength=2.0,
            l2_shrinkage_regularization_strength=0.1)
        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
        variables.global_variables_initializer().run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
        self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)

        # Run 10 steps FTRL
        for _ in range(10):
          update.run()

        v0_val, v1_val = sess.run([var0, var1])
        self.assertAllCloseAccordingToType(
            np.array([-0.22078767, -0.41378114]), v0_val)
        self.assertAllCloseAccordingToType(
            np.array([-0.02919818, -0.07343706]), v1_val)

  def applyOptimizer(self, opt, dtype, steps=5, is_sparse=False):
    if is_sparse:
      var0 = variables.Variable([[0.0], [0.0]], dtype=dtype)
      var1 = variables.Variable([[0.0], [0.0]], dtype=dtype)
      grads0 = ops.IndexedSlices(
          constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
          constant_op.constant([0]),
          constant_op.constant([2, 1]))
      grads1 = ops.IndexedSlices(
          constant_op.constant([0.02], shape=[1, 1], dtype=dtype),
          constant_op.constant([1]),
          constant_op.constant([2, 1]))
    else:
      var0 = variables.Variable([0.0, 0.0], dtype=dtype)
      var1 = variables.Variable([0.0, 0.0], dtype=dtype)
      grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
      grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)

    update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    variables.global_variables_initializer().run()

    sess = ops.get_default_session()
    v0_val, v1_val = sess.run([var0, var1])
    if is_sparse:
      self.assertAllCloseAccordingToType([[0.0], [0.0]], v0_val)
      self.assertAllCloseAccordingToType([[0.0], [0.0]], v1_val)
    else:
      self.assertAllCloseAccordingToType([0.0, 0.0], v0_val)
      self.assertAllCloseAccordingToType([0.0, 0.0], v1_val)

    # Run Ftrl for a few steps
    for _ in range(steps):
      update.run()

    v0_val, v1_val = sess.run([var0, var1])
    return v0_val, v1_val

  # When the variables are initialized with zeros, FTRL-Proximal has two
  # properties:
  # 1. Without L1 and L2 regularization, but with a fixed learning rate,
  #    FTRL-Proximal is identical to GradientDescent.
  # 2. Without L1 and L2 regularization, but with an adaptive learning rate,
  #    FTRL-Proximal is identical to Adagrad.
  # Based on these two properties, the tests below check that our
  # implementation of FTRL-Proximal performs the same updates as Adagrad or
  # GradientDescent; a hedged sketch of one such update follows.
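  # A minimal sketch of why these equivalences hold, assuming the standard
  # per-coordinate FTRL-Proximal recursion. The helper below is a hypothetical
  # NumPy illustration, not the production kernel, and is not exercised by
  # the tests.
  def _ftrlReferenceStep(self, var, accum, linear, grad, lr, lr_power):
    """Hedged NumPy sketch of one dense FTRL-Proximal step with l1 = l2 = 0."""
    new_accum = accum + grad * grad
    # sigma keeps past updates consistent with the shrinking per-coordinate
    # learning rate; with lr_power = 0.0 it vanishes entirely.
    sigma = (new_accum**(-lr_power) - accum**(-lr_power)) / lr
    new_linear = linear + grad - sigma * var
    # With no L1/L2 term, the proximal argmin is linear in the accumulated
    # "linear" state. For lr_power = -0.5 the resulting step is Adagrad's
    # lr * grad / sqrt(accum); for lr_power = 0.0 it is a plain
    # fixed-learning-rate gradient step.
    new_var = -lr * new_linear / new_accum**(-lr_power)
    return new_var, new_accum, new_linear
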
  def testEquivAdagradwithoutRegularization(self):
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session():
        val0, val1 = self.applyOptimizer(
            ftrl.FtrlOptimizer(
                3.0,
                # Adagrad learning rate
                learning_rate_power=-0.5,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0),
            dtype)

      with self.test_session():
        val2, val3 = self.applyOptimizer(
            adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1),
            dtype)

      self.assertAllCloseAccordingToType(val0, val2)
      self.assertAllCloseAccordingToType(val1, val3)

  def testEquivSparseAdagradwithoutRegularization(self):
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session():
        val0, val1 = self.applyOptimizer(
            ftrl.FtrlOptimizer(
                3.0,
                # Adagrad learning rate
                learning_rate_power=-0.5,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0),
            dtype,
            is_sparse=True)

      with self.test_session():
        val2, val3 = self.applyOptimizer(
            adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1),
            dtype,
            is_sparse=True)

      self.assertAllCloseAccordingToType(val0, val2)
      self.assertAllCloseAccordingToType(val1, val3)

  def testEquivSparseGradientDescentwithoutRegularization(self):
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session():
        val0, val1 = self.applyOptimizer(
            ftrl.FtrlOptimizer(
                3.0,
                # Fixed learning rate
                learning_rate_power=-0.0,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0),
            dtype,
            is_sparse=True)

      with self.test_session():
        val2, val3 = self.applyOptimizer(
            gradient_descent.GradientDescentOptimizer(3.0),
            dtype,
            is_sparse=True)

      self.assertAllCloseAccordingToType(val0, val2)
      self.assertAllCloseAccordingToType(val1, val3)

  def testEquivGradientDescentwithoutRegularization(self):
    for dtype in [dtypes.half, dtypes.float32]:
      with self.test_session():
        val0, val1 = self.applyOptimizer(
            ftrl.FtrlOptimizer(
                3.0,
                # Fixed learning rate
                learning_rate_power=-0.0,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0),
            dtype)

      with self.test_session():
        val2, val3 = self.applyOptimizer(
            gradient_descent.GradientDescentOptimizer(3.0), dtype)

      self.assertAllCloseAccordingToType(val0, val2)
      self.assertAllCloseAccordingToType(val1, val3)


if __name__ == "__main__":
  test.main()
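
# Hedged illustration of the l2 shrinkage term checked by
# testFtrlWithL1_L2_L2Shrinkage above (kept after the __main__ guard so it
# never affects test execution). Per the FtrlOptimizer documentation, the
# shrinkage strength acts as a loss penalty: the gradient fed to the linear
# term becomes grad + 2 * l2_shrinkage * var, pulling weights toward the
# origin, while the accumulator is assumed to keep tracking the raw gradient.
# The function name and standalone form are hypothetical.
def _shrinkage_gradient_sketch(grad, var, l2_shrinkage):
  # Equivalent to adding l2_shrinkage * ||var||^2 to the loss.
  return grad + 2.0 * l2_shrinkage * var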