Home | History | Annotate | Download | only in training
      1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Functional tests for Ftrl operations."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import numpy as np
     22 
     23 from tensorflow.python.framework import constant_op
     24 from tensorflow.python.framework import dtypes
     25 from tensorflow.python.framework import ops
     26 from tensorflow.python.ops import embedding_ops
     27 from tensorflow.python.ops import math_ops
     28 from tensorflow.python.ops import resource_variable_ops
     29 from tensorflow.python.ops import variables
     30 from tensorflow.python.platform import test
     31 from tensorflow.python.training import adagrad
     32 from tensorflow.python.training import ftrl
     33 from tensorflow.python.training import gradient_descent
     34 
     35 
     36 class FtrlOptimizerTest(test.TestCase):
     37 
     38   def doTestFtrlwithoutRegularization(self, use_resource=False):
     39     for dtype in [dtypes.half, dtypes.float32]:
     40       with self.test_session() as sess:
     41         if use_resource:
     42           var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
     43           var1 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype)
     44         else:
     45           var0 = variables.Variable([0.0, 0.0], dtype=dtype)
     46           var1 = variables.Variable([0.0, 0.0], dtype=dtype)
     47         grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
     48         grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
     49         opt = ftrl.FtrlOptimizer(
     50             3.0,
     51             initial_accumulator_value=0.1,
     52             l1_regularization_strength=0.0,
     53             l2_regularization_strength=0.0)
     54         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
     55         variables.global_variables_initializer().run()
     56 
     57         v0_val, v1_val = sess.run([var0, var1])
     58         self.assertAllClose([0.0, 0.0], v0_val)
     59         self.assertAllClose([0.0, 0.0], v1_val)
     60 
     61         # Run 3 steps FTRL
     62         for _ in range(3):
     63           update.run()
     64 
     65         v0_val, v1_val = sess.run([var0, var1])
     66         self.assertAllCloseAccordingToType(
     67             np.array([-2.60260963, -4.29698515]), v0_val)
     68         self.assertAllCloseAccordingToType(
     69             np.array([-0.28432083, -0.56694895]), v1_val)
     70 
     71   def testFtrlWithoutRegularization(self):
     72     self.doTestFtrlwithoutRegularization(use_resource=False)
     73 
     74   def testResourceFtrlWithoutRegularization(self):
     75     self.doTestFtrlwithoutRegularization(use_resource=True)
     76 
     77   def testFtrlwithoutRegularization2(self):
     78     for dtype in [dtypes.half, dtypes.float32]:
     79       with self.test_session() as sess:
     80         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
     81         var1 = variables.Variable([4.0, 3.0], dtype=dtype)
     82         grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
     83         grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
     84 
     85         opt = ftrl.FtrlOptimizer(
     86             3.0,
     87             initial_accumulator_value=0.1,
     88             l1_regularization_strength=0.0,
     89             l2_regularization_strength=0.0)
     90         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
     91         variables.global_variables_initializer().run()
     92 
     93         v0_val, v1_val = sess.run([var0, var1])
     94         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
     95         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
     96 
     97         # Run 3 steps FTRL
     98         for _ in range(3):
     99           update.run()
    100         v0_val, v1_val = sess.run([var0, var1])
    101         self.assertAllCloseAccordingToType(
    102             np.array([-2.55607247, -3.98729396]), v0_val)
    103         self.assertAllCloseAccordingToType(
    104             np.array([-0.28232238, -0.56096673]), v1_val)
    105 
    106   def testMinimizeSparseResourceVariable(self):
    107     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
    108       with self.test_session():
    109         var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
    110         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
    111         pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
    112         loss = pred * pred
    113         sgd_op = ftrl.FtrlOptimizer(1.0).minimize(loss)
    114         variables.global_variables_initializer().run()
    115         # Fetch params to validate initial values
    116         self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
    117         # Run 1 step of sgd
    118         sgd_op.run()
    119         # Validate updated params
    120         self.assertAllCloseAccordingToType(
    121             [[0, 1]], var0.eval(), atol=0.01)
    122 
    123   def testFtrlWithL1(self):
    124     for dtype in [dtypes.half, dtypes.float32]:
    125       with self.test_session() as sess:
    126         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
    127         var1 = variables.Variable([4.0, 3.0], dtype=dtype)
    128         grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
    129         grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
    130 
    131         opt = ftrl.FtrlOptimizer(
    132             3.0,
    133             initial_accumulator_value=0.1,
    134             l1_regularization_strength=0.001,
    135             l2_regularization_strength=0.0)
    136         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    137         variables.global_variables_initializer().run()
    138 
    139         v0_val, v1_val = sess.run([var0, var1])
    140         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
    141         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
    142 
    143         # Run 10 steps FTRL
    144         for _ in range(10):
    145           update.run()
    146         v0_val, v1_val = sess.run([var0, var1])
    147         self.assertAllCloseAccordingToType(
    148             np.array([-7.66718769, -10.91273689]), v0_val)
    149         self.assertAllCloseAccordingToType(
    150             np.array([-0.93460727, -1.86147261]), v1_val)
    151 
    152   def testFtrlWithL1_L2(self):
    153     for dtype in [dtypes.half, dtypes.float32]:
    154       with self.test_session() as sess:
    155         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
    156         var1 = variables.Variable([4.0, 3.0], dtype=dtype)
    157         grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
    158         grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
    159 
    160         opt = ftrl.FtrlOptimizer(
    161             3.0,
    162             initial_accumulator_value=0.1,
    163             l1_regularization_strength=0.001,
    164             l2_regularization_strength=2.0)
    165         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    166         variables.global_variables_initializer().run()
    167 
    168         v0_val, v1_val = sess.run([var0, var1])
    169         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
    170         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
    171 
    172         # Run 10 steps FTRL
    173         for _ in range(10):
    174           update.run()
    175 
    176         v0_val, v1_val = sess.run([var0, var1])
    177         self.assertAllCloseAccordingToType(
    178             np.array([-0.24059935, -0.46829352]), v0_val)
    179         self.assertAllCloseAccordingToType(
    180             np.array([-0.02406147, -0.04830509]), v1_val)
    181 
    182   def testFtrlWithL1_L2_L2Shrinkage(self):
    183     """Test the new FTRL op with support for l2 shrinkage.
    184 
    185     The addition of this parameter which places a constant pressure on weights
    186     towards the origin causes the gradient descent trajectory to differ. The
    187     weights will tend to have smaller magnitudes with this parameter set.
    188     """
    189     for dtype in [dtypes.half, dtypes.float32]:
    190       with self.test_session() as sess:
    191         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
    192         var1 = variables.Variable([4.0, 3.0], dtype=dtype)
    193         grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
    194         grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
    195 
    196         opt = ftrl.FtrlOptimizer(
    197             3.0,
    198             initial_accumulator_value=0.1,
    199             l1_regularization_strength=0.001,
    200             l2_regularization_strength=2.0,
    201             l2_shrinkage_regularization_strength=0.1)
    202         update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    203         variables.global_variables_initializer().run()
    204 
    205         v0_val, v1_val = sess.run([var0, var1])
    206         self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
    207         self.assertAllCloseAccordingToType([4.0, 3.0], v1_val)
    208 
    209         # Run 10 steps FTRL
    210         for _ in range(10):
    211           update.run()
    212 
    213         v0_val, v1_val = sess.run([var0, var1])
    214         self.assertAllCloseAccordingToType(
    215             np.array([-0.22078767, -0.41378114]), v0_val)
    216         self.assertAllCloseAccordingToType(
    217             np.array([-0.02919818, -0.07343706]), v1_val)
    218 
    219   def applyOptimizer(self, opt, dtype, steps=5, is_sparse=False):
    220     if is_sparse:
    221       var0 = variables.Variable([[0.0], [0.0]], dtype=dtype)
    222       var1 = variables.Variable([[0.0], [0.0]], dtype=dtype)
    223       grads0 = ops.IndexedSlices(
    224           constant_op.constant(
    225               [0.1], shape=[1, 1], dtype=dtype),
    226           constant_op.constant([0]),
    227           constant_op.constant([2, 1]))
    228       grads1 = ops.IndexedSlices(
    229           constant_op.constant(
    230               [0.02], shape=[1, 1], dtype=dtype),
    231           constant_op.constant([1]),
    232           constant_op.constant([2, 1]))
    233     else:
    234       var0 = variables.Variable([0.0, 0.0], dtype=dtype)
    235       var1 = variables.Variable([0.0, 0.0], dtype=dtype)
    236       grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
    237       grads1 = constant_op.constant([0.01, 0.02], dtype=dtype)
    238 
    239     update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
    240     variables.global_variables_initializer().run()
    241 
    242     sess = ops.get_default_session()
    243     v0_val, v1_val = sess.run([var0, var1])
    244     if is_sparse:
    245       self.assertAllCloseAccordingToType([[0.0], [0.0]], v0_val)
    246       self.assertAllCloseAccordingToType([[0.0], [0.0]], v1_val)
    247     else:
    248       self.assertAllCloseAccordingToType([0.0, 0.0], v0_val)
    249       self.assertAllCloseAccordingToType([0.0, 0.0], v1_val)
    250 
    251     # Run Ftrl for a few steps
    252     for _ in range(steps):
    253       update.run()
    254 
    255     v0_val, v1_val = sess.run([var0, var1])
    256     return v0_val, v1_val
    257 
    258   # When variables are initialized with Zero, FTRL-Proximal has two properties:
    259   # 1. Without L1&L2 but with fixed learning rate, FTRL-Proximal is identical
    260   # with GradientDescent.
    261   # 2. Without L1&L2 but with adaptive learning rate, FTRL-Proximal is identical
    262   # with Adagrad.
    263   # So, basing on these two properties, we test if our implementation of
    264   # FTRL-Proximal performs same updates as Adagrad or GradientDescent.
    265   def testEquivAdagradwithoutRegularization(self):
    266     for dtype in [dtypes.half, dtypes.float32]:
    267       with self.test_session():
    268         val0, val1 = self.applyOptimizer(
    269             ftrl.FtrlOptimizer(
    270                 3.0,
    271                 # Adagrad learning rate
    272                 learning_rate_power=-0.5,
    273                 initial_accumulator_value=0.1,
    274                 l1_regularization_strength=0.0,
    275                 l2_regularization_strength=0.0),
    276             dtype)
    277 
    278       with self.test_session():
    279         val2, val3 = self.applyOptimizer(
    280             adagrad.AdagradOptimizer(
    281                 3.0, initial_accumulator_value=0.1), dtype)
    282 
    283       self.assertAllCloseAccordingToType(val0, val2)
    284       self.assertAllCloseAccordingToType(val1, val3)
    285 
    286   def testEquivSparseAdagradwithoutRegularization(self):
    287     for dtype in [dtypes.half, dtypes.float32]:
    288       with self.test_session():
    289         val0, val1 = self.applyOptimizer(
    290             ftrl.FtrlOptimizer(
    291                 3.0,
    292                 # Adagrad learning rate
    293                 learning_rate_power=-0.5,
    294                 initial_accumulator_value=0.1,
    295                 l1_regularization_strength=0.0,
    296                 l2_regularization_strength=0.0),
    297             dtype,
    298             is_sparse=True)
    299 
    300       with self.test_session():
    301         val2, val3 = self.applyOptimizer(
    302             adagrad.AdagradOptimizer(
    303                 3.0, initial_accumulator_value=0.1),
    304             dtype,
    305             is_sparse=True)
    306 
    307       self.assertAllCloseAccordingToType(val0, val2)
    308       self.assertAllCloseAccordingToType(val1, val3)
    309 
    310   def testEquivSparseGradientDescentwithoutRegularization(self):
    311     for dtype in [dtypes.half, dtypes.float32]:
    312       with self.test_session():
    313         val0, val1 = self.applyOptimizer(
    314             ftrl.FtrlOptimizer(
    315                 3.0,
    316                 # Fixed learning rate
    317                 learning_rate_power=-0.0,
    318                 initial_accumulator_value=0.1,
    319                 l1_regularization_strength=0.0,
    320                 l2_regularization_strength=0.0),
    321             dtype,
    322             is_sparse=True)
    323 
    324       with self.test_session():
    325         val2, val3 = self.applyOptimizer(
    326             gradient_descent.GradientDescentOptimizer(3.0),
    327             dtype,
    328             is_sparse=True)
    329 
    330       self.assertAllCloseAccordingToType(val0, val2)
    331       self.assertAllCloseAccordingToType(val1, val3)
    332 
    333   def testEquivGradientDescentwithoutRegularization(self):
    334     for dtype in [dtypes.half, dtypes.float32]:
    335       with self.test_session():
    336         val0, val1 = self.applyOptimizer(
    337             ftrl.FtrlOptimizer(
    338                 3.0,
    339                 # Fixed learning rate
    340                 learning_rate_power=-0.0,
    341                 initial_accumulator_value=0.1,
    342                 l1_regularization_strength=0.0,
    343                 l2_regularization_strength=0.0),
    344             dtype)
    345 
    346       with self.test_session():
    347         val2, val3 = self.applyOptimizer(
    348             gradient_descent.GradientDescentOptimizer(3.0), dtype)
    349 
    350       self.assertAllCloseAccordingToType(val0, val2)
    351       self.assertAllCloseAccordingToType(val1, val3)
    352 
    353 
    354 if __name__ == "__main__":
    355   test.main()
    356