# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Adagrad for TensorFlow."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export("train.AdagradOptimizer")
class AdagradOptimizer(optimizer.Optimizer):
  """Optimizer that implements the Adagrad algorithm.

  See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
  or this
  [intro](http://cs.stanford.edu/~ppasupat/a9online/uploads/proximal_notes.pdf).
  """

  def __init__(self, learning_rate, initial_accumulator_value=0.1,
               use_locking=False, name="Adagrad"):
    """Construct a new Adagrad optimizer.

    Args:
      learning_rate: A `Tensor` or a floating point value.  The learning rate.
      initial_accumulator_value: A floating point value.
        Starting value for the accumulators, must be positive.
      use_locking: If `True` use locks for update operations.
      name: Optional name prefix for the operations created when applying
        gradients.  Defaults to "Adagrad".

    Raises:
      ValueError: If the `initial_accumulator_value` is invalid.
    """
    if initial_accumulator_value <= 0.0:
      raise ValueError("initial_accumulator_value must be positive: %s" %
                       initial_accumulator_value)
    super(AdagradOptimizer, self).__init__(use_locking, name)
    self._learning_rate = learning_rate
    self._initial_accumulator_value = initial_accumulator_value
    # Created in Initialize.
    self._learning_rate_tensor = None

  def _create_slots(self, var_list):
    for v in var_list:
      with ops.colocate_with(v):
        dtype = v.dtype.base_dtype
        if v.get_shape().is_fully_defined():
          init = init_ops.constant_initializer(self._initial_accumulator_value,
                                               dtype=dtype)
        else:
          # Use a Tensor instead of initializer if variable does not have
          # static shape.
          init_constant = gen_array_ops.fill(array_ops.shape(v),
                                             self._initial_accumulator_value)
          init = math_ops.cast(init_constant, dtype)
        self._get_or_make_slot_with_initializer(v, init, v.get_shape(), dtype,
                                                "accumulator", self._name)

  def _prepare(self):
    self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
                                                       name="learning_rate")

  def _apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _resource_apply_dense(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype),
        grad,
        use_locking=self._use_locking)

  def _apply_sparse(self, grad, var):
    acc = self.get_slot(var, "accumulator")
    return training_ops.sparse_apply_adagrad(
        var,
        acc,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad.values,
        grad.indices,
        use_locking=self._use_locking)

  def _resource_apply_sparse(self, grad, var, indices):
    acc = self.get_slot(var, "accumulator")
    return training_ops.resource_sparse_apply_adagrad(
        var.handle,
        acc.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype),
        grad,
        indices,
        use_locking=self._use_locking)
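
# Illustrative note (a sketch, not part of the exported API): the dense kernel
# `training_ops.apply_adagrad` used above applies the standard Adagrad update
# per variable.  In NumPy-style pseudocode, with hypothetical arrays `var`,
# `accum`, `grad` and a scalar `lr` standing in for the real tensors and the
# "accumulator" slot:
#
#   accum += grad * grad                  # accumulate squared gradients
#   var -= lr * grad / np.sqrt(accum)     # per-coordinate scaled step
#
# A minimal usage sketch, assuming a TF 1.x graph/session workflow and a
# hypothetical scalar `loss` tensor:
#
#   opt = AdagradOptimizer(learning_rate=0.1)
#   train_op = opt.minimize(loss)  # creates an "accumulator" slot per variable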