1 # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """Linear Estimators.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import math 22 23 import six 24 25 from tensorflow.python.estimator import estimator 26 from tensorflow.python.estimator.canned import head as head_lib 27 from tensorflow.python.estimator.canned import optimizers 28 from tensorflow.python.feature_column import feature_column as feature_column_lib 29 from tensorflow.python.ops import array_ops 30 from tensorflow.python.ops import nn 31 from tensorflow.python.ops import partitioned_variables 32 from tensorflow.python.ops import variable_scope 33 from tensorflow.python.ops.losses import losses 34 from tensorflow.python.summary import summary 35 from tensorflow.python.training import ftrl 36 from tensorflow.python.training import training_util 37 from tensorflow.python.util.tf_export import tf_export 38 39 40 # The default learning rate of 0.2 is a historical artifact of the initial 41 # implementation, but seems a reasonable choice. 42 _LEARNING_RATE = 0.2 43 44 45 def _get_default_optimizer(feature_columns): 46 learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns))) 47 return ftrl.FtrlOptimizer(learning_rate=learning_rate) 48 49 50 def _compute_fraction_of_zero(cols_to_vars): 51 """Given a linear cols_to_vars dict, compute the fraction of zero weights. 52 53 Args: 54 cols_to_vars: A dictionary mapping FeatureColumns to lists of tf.Variables 55 like one returned from feature_column_lib.linear_model. 56 57 Returns: 58 The fraction of zeros (sparsity) in the linear model. 59 """ 60 all_weight_vars = [] 61 for var_or_var_list in cols_to_vars.values(): 62 # Skip empty-lists associated with columns that created no Variables. 63 if var_or_var_list: 64 all_weight_vars += [ 65 array_ops.reshape(var, [-1]) for var in var_or_var_list 66 ] 67 return nn.zero_fraction(array_ops.concat(all_weight_vars, axis=0)) 68 69 70 def _linear_logit_fn_builder(units, feature_columns): 71 """Function builder for a linear logit_fn. 72 73 Args: 74 units: An int indicating the dimension of the logit layer. 75 feature_columns: An iterable containing all the feature columns used by 76 the model. 77 78 Returns: 79 A logit_fn (see below). 80 81 """ 82 83 def linear_logit_fn(features): 84 """Linear model logit_fn. 85 86 Args: 87 features: This is the first item returned from the `input_fn` 88 passed to `train`, `evaluate`, and `predict`. This should be a 89 single `Tensor` or `dict` of same. 90 91 Returns: 92 A `Tensor` representing the logits. 93 """ 94 cols_to_vars = {} 95 logits = feature_column_lib.linear_model( 96 features=features, 97 feature_columns=feature_columns, 98 units=units, 99 cols_to_vars=cols_to_vars) 100 bias = cols_to_vars.pop('bias') 101 if units > 1: 102 summary.histogram('bias', bias) 103 else: 104 # If units == 1, the bias value is a length-1 list of a scalar Tensor, 105 # so we should provide a scalar summary. 106 summary.scalar('bias', bias[0][0]) 107 summary.scalar('fraction_of_zero_weights', 108 _compute_fraction_of_zero(cols_to_vars)) 109 return logits 110 111 return linear_logit_fn 112 113 114 def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer, 115 partitioner, config): 116 """A model_fn for linear models that use a gradient-based optimizer. 117 118 Args: 119 features: dict of `Tensor`. 120 labels: `Tensor` of shape `[batch_size, logits_dimension]`. 121 mode: Defines whether this is training, evaluation or prediction. 122 See `ModeKeys`. 123 head: A `Head` instance. 124 feature_columns: An iterable containing all the feature columns used by 125 the model. 126 optimizer: string, `Optimizer` object, or callable that defines the 127 optimizer to use for training. If `None`, will use a FTRL optimizer. 128 partitioner: Partitioner for variables. 129 config: `RunConfig` object to configure the runtime settings. 130 131 Returns: 132 An `EstimatorSpec` instance. 133 134 Raises: 135 ValueError: mode or params are invalid, or features has the wrong type. 136 """ 137 if not isinstance(features, dict): 138 raise ValueError('features should be a dictionary of `Tensor`s. ' 139 'Given type: {}'.format(type(features))) 140 141 optimizer = optimizers.get_optimizer_instance( 142 optimizer or _get_default_optimizer(feature_columns), 143 learning_rate=_LEARNING_RATE) 144 num_ps_replicas = config.num_ps_replicas if config else 0 145 146 partitioner = partitioner or ( 147 partitioned_variables.min_max_variable_partitioner( 148 max_partitions=num_ps_replicas, 149 min_slice_size=64 << 20)) 150 151 with variable_scope.variable_scope( 152 'linear', 153 values=tuple(six.itervalues(features)), 154 partitioner=partitioner): 155 156 logit_fn = _linear_logit_fn_builder( 157 units=head.logits_dimension, feature_columns=feature_columns) 158 logits = logit_fn(features=features) 159 160 def _train_op_fn(loss): 161 """Returns the op to optimize the loss.""" 162 return optimizer.minimize( 163 loss, 164 global_step=training_util.get_global_step()) 165 166 return head.create_estimator_spec( 167 features=features, 168 mode=mode, 169 labels=labels, 170 train_op_fn=_train_op_fn, 171 logits=logits) 172 173 174 @tf_export('estimator.LinearClassifier') 175 class LinearClassifier(estimator.Estimator): 176 """Linear classifier model. 177 178 Train a linear model to classify instances into one of multiple possible 179 classes. When number of possible classes is 2, this is binary classification. 180 181 Example: 182 183 ```python 184 categorical_column_a = categorical_column_with_hash_bucket(...) 185 categorical_column_b = categorical_column_with_hash_bucket(...) 186 187 categorical_feature_a_x_categorical_feature_b = crossed_column(...) 188 189 # Estimator using the default optimizer. 190 estimator = LinearClassifier( 191 feature_columns=[categorical_column_a, 192 categorical_feature_a_x_categorical_feature_b]) 193 194 # Or estimator using the FTRL optimizer with regularization. 195 estimator = LinearClassifier( 196 feature_columns=[categorical_column_a, 197 categorical_feature_a_x_categorical_feature_b], 198 optimizer=tf.train.FtrlOptimizer( 199 learning_rate=0.1, 200 l1_regularization_strength=0.001 201 )) 202 203 # Or estimator with warm-starting from a previous checkpoint. 204 estimator = LinearClassifier( 205 feature_columns=[categorical_column_a, 206 categorical_feature_a_x_categorical_feature_b], 207 warm_start_from="/path/to/checkpoint/dir") 208 209 210 # Input builders 211 def input_fn_train: # returns x, y (where y represents label's class index). 212 ... 213 def input_fn_eval: # returns x, y (where y represents label's class index). 214 ... 215 estimator.train(input_fn=input_fn_train) 216 estimator.evaluate(input_fn=input_fn_eval) 217 estimator.predict(input_fn=input_fn_predict) 218 ``` 219 220 Input of `train` and `evaluate` should have following features, 221 otherwise there will be a `KeyError`: 222 223 * if `weight_column` is not `None`, a feature with 224 `key=weight_column` whose value is a `Tensor`. 225 * for each `column` in `feature_columns`: 226 - if `column` is a `SparseColumn`, a feature with `key=column.name` 227 whose `value` is a `SparseTensor`. 228 - if `column` is a `WeightedSparseColumn`, two features: the first with 229 `key` the id column name, the second with `key` the weight column name. 230 Both features' `value` must be a `SparseTensor`. 231 - if `column` is a `RealValuedColumn`, a feature with `key=column.name` 232 whose `value` is a `Tensor`. 233 234 Loss is calculated by using softmax cross entropy. 235 236 @compatibility(eager) 237 Estimators are not compatible with eager execution. 238 @end_compatibility 239 """ 240 241 def __init__(self, 242 feature_columns, 243 model_dir=None, 244 n_classes=2, 245 weight_column=None, 246 label_vocabulary=None, 247 optimizer='Ftrl', 248 config=None, 249 partitioner=None, 250 warm_start_from=None, 251 loss_reduction=losses.Reduction.SUM): 252 """Construct a `LinearClassifier` estimator object. 253 254 Args: 255 feature_columns: An iterable containing all the feature columns used by 256 the model. All items in the set should be instances of classes derived 257 from `FeatureColumn`. 258 model_dir: Directory to save model parameters, graph and etc. This can 259 also be used to load checkpoints from the directory into a estimator 260 to continue training a previously saved model. 261 n_classes: number of label classes. Default is binary classification. 262 Note that class labels are integers representing the class index (i.e. 263 values from 0 to n_classes-1). For arbitrary label values (e.g. string 264 labels), convert to class indices first. 265 weight_column: A string or a `_NumericColumn` created by 266 `tf.feature_column.numeric_column` defining feature column representing 267 weights. It is used to down weight or boost examples during training. It 268 will be multiplied by the loss of the example. If it is a string, it is 269 used as a key to fetch weight tensor from the `features`. If it is a 270 `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, 271 then weight_column.normalizer_fn is applied on it to get weight tensor. 272 label_vocabulary: A list of strings represents possible label values. If 273 given, labels must be string type and have any value in 274 `label_vocabulary`. If it is not given, that means labels are 275 already encoded as integer or float within [0, 1] for `n_classes=2` and 276 encoded as integer values in {0, 1,..., n_classes-1} for `n_classes`>2 . 277 Also there will be errors if vocabulary is not provided and labels are 278 string. 279 optimizer: An instance of `tf.Optimizer` used to train the model. Defaults 280 to FTRL optimizer. 281 config: `RunConfig` object to configure the runtime settings. 282 partitioner: Optional. Partitioner for input layer. 283 warm_start_from: A string filepath to a checkpoint to warm-start from, or 284 a `WarmStartSettings` object to fully configure warm-starting. If the 285 string filepath is provided instead of a `WarmStartSettings`, then all 286 weights and biases are warm-started, and it is assumed that vocabularies 287 and Tensor names are unchanged. 288 loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how 289 to reduce training loss over batch. Defaults to `SUM`. 290 291 Returns: 292 A `LinearClassifier` estimator. 293 294 Raises: 295 ValueError: if n_classes < 2. 296 """ 297 if n_classes == 2: 298 head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( # pylint: disable=protected-access 299 weight_column=weight_column, 300 label_vocabulary=label_vocabulary, 301 loss_reduction=loss_reduction) 302 else: 303 head = head_lib._multi_class_head_with_softmax_cross_entropy_loss( # pylint: disable=protected-access 304 n_classes, weight_column=weight_column, 305 label_vocabulary=label_vocabulary, 306 loss_reduction=loss_reduction) 307 308 def _model_fn(features, labels, mode, config): 309 """Call the defined shared _linear_model_fn.""" 310 return _linear_model_fn( 311 features=features, 312 labels=labels, 313 mode=mode, 314 head=head, 315 feature_columns=tuple(feature_columns or []), 316 optimizer=optimizer, 317 partitioner=partitioner, 318 config=config) 319 320 super(LinearClassifier, self).__init__( 321 model_fn=_model_fn, 322 model_dir=model_dir, 323 config=config, 324 warm_start_from=warm_start_from) 325 326 327 @tf_export('estimator.LinearRegressor') 328 class LinearRegressor(estimator.Estimator): 329 """An estimator for TensorFlow Linear regression problems. 330 331 Train a linear regression model to predict label value given observation of 332 feature values. 333 334 Example: 335 336 ```python 337 categorical_column_a = categorical_column_with_hash_bucket(...) 338 categorical_column_b = categorical_column_with_hash_bucket(...) 339 340 categorical_feature_a_x_categorical_feature_b = crossed_column(...) 341 342 estimator = LinearRegressor( 343 feature_columns=[categorical_column_a, 344 categorical_feature_a_x_categorical_feature_b]) 345 346 # Or estimator with warm-starting from a previous checkpoint. 347 estimator = LinearRegressor( 348 feature_columns=[categorical_column_a, 349 categorical_feature_a_x_categorical_feature_b], 350 warm_start_from="/path/to/checkpoint/dir") 351 352 353 # Input builders 354 def input_fn_train: # returns x, y 355 ... 356 def input_fn_eval: # returns x, y 357 ... 358 estimator.train(input_fn=input_fn_train) 359 estimator.evaluate(input_fn=input_fn_eval) 360 estimator.predict(input_fn=input_fn_predict) 361 ``` 362 363 Input of `train` and `evaluate` should have following features, 364 otherwise there will be a KeyError: 365 366 * if `weight_column` is not `None`: 367 key=weight_column, value=a `Tensor` 368 * for column in `feature_columns`: 369 - if isinstance(column, `SparseColumn`): 370 key=column.name, value=a `SparseTensor` 371 - if isinstance(column, `WeightedSparseColumn`): 372 {key=id column name, value=a `SparseTensor`, 373 key=weight column name, value=a `SparseTensor`} 374 - if isinstance(column, `RealValuedColumn`): 375 key=column.name, value=a `Tensor` 376 377 Loss is calculated by using mean squared error. 378 379 @compatibility(eager) 380 Estimators are not compatible with eager execution. 381 @end_compatibility 382 """ 383 384 def __init__(self, 385 feature_columns, 386 model_dir=None, 387 label_dimension=1, 388 weight_column=None, 389 optimizer='Ftrl', 390 config=None, 391 partitioner=None, 392 warm_start_from=None, 393 loss_reduction=losses.Reduction.SUM): 394 """Initializes a `LinearRegressor` instance. 395 396 Args: 397 feature_columns: An iterable containing all the feature columns used by 398 the model. All items in the set should be instances of classes derived 399 from `FeatureColumn`. 400 model_dir: Directory to save model parameters, graph and etc. This can 401 also be used to load checkpoints from the directory into a estimator 402 to continue training a previously saved model. 403 label_dimension: Number of regression targets per example. This is the 404 size of the last dimension of the labels and logits `Tensor` objects 405 (typically, these have shape `[batch_size, label_dimension]`). 406 weight_column: A string or a `_NumericColumn` created by 407 `tf.feature_column.numeric_column` defining feature column representing 408 weights. It is used to down weight or boost examples during training. It 409 will be multiplied by the loss of the example. If it is a string, it is 410 used as a key to fetch weight tensor from the `features`. If it is a 411 `_NumericColumn`, raw tensor is fetched by key `weight_column.key`, 412 then weight_column.normalizer_fn is applied on it to get weight tensor. 413 optimizer: An instance of `tf.Optimizer` used to train the model. Defaults 414 to FTRL optimizer. 415 config: `RunConfig` object to configure the runtime settings. 416 partitioner: Optional. Partitioner for input layer. 417 warm_start_from: A string filepath to a checkpoint to warm-start from, or 418 a `WarmStartSettings` object to fully configure warm-starting. If the 419 string filepath is provided instead of a `WarmStartSettings`, then all 420 weights and biases are warm-started, and it is assumed that vocabularies 421 and Tensor names are unchanged. 422 loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how 423 to reduce training loss over batch. Defaults to `SUM`. 424 """ 425 head = head_lib._regression_head_with_mean_squared_error_loss( # pylint: disable=protected-access 426 label_dimension=label_dimension, weight_column=weight_column, 427 loss_reduction=loss_reduction) 428 429 def _model_fn(features, labels, mode, config): 430 """Call the defined shared _linear_model_fn.""" 431 return _linear_model_fn( 432 features=features, 433 labels=labels, 434 mode=mode, 435 head=head, 436 feature_columns=tuple(feature_columns or []), 437 optimizer=optimizer, 438 partitioner=partitioner, 439 config=config) 440 441 super(LinearRegressor, self).__init__( 442 model_fn=_model_fn, 443 model_dir=model_dir, 444 config=config, 445 warm_start_from=warm_start_from) 446