# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Linear Estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import six

from tensorflow.python.estimator import estimator
from tensorflow.python.estimator.canned import head as head_lib
from tensorflow.python.estimator.canned import optimizers
from tensorflow.python.feature_column import feature_column as feature_column_lib
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops.losses import losses
from tensorflow.python.summary import summary
from tensorflow.python.training import ftrl
from tensorflow.python.training import training_util
from tensorflow.python.util.tf_export import tf_export


# The default learning rate of 0.2 is a historical artifact of the initial
# implementation, but seems a reasonable choice.
_LEARNING_RATE = 0.2


def _get_default_optimizer(feature_columns):
  learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns)))
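  # For example, with 16 feature columns 1/sqrt(16) = 0.25, so the 0.2 default
  # caps the rate; with 100 columns the rate drops to 1/sqrt(100) = 0.1.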
  return ftrl.FtrlOptimizer(learning_rate=learning_rate)


def _compute_fraction_of_zero(cols_to_vars):
  """Given a linear cols_to_vars dict, compute the fraction of zero weights.

  Args:
    cols_to_vars: A dictionary mapping FeatureColumns to lists of tf.Variables
      like the one returned from feature_column_lib.linear_model.

  Returns:
    The fraction of zeros (sparsity) in the linear model.
  """
  all_weight_vars = []
  for var_or_var_list in cols_to_vars.values():
    # Skip empty lists associated with columns that created no Variables.
    if var_or_var_list:
      all_weight_vars += [
          array_ops.reshape(var, [-1]) for var in var_or_var_list
      ]
  return nn.zero_fraction(array_ops.concat(all_weight_vars, axis=0))
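
# Illustrative behavior of `_compute_fraction_of_zero` (names assumed): for
#   cols_to_vars = {col_a: [w_a], col_b: [w_b]}
# the weights w_a and w_b are flattened and concatenated, and the result is
# the fraction of entries in that concatenation that are exactly zero.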


def _linear_logit_fn_builder(units, feature_columns):
  """Function builder for a linear logit_fn.

  Args:
    units: An int indicating the dimension of the logit layer.
    feature_columns: An iterable containing all the feature columns used by
      the model.

  Returns:
    A logit_fn (see below).
  """

  def linear_logit_fn(features):
    """Linear model logit_fn.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. This should be a
                single `Tensor` or `dict` of same.

    Returns:
      A `Tensor` representing the logits.
    """
    cols_to_vars = {}
    logits = feature_column_lib.linear_model(
        features=features,
        feature_columns=feature_columns,
        units=units,
        cols_to_vars=cols_to_vars)
    bias = cols_to_vars.pop('bias')
    if units > 1:
      summary.histogram('bias', bias)
    else:
      # If units == 1, the bias value is a length-1 list of a scalar Tensor,
      # so we should provide a scalar summary.
      summary.scalar('bias', bias[0][0])
    summary.scalar('fraction_of_zero_weights',
                   _compute_fraction_of_zero(cols_to_vars))
    return logits

  return linear_logit_fn
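
# Illustrative usage of the builder above (internal API; `cols` and `features`
# are assumed to be defined by the caller):
#   logit_fn = _linear_logit_fn_builder(units=3, feature_columns=cols)
#   logits = logit_fn(features=features)  # shape [batch_size, 3]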


def _linear_model_fn(features, labels, mode, head, feature_columns, optimizer,
                     partitioner, config):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: dict of `Tensor`.
    labels: `Tensor` of shape `[batch_size, logits_dimension]`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    head: A `Head` instance.
    feature_columns: An iterable containing all the feature columns used by
      the model.
    optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training. If `None`, an FTRL optimizer is used.
    partitioner: Partitioner for variables.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    An `EstimatorSpec` instance.

  Raises:
    ValueError: mode or params are invalid, or features has the wrong type.
  """
  if not isinstance(features, dict):
    raise ValueError('features should be a dictionary of `Tensor`s. '
                     'Given type: {}'.format(type(features)))

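  # Resolve the optimizer argument (a string name such as 'Ftrl', an
  # `Optimizer` instance, or a callable returning one) into a concrete
  # `Optimizer`; the learning rate only applies when the optimizer is
  # specified by name.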
  optimizer = optimizers.get_optimizer_instance(
      optimizer or _get_default_optimizer(feature_columns),
      learning_rate=_LEARNING_RATE)
  num_ps_replicas = config.num_ps_replicas if config else 0

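  # By default, shard large variables across up to `num_ps_replicas` parameter
  # servers, with a minimum slice size of 64MB (64 << 20 bytes).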
  partitioner = partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      'linear',
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):

    logit_fn = _linear_logit_fn_builder(
        units=head.logits_dimension, feature_columns=feature_columns)
    logits = logit_fn(features=features)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizer.minimize(
          loss,
          global_step=training_util.get_global_step())

    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)


@tf_export('estimator.LinearClassifier')
class LinearClassifier(estimator.Estimator):
  """Linear classifier model.

  Train a linear model to classify instances into one of multiple possible
  classes. When the number of possible classes is 2, this is binary
  classification.

  Example:

  ```python
  categorical_column_a = categorical_column_with_hash_bucket(...)
  categorical_column_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)

  # Estimator using the default optimizer.
  estimator = LinearClassifier(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b])

  # Or estimator using the FTRL optimizer with regularization.
  estimator = LinearClassifier(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b],
      optimizer=tf.train.FtrlOptimizer(
          learning_rate=0.1,
          l1_regularization_strength=0.001
      ))

  # Or estimator with warm-starting from a previous checkpoint.
  estimator = LinearClassifier(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b],
      warm_start_from="/path/to/checkpoint/dir")

  # Input builders
  def input_fn_train():
    # Returns x, y (where y represents label's class index).
    ...
  def input_fn_eval():
    # Returns x, y (where y represents label's class index).
    ...
  def input_fn_predict():
    # Returns x, None.
    ...
  estimator.train(input_fn=input_fn_train)
  estimator.evaluate(input_fn=input_fn_eval)
  estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features,
  otherwise there will be a `KeyError` (see the sketch after this list):

  * if `weight_column` is not `None`, a feature with
    `key=weight_column` whose value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `SparseColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedSparseColumn`, two features: the first with
      `key` the id column name, the second with `key` the weight column name.
      Both features' `value` must be a `SparseTensor`.
    - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
      whose `value` is a `Tensor`.

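  For example, a minimal `input_fn` satisfying these requirements might look
  like the following sketch (feature names and values are illustrative and
  assume the hash-bucket columns above):

  ```python
  def input_fn_train():
    # Keys must match the names passed to the feature columns.
    features = {
        'category_a': tf.constant([['kitchenware'], ['electronics']]),
        'category_b': tf.constant([['red'], ['blue']]),
    }
    labels = tf.constant([[1], [0]])  # class indices in [0, n_classes)
    return features, labels
  ```
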
  Loss is calculated using softmax cross entropy (sigmoid cross entropy when
  `n_classes=2`).

  @compatibility(eager)
  Estimators are not compatible with eager execution.
  @end_compatibility
  """

  def __init__(self,
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column=None,
               label_vocabulary=None,
               optimizer='Ftrl',
               config=None,
               partitioner=None,
               warm_start_from=None,
               loss_reduction=losses.Reduction.SUM):
    """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: Number of label classes. Defaults to 2 (binary
        classification). Note that class labels are integers representing the
        class index (i.e. values from 0 to n_classes-1). For arbitrary label
        values (e.g. string labels), convert to class indices first.
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining a feature column
        representing weights. It is used to down-weight or boost examples
        during training, and is multiplied by the loss of the example. If it
        is a string, it is used as a key to fetch the weight tensor from
        `features`. If it is a `_NumericColumn`, the raw tensor is fetched by
        key `weight_column.key`, then `weight_column.normalizer_fn` is applied
        to it to produce the weight tensor.
      label_vocabulary: A list of strings representing possible label values.
        If given, labels must be of string type and take values in
        `label_vocabulary`. If it is not given, labels must already be encoded
        as integers or floats within [0, 1] for `n_classes=2`, or as integer
        values in {0, 1,..., n_classes-1} for `n_classes` > 2. An error is
        raised if the vocabulary is not provided and labels are strings.
      optimizer: An instance of `tf.Optimizer` used to train the model.
        Defaults to the FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for the input layer.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights and biases are warm-started, and it is assumed that
        vocabularies and Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
        how to reduce training loss over the batch. Defaults to `SUM`.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
    """
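    # Pick a head matching the classification problem: a sigmoid cross-entropy
    # head for binary classification, a softmax cross-entropy head otherwise.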
    if n_classes == 2:
      head = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
          weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)
    else:
      head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(  # pylint: disable=protected-access
          n_classes, weight_column=weight_column,
          label_vocabulary=label_vocabulary,
          loss_reduction=loss_reduction)

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _linear_model_fn."""
      return _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          partitioner=partitioner,
          config=config)

    super(LinearClassifier, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)


@tf_export('estimator.LinearRegressor')
class LinearRegressor(estimator.Estimator):
  """An estimator for TensorFlow linear regression problems.

  Train a linear regression model to predict label values given observations
  of feature values.

  Example:

  ```python
  categorical_column_a = categorical_column_with_hash_bucket(...)
  categorical_column_b = categorical_column_with_hash_bucket(...)

  categorical_feature_a_x_categorical_feature_b = crossed_column(...)

  estimator = LinearRegressor(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b])

  # Or estimator with warm-starting from a previous checkpoint.
  estimator = LinearRegressor(
      feature_columns=[categorical_column_a,
                       categorical_feature_a_x_categorical_feature_b],
      warm_start_from="/path/to/checkpoint/dir")

  # Input builders
  def input_fn_train():
    # Returns x, y.
    ...
  def input_fn_eval():
    # Returns x, y.
    ...
  def input_fn_predict():
    # Returns x, None.
    ...
  estimator.train(input_fn=input_fn_train)
  estimator.evaluate(input_fn=input_fn_eval)
  estimator.predict(input_fn=input_fn_predict)
  ```

  Input of `train` and `evaluate` should have the following features,
  otherwise there will be a `KeyError` (see the sketch after this list):

  * if `weight_column` is not `None`:
    key=weight_column, value=a `Tensor`
  * for column in `feature_columns`:
    - if isinstance(column, `SparseColumn`):
        key=column.name, value=a `SparseTensor`
    - if isinstance(column, `WeightedSparseColumn`):
        {key=id column name, value=a `SparseTensor`,
         key=weight column name, value=a `SparseTensor`}
    - if isinstance(column, `RealValuedColumn`):
        key=column.name, value=a `Tensor`

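  For example, a minimal `input_fn` satisfying these requirements might look
  like the following sketch (feature names and values are illustrative and
  assume the hash-bucket columns above):

  ```python
  def input_fn_train():
    # Keys must match the names passed to the feature columns.
    features = {
        'category_a': tf.constant([['kitchenware'], ['electronics']]),
        'category_b': tf.constant([['red'], ['blue']]),
    }
    labels = tf.constant([[23.5], [17.0]])  # float regression targets
    return features, labels
  ```
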
  Loss is calculated using mean squared error.

  @compatibility(eager)
  Estimators are not compatible with eager execution.
  @end_compatibility
  """

  def __init__(self,
               feature_columns,
               model_dir=None,
               label_dimension=1,
               weight_column=None,
               optimizer='Ftrl',
               config=None,
               partitioner=None,
               warm_start_from=None,
               loss_reduction=losses.Reduction.SUM):
    """Initializes a `LinearRegressor` instance.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      label_dimension: Number of regression targets per example. This is the
        size of the last dimension of the labels and logits `Tensor` objects
        (typically, these have shape `[batch_size, label_dimension]`).
      weight_column: A string or a `_NumericColumn` created by
        `tf.feature_column.numeric_column` defining a feature column
        representing weights. It is used to down-weight or boost examples
        during training, and is multiplied by the loss of the example. If it
        is a string, it is used as a key to fetch the weight tensor from
        `features`. If it is a `_NumericColumn`, the raw tensor is fetched by
        key `weight_column.key`, then `weight_column.normalizer_fn` is applied
        to it to produce the weight tensor.
      optimizer: An instance of `tf.Optimizer` used to train the model.
        Defaults to the FTRL optimizer.
      config: `RunConfig` object to configure the runtime settings.
      partitioner: Optional. Partitioner for the input layer.
      warm_start_from: A string filepath to a checkpoint to warm-start from, or
        a `WarmStartSettings` object to fully configure warm-starting.  If the
        string filepath is provided instead of a `WarmStartSettings`, then all
        weights and biases are warm-started, and it is assumed that
        vocabularies and Tensor names are unchanged.
      loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes
        how to reduce training loss over the batch. Defaults to `SUM`.
    """
    head = head_lib._regression_head_with_mean_squared_error_loss(  # pylint: disable=protected-access
        label_dimension=label_dimension, weight_column=weight_column,
        loss_reduction=loss_reduction)

    def _model_fn(features, labels, mode, config):
      """Call the defined shared _linear_model_fn."""
      return _linear_model_fn(
          features=features,
          labels=labels,
          mode=mode,
          head=head,
          feature_columns=tuple(feature_columns or []),
          optimizer=optimizer,
          partitioner=partitioner,
          config=config)

    super(LinearRegressor, self).__init__(
        model_fn=_model_fn,
        model_dir=model_dir,
        config=config,
        warm_start_from=warm_start_from)