Home | History | Annotate | Download | only in python
      1 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Linear Estimators."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 from tensorflow.contrib import layers
     22 from tensorflow.python.training import training_util
     23 from tensorflow.contrib.learn.python.learn.estimators import estimator
     24 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
     25 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
     26 from tensorflow.contrib.linear_optimizer.python import sdca_optimizer
     27 from tensorflow.python.framework import dtypes
     28 from tensorflow.python.framework import sparse_tensor
     29 from tensorflow.python.framework import tensor_util
     30 from tensorflow.python.ops import array_ops
     31 from tensorflow.python.ops import variable_scope
     32 from tensorflow.python.training import session_run_hook
     33 
     34 
     35 def _head_is_valid_for_sdca(head):
     36   """Returns true if the provided head is supported by SDCAOptimizer."""
     37   # pylint: disable=protected-access
     38   return isinstance(head, head_lib._BinaryLogisticHead) or isinstance(
     39       head, head_lib._BinarySvmHead) or isinstance(head,
     40                                                    head_lib._RegressionHead)
     41   # pylint: enable=protected-access
     42 
     43 
     44 def _add_bias_column(feature_columns, columns_to_tensors, bias_variable,
     45                      columns_to_variables):
     46   """Adds a fake bias feature column filled with all 1s."""
     47   # TODO(b/31008490): Move definition to a common constants place.
     48   bias_column_name = "tf_virtual_bias_column"
     49   if any(col.name is bias_column_name for col in feature_columns):
     50     raise ValueError("%s is a reserved column name." % bias_column_name)
     51   if not feature_columns:
     52     raise ValueError("feature_columns can't be empty.")
     53 
     54   # Loop through input tensors until we can figure out batch_size.
     55   batch_size = None
     56   for column in columns_to_tensors.values():
     57     if isinstance(column, tuple):
     58       column = column[0]
     59     if isinstance(column, sparse_tensor.SparseTensor):
     60       shape = tensor_util.constant_value(column.dense_shape)
     61       if shape is not None:
     62         batch_size = shape[0]
     63         break
     64     else:
     65       batch_size = array_ops.shape(column)[0]
     66       break
     67   if batch_size is None:
     68     raise ValueError("Could not infer batch size from input features.")
     69 
     70   bias_column = layers.real_valued_column(bias_column_name)
     71   columns_to_tensors[bias_column] = array_ops.ones(
     72       [batch_size, 1], dtype=dtypes.float32)
     73   columns_to_variables[bias_column] = [bias_variable]
     74 
     75 
     76 def sdca_model_fn(features, labels, mode, params, config=None):
     77   """A model_fn for linear models that use the SDCA optimizer.
     78 
     79   Args:
     80     features: A dict of `Tensor` keyed by column name.
     81     labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
     82       dtype `int32` or `int64` with values in the set {0, 1}.
     83     mode: Defines whether this is training, evaluation or prediction.
     84       See `ModeKeys`.
     85     params: A dict of hyperparameters.
     86       The following hyperparameters are expected:
     87       * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
     88           `_RegressionHead` or `_BinaryLogisticHead`.
     89       * feature_columns: An iterable containing all the feature columns used by
     90           the model.
     91       * l1_regularization: Global (across all examples) L1-regularization
     92           parameter.
     93       * l2_regularization: Global (across all examples) L2-regularization
     94           parameter.
     95       * num_loss_partitions: Number of partitions of the global loss function
     96           optimized by `SDCAOptimizer`.
     97       * weight_column_name: A string defining the weight feature column, or
     98           None if there are no weights.
     99       * update_weights_hook: A `SessionRunHook` object or None. Used to update
    100           model weights.
    101     config: `RunConfig` object to configure the runtime settings.
    102 
    103   Returns:
    104     A `ModelFnOps` instance.
    105 
    106   Raises:
    107     ValueError: If the type of head is not one of `_BinarySvmHead`,
    108       `_RegressionHead` or `_MultiClassHead`.
    109     ValueError: If mode is not any of the `ModeKeys`.
    110   """
    111   head = params["head"]
    112   feature_columns = params["feature_columns"]
    113   example_id_column = params["example_id_column"]
    114   l1_regularization = params["l1_regularization"]
    115   l2_regularization = params["l2_regularization"]
    116   num_loss_partitions = params["num_loss_partitions"]
    117   weight_column_name = params["weight_column_name"]
    118   update_weights_hook = params.get("update_weights_hook", None)
    119   partitioner = params["partitioner"]
    120 
    121   loss_type = None
    122   if isinstance(head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
    123     loss_type = "hinge_loss"
    124   elif isinstance(head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
    125     loss_type = "logistic_loss"
    126   elif isinstance(head, head_lib._RegressionHead):  # pylint: disable=protected-access
    127     loss_type = "squared_loss"
    128   else:
    129     raise ValueError("Unsupported head type: {}".format(type(head)))
    130 
    131   assert head.logits_dimension == 1, (
    132       "SDCA only applies to logits_dimension=1.")
    133 
    134   # Update num_loss_partitions based on number of workers.
    135   n_loss_partitions = num_loss_partitions or max(1, config.num_worker_replicas)
    136   optimizer = sdca_optimizer.SDCAOptimizer(
    137       example_id_column=example_id_column,
    138       num_loss_partitions=n_loss_partitions,
    139       symmetric_l1_regularization=l1_regularization,
    140       symmetric_l2_regularization=l2_regularization,
    141       partitioner=partitioner)
    142 
    143   parent_scope = "linear"
    144 
    145   with variable_scope.variable_scope(
    146       values=features.values(), name_or_scope=parent_scope,
    147       partitioner=partitioner) as scope:
    148     features = features.copy()
    149     features.update(layers.transform_features(features, feature_columns))
    150     logits, columns_to_variables, bias = (
    151         layers.weighted_sum_from_feature_columns(
    152             columns_to_tensors=features,
    153             feature_columns=feature_columns,
    154             num_outputs=1,
    155             scope=scope))
    156 
    157     _add_bias_column(feature_columns, features, bias, columns_to_variables)
    158 
    159   def _train_op_fn(unused_loss):
    160     global_step = training_util.get_global_step()
    161     sdca_model, train_op = optimizer.get_train_step(
    162         columns_to_variables, weight_column_name, loss_type, features, labels,
    163         global_step)
    164     if update_weights_hook is not None:
    165       update_weights_hook.set_parameters(sdca_model, train_op)
    166     return train_op
    167 
    168   model_fn_ops = head.create_model_fn_ops(
    169       features=features,
    170       labels=labels,
    171       mode=mode,
    172       train_op_fn=_train_op_fn,
    173       logits=logits)
    174   if update_weights_hook is not None:
    175     return model_fn_ops._replace(training_chief_hooks=(
    176         model_fn_ops.training_chief_hooks + [update_weights_hook]))
    177   return model_fn_ops
    178 
    179 
    180 class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook):
    181   """SessionRunHook to update and shrink SDCA model weights."""
    182 
    183   def __init__(self):
    184     pass
    185 
    186   def set_parameters(self, sdca_model, train_op):
    187     self._sdca_model = sdca_model
    188     self._train_op = train_op
    189 
    190   def begin(self):
    191     """Construct the update_weights op.
    192 
    193     The op is implicitly added to the default graph.
    194     """
    195     self._update_op = self._sdca_model.update_weights(self._train_op)
    196 
    197   def before_run(self, run_context):
    198     """Return the update_weights op so that it is executed during this run."""
    199     return session_run_hook.SessionRunArgs(self._update_op)
    200 
    201 
    202 class _SDCAEstimator(estimator.Estimator):
    203   """Base estimator class for linear models using the SDCA optimizer.
    204 
    205   This class should not be used directly. Rather, users should call one of the
    206   derived estimators.
    207   """
    208 
    209   def __init__(self,
    210                example_id_column,
    211                feature_columns,
    212                weight_column_name=None,
    213                model_dir=None,
    214                head=None,
    215                l1_regularization=0.0,
    216                l2_regularization=1.0,
    217                num_loss_partitions=None,
    218                config=None,
    219                feature_engineering_fn=None,
    220                partitioner=None):
    221     """Construct a `_SDCAEstimator` estimator object.
    222 
    223     Args:
    224       example_id_column: A string defining the feature column name representing
    225         example ids. Used to initialize the underlying SDCA optimizer.
    226       feature_columns: An iterable containing all the feature columns used by
    227         the model. All items in the set should be instances of classes derived
    228         from `FeatureColumn`.
    229       weight_column_name: A string defining feature column name representing
    230         weights. It is used to down weight or boost examples during training. It
    231         will be multiplied by the loss of the example.
    232       model_dir: Directory to save model parameters, graph etc. This can also be
    233         used to load checkpoints from the directory into an estimator to
    234         continue training a previously saved model.
    235       head: type of head. Currently, _BinaryLogisticHead and _BinarySvmHead are
    236         supported for classification and _RegressionHead for regression. It
    237         should be a subclass of _SingleHead.
    238       l1_regularization: L1-regularization parameter. Refers to global L1
    239         regularization (across all examples).
    240       l2_regularization: L2-regularization parameter. Refers to global L2
    241         regularization (across all examples).
    242       num_loss_partitions: number of partitions of the (global) loss function
    243         optimized by the underlying optimizer (SDCAOptimizer).
    244       config: `RunConfig` object to configure the runtime settings.
    245       feature_engineering_fn: Feature engineering function. Takes features and
    246         labels which are the output of `input_fn` and returns features and
    247         labels which will be fed into the model.
    248       partitioner: Variable partitioner for the primal weights (`div`
    249         partitioning strategy will be used).
    250 
    251     Returns:
    252       A `_SDCAEstimator` estimator.
    253 
    254     Raises:
    255       ValueError: if head is not supported by SDCA.
    256     """
    257     self._feature_columns = tuple(feature_columns or [])
    258     assert self._feature_columns
    259 
    260     if not _head_is_valid_for_sdca(head):
    261       raise ValueError(
    262           "head type: {} is not supported. Supported head types: "
    263           "_BinaryLogisticHead, _BinarySvmHead and _RegressionHead.".format(
    264               type(head)))
    265     assert head.logits_dimension == 1
    266 
    267     params = {
    268         "head": head,
    269         "feature_columns": feature_columns,
    270         "example_id_column": example_id_column,
    271         "num_loss_partitions": num_loss_partitions,
    272         "l1_regularization": l1_regularization,
    273         "l2_regularization": l2_regularization,
    274         "weight_column_name": weight_column_name,
    275         "update_weights_hook": _SdcaUpdateWeightsHook(),
    276         "partitioner": partitioner,
    277     }
    278 
    279     super(_SDCAEstimator, self).__init__(
    280         model_fn=sdca_model_fn,
    281         model_dir=model_dir,
    282         config=config,
    283         params=params,
    284         feature_engineering_fn=feature_engineering_fn)
    285 
    286 
    287 class SDCALogisticClassifier(_SDCAEstimator):
    288   """Logistic regression binary classifier using the SDCA optimizer.
    289 
    290   Example usage:
    291 
    292   ```python
    293   sparse_column_a = sparse_column_with_hash_bucket(...)
    294   sparse_column_b = sparse_column_with_hash_bucket(...)
    295 
    296   sparse_feature_a_x_sparse_feature_b = crossed_column(...)
    297 
    298   classifier = SDCALogisticClassifier(
    299       example_id_column='example_id',
    300       feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b]),
    301       weight_column_name=...,
    302       l2_regularization=...,
    303       num_loss_partitions=...,
    304   )
    305 
    306   # Input builders
    307   # returns x, y (where y is the label Tensor (with 0/1 values)
    308   def input_fn_{train, eval}:
    309 
    310   # returns x (features dict)
    311   def input_fn_test:
    312     ...
    313   classifier.fit(input_fn=input_fn_train)
    314   classifier.evaluate(input_fn=input_fn_eval)
    315   # Returns predicted classes.
    316   classifier.predict_classes(input_fn=input_fn_test)
    317   # Returns predicted probabilities.
    318   classifier.predict_proba(input_fn=input_fn_test)
    319   ```
    320 
    321   The input_fn provided to `fit`, `evaluate` and predict_* methods should return
    322   the following features, otherwise there will be a `KeyError`:
    323     * A feature with `key=example_id_column` whose value is a `Tensor` of dtype
    324       string.
    325     * If `weight_column_name` is not `None`, a feature with
    326       `key=weight_column_name` whose value is a `Tensor`.
    327     * For each `column` in `feature_columns`:
    328       - if `column` is a `SparseColumn`, a feature with `key=column.name` whose
    329         `value` is a `SparseTensor`
    330       - if `column` is a `RealValuedColumn, a feature with `key=column.name`
    331         whose `value` is a `Tensor`
    332       - if `column` is a `WeightedSparseColumn`, two features: the first with
    333         `key` the id column name, the second with `key` the weight column name.
    334         Both features' `value` must be a `SparseTensor`
    335   """
    336 
    337   def __init__(self,
    338                example_id_column,
    339                feature_columns,
    340                weight_column_name=None,
    341                model_dir=None,
    342                l1_regularization=0.0,
    343                l2_regularization=1.0,
    344                num_loss_partitions=None,
    345                config=None,
    346                feature_engineering_fn=None,
    347                partitioner=None):
    348     """Construct a `SDCALogisticClassifier` object.
    349 
    350     Args:
    351       example_id_column: A string defining the feature column name representing
    352         example ids. Used to initialize the underlying SDCA optimizer.
    353       feature_columns: An iterable containing all the feature columns used by
    354         the model. All items in the iterable should derive from `FeatureColumn`.
    355         Note that the order of the items is ignored at model construction time.
    356       weight_column_name: A string defining feature column name representing
    357         weights. It is used to downweight or boost examples during training. It
    358         will be multiplied by the loss of the example.
    359       model_dir: Directory to save model parameters, graph etc. This can also be
    360         used to load checkpoints from the directory into an estimator to
    361         continue training a previously saved model.
    362       l1_regularization: L1-regularization parameter. Refers to global L1
    363         regularization (across all examples).
    364       l2_regularization: L2-regularization parameter. Refers to global L2
    365         regularization (across all examples).
    366       num_loss_partitions: Number of partitions of the global loss function
    367         optimized by the underlying optimizer (SDCAOptimizer).
    368       config: `RunConfig` object to configure the runtime settings.
    369       feature_engineering_fn: Feature engineering function. Takes features and
    370         labels which are the output of `input_fn` and returns features and
    371         labels which will be fed into the model.
    372       partitioner: Variable partitioner for the primal weights (`div`
    373         partitioning strategy will be used).
    374 
    375     Returns:
    376       A `SDCALogisiticClassifier` estimator.
    377     """
    378     super(SDCALogisticClassifier, self).__init__(
    379         example_id_column=example_id_column,
    380         feature_columns=feature_columns,
    381         weight_column_name=weight_column_name,
    382         model_dir=model_dir,
    383         head=head_lib.multi_class_head(
    384             n_classes=2, weight_column_name=weight_column_name),
    385         l1_regularization=l1_regularization,
    386         l2_regularization=l2_regularization,
    387         num_loss_partitions=num_loss_partitions,
    388         config=config,
    389         feature_engineering_fn=None,
    390         partitioner=partitioner)
    391 
    392   def predict_classes(self, input_fn=None):
    393     """Runs inference to determine the predicted class.
    394 
    395     Args:
    396       input_fn: The input function providing features.
    397 
    398     Returns:
    399       A generator of predicted classes for the features provided by input_fn.
    400     """
    401     key = prediction_key.PredictionKey.CLASSES
    402     predictions = super(SDCALogisticClassifier, self).predict(
    403         input_fn=input_fn, outputs=[key])
    404     return (pred[key] for pred in predictions)
    405 
    406   def predict_proba(self, input_fn=None):
    407     """Runs inference to determine the class probability predictions.
    408 
    409     Args:
    410       input_fn: The input function providing features.
    411 
    412     Returns:
    413       A generator of predicted class probabilities for the features provided by
    414         input_fn.
    415     """
    416     key = prediction_key.PredictionKey.PROBABILITIES
    417     predictions = super(SDCALogisticClassifier, self).predict(
    418         input_fn=input_fn, outputs=[key])
    419     return (pred[key] for pred in predictions)
    420 
    421 
    422 class SDCALinearRegressor(_SDCAEstimator):
    423   """Linear regression model using SDCA to solve the underlying optimization.
    424 
    425   Example usage:
    426 
    427   ```python
    428   real_column_a = real_valued_column(...)
    429   sparse_column_b = sparse_column_with_hash_bucket(...)
    430 
    431   regressor = SDCALinearRegressor(
    432       example_id_column='example_id',
    433       feature_columns=[real_column_a, sparse_column_b]),
    434       weight_column_name=...,
    435       l2_regularization=...,
    436       num_loss_partitions=...,
    437   )
    438 
    439   # Input builders
    440   # returns x, y (where y is the label Tensor (with 0/1 values)
    441   def input_fn_{train, eval}:
    442 
    443   # returns x (features dict)
    444   def input_fn_test:
    445     ...
    446   regressor.fit(input_fn=input_fn_train)
    447   regressor.evaluate(input_fn=input_fn_eval)
    448   regressor.predict_scores(input_fn=input_fn_test) # returns predicted scores.
    449   ```
    450 
    451   The input_fn provided to `fit`, `evaluate` and predict_* methods should return
    452   the following features, otherwise there will be a `KeyError`:
    453     * A feature with `key=example_id_column` whose value is a `Tensor` of dtype
    454       string.
    455     * If `weight_column_name` is not `None`, a feature with
    456       `key=weight_column_name` whose value is a `Tensor`.
    457     * For each `column` in `feature_columns`:
    458       - if `column` is a `SparseColumn`, a feature with `key=column.name` whose
    459         `value` is a `SparseTensor`
    460       - if `column` is a `RealValuedColumn, a feature with `key=column.name`
    461         whose `value` is a `Tensor`
    462       - if `column` is a `WeightedSparseColumn`, two features: the first with
    463         `key` the id column name, the second with `key` the weight column name.
    464         Both features' `value` must be a `SparseTensor`
    465 
    466   """
    467 
    468   def __init__(self,
    469                example_id_column,
    470                feature_columns,
    471                weight_column_name=None,
    472                model_dir=None,
    473                l1_regularization=0.0,
    474                l2_regularization=1.0,
    475                num_loss_partitions=None,
    476                config=None,
    477                feature_engineering_fn=None,
    478                partitioner=None):
    479     """Construct a `SDCALinearRegressor` estimator object.
    480 
    481 
    482     Args:
    483       example_id_column: A string defining the feature column name representing
    484         example ids. Used to initialize the underlying SDCA optimizer.
    485       feature_columns: An iterable containing all the feature columns used by
    486         the model. All items in the iterable should derive from `FeatureColumn`.
    487         Note that the order of the items is ignored at model construction time.
    488       weight_column_name: A string defining feature column name representing
    489         weights. It is used to down weight or boost examples during training. It
    490         will be multiplied by the loss of the example.
    491       model_dir: Directory to save model parameters, graph etc. This can also be
    492         used to load checkpoints from the directory into an estimator to
    493         continue training a previously saved model.
    494       l1_regularization: L1-regularization parameter. Refers to global L1
    495         regularization (across all examples).
    496       l2_regularization: L2-regularization parameter. Refers to global L2
    497         regularization (across all examples).
    498       num_loss_partitions: number of partitions of the (global) loss function
    499         optimized by the underlying optimizer (SDCAOptimizer).
    500       config: `RunConfig` object to configure the runtime settings.
    501       feature_engineering_fn: Feature engineering function. Takes features and
    502         labels which are the output of `input_fn` and returns features and
    503         labels which will be fed into the model.
    504       partitioner: Variable partitioner for the primal weights (`div`
    505         partitioning strategy will be used).
    506 
    507     Returns:
    508       A `SDCALinearRegressor` estimator.
    509     """
    510     super(SDCALinearRegressor, self).__init__(
    511         example_id_column=example_id_column,
    512         feature_columns=feature_columns,
    513         weight_column_name=weight_column_name,
    514         model_dir=model_dir,
    515         head=head_lib.regression_head(weight_column_name=weight_column_name),
    516         l1_regularization=l1_regularization,
    517         l2_regularization=l2_regularization,
    518         num_loss_partitions=num_loss_partitions,
    519         config=config,
    520         feature_engineering_fn=None,
    521         partitioner=partitioner)
    522 
    523   def predict_scores(self, input_fn):
    524     """Returns predicted scores for given features.
    525 
    526     Args:
    527       input_fn: The input function providing features.
    528 
    529     Returns:
    530       A generator of predicted scores for the features provided by input_fn.
    531     """
    532     key = prediction_key.PredictionKey.SCORES
    533     predictions = super(SDCALinearRegressor, self).predict(
    534         input_fn=input_fn, outputs=[key])
    535     return (pred[key] for pred in predictions)
    536