# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Linear Estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib import layers
from tensorflow.python.training import training_util
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
from tensorflow.contrib.learn.python.learn.estimators import prediction_key
from tensorflow.contrib.linear_optimizer.python import sdca_optimizer
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.training import session_run_hook


def _head_is_valid_for_sdca(head):
  """Returns true if the provided head is supported by SDCAOptimizer."""
  # pylint: disable=protected-access
  return isinstance(head, (head_lib._BinaryLogisticHead,
                           head_lib._BinarySvmHead,
                           head_lib._RegressionHead))
  # pylint: enable=protected-access


def _add_bias_column(feature_columns, columns_to_tensors, bias_variable,
                     columns_to_variables):
  """Adds a fake bias feature column filled with all 1s.

  Both `columns_to_tensors` and `columns_to_variables` are mutated in place: a
  new real-valued column keyed by the reserved bias name is inserted into each.

  Args:
    feature_columns: The feature columns of the model. Must be non-empty and
      must not contain a column using the reserved bias column name.
    columns_to_tensors: A dict mapping columns (or column names) to input
      tensors. Used to infer the batch size; receives the all-ones bias tensor.
    bias_variable: The bias variable to associate with the bias column.
    columns_to_variables: A dict mapping columns to lists of variables;
      receives `[bias_variable]` for the bias column.

  Raises:
    ValueError: If a feature column uses the reserved bias column name, if
      `feature_columns` is empty, or if the batch size cannot be inferred from
      any of the input tensors.
  """
  # TODO(b/31008490): Move definition to a common constants place.
  bias_column_name = "tf_virtual_bias_column"
  # Compare by value: `is` on strings only checks object identity and would
  # let an equal-but-distinct string slip through the reserved-name check.
  if any(col.name == bias_column_name for col in feature_columns):
    raise ValueError("%s is a reserved column name." % bias_column_name)
  if not feature_columns:
    raise ValueError("feature_columns can't be empty.")

  # Loop through input tensors until we can figure out batch_size.
  batch_size = None
  for column in columns_to_tensors.values():
    if isinstance(column, tuple):
      column = column[0]
    if isinstance(column, sparse_tensor.SparseTensor):
      # A sparse tensor is only usable when its dense shape is statically
      # known; otherwise keep looking at the remaining inputs.
      shape = tensor_util.constant_value(column.dense_shape)
      if shape is not None:
        batch_size = shape[0]
        break
    else:
      batch_size = array_ops.shape(column)[0]
      break
  if batch_size is None:
    raise ValueError("Could not infer batch size from input features.")

  bias_column = layers.real_valued_column(bias_column_name)
  columns_to_tensors[bias_column] = array_ops.ones(
      [batch_size, 1], dtype=dtypes.float32)
  columns_to_variables[bias_column] = [bias_variable]


def sdca_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` with values in the set {0, 1}.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
          `_RegressionHead` or `_BinaryLogisticHead`.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * example_id_column: The example id column passed to `SDCAOptimizer`.
      * l1_regularization: Global (across all examples) L1-regularization
          parameter.
      * l2_regularization: Global (across all examples) L2-regularization
          parameter.
      * num_loss_partitions: Number of partitions of the global loss function
          optimized by `SDCAOptimizer`. If falsy, it is derived from the number
          of worker replicas in `config` (or 1 when `config` is None).
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
          model weights. Optional key.
      * partitioner: Variable partitioner passed to the variable scope and to
          `SDCAOptimizer`.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If the type of head is not one of `_BinarySvmHead`,
      `_RegressionHead` or `_BinaryLogisticHead`.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  example_id_column = params["example_id_column"]
  l1_regularization = params["l1_regularization"]
  l2_regularization = params["l2_regularization"]
  num_loss_partitions = params["num_loss_partitions"]
  weight_column_name = params["weight_column_name"]
  update_weights_hook = params.get("update_weights_hook", None)
  partitioner = params["partitioner"]

  loss_type = None
  if isinstance(head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
    loss_type = "hinge_loss"
  elif isinstance(head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
    loss_type = "logistic_loss"
  elif isinstance(head, head_lib._RegressionHead):  # pylint: disable=protected-access
    loss_type = "squared_loss"
  else:
    raise ValueError("Unsupported head type: {}".format(type(head)))

  assert head.logits_dimension == 1, (
      "SDCA only applies to logits_dimension=1.")

  # Update num_loss_partitions based on number of workers. `config` is an
  # optional argument, so fall back to a single partition when it is absent
  # instead of failing on `None.num_worker_replicas`.
  if num_loss_partitions:
    n_loss_partitions = num_loss_partitions
  elif config is not None:
    n_loss_partitions = max(1, config.num_worker_replicas)
  else:
    n_loss_partitions = 1
  optimizer = sdca_optimizer.SDCAOptimizer(
      example_id_column=example_id_column,
      num_loss_partitions=n_loss_partitions,
      symmetric_l1_regularization=l1_regularization,
      symmetric_l2_regularization=l2_regularization,
      partitioner=partitioner)

  parent_scope = "linear"

  with variable_scope.variable_scope(
      values=features.values(), name_or_scope=parent_scope,
      partitioner=partitioner) as scope:
    # Work on a copy so the caller's feature dict is not mutated.
    features = features.copy()
    features.update(layers.transform_features(features, feature_columns))
    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1,
            scope=scope))

    _add_bias_column(feature_columns, features, bias, columns_to_variables)

  def _train_op_fn(unused_loss):
    """Builds the SDCA train op and hands it to the update-weights hook."""
    global_step = training_util.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(
        columns_to_variables, weight_column_name, loss_type, features, labels,
        global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      labels=labels,
      mode=mode,
      train_op_fn=_train_op_fn,
      logits=logits)
  if update_weights_hook is not None:
    return model_fn_ops._replace(training_chief_hooks=(
        model_fn_ops.training_chief_hooks + [update_weights_hook]))
  return model_fn_ops


class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook):
  """SessionRunHook to update and shrink SDCA model weights.

  `set_parameters` must be called before `begin`, and `begin` before
  `before_run`; each step consumes the state stored by the previous one.
  """

  def __init__(self):
    # Populated later by `set_parameters` / `begin`. Declared here so the
    # instance always has these attributes.
    self._sdca_model = None
    self._train_op = None
    self._update_op = None

  def set_parameters(self, sdca_model, train_op):
    """Stores the SDCA model and train op used to build the update op."""
    self._sdca_model = sdca_model
    self._train_op = train_op

  def begin(self):
    """Construct the update_weights op.

    The op is implicitly added to the default graph.
    """
    self._update_op = self._sdca_model.update_weights(self._train_op)

  def before_run(self, run_context):
    """Return the update_weights op so that it is executed during this run."""
    return session_run_hook.SessionRunArgs(self._update_op)


class _SDCAEstimator(estimator.Estimator):
  """Base estimator class for linear models using the SDCA optimizer.

  This class should not be used directly. Rather, users should call one of the
  derived estimators.
  """

  def __init__(self,
               example_id_column,
               feature_columns,
               weight_column_name=None,
               model_dir=None,
               head=None,
               l1_regularization=0.0,
               l2_regularization=1.0,
               num_loss_partitions=None,
               config=None,
               feature_engineering_fn=None,
               partitioner=None):
    """Construct a `_SDCAEstimator` estimator object.

    Args:
      example_id_column: A string defining the feature column name representing
        example ids. Used to initialize the underlying SDCA optimizer.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      model_dir: Directory to save model parameters, graph etc. This can also be
        used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      head: type of head. Currently, _BinaryLogisticHead and _BinarySvmHead are
        supported for classification and _RegressionHead for regression. It
        should be a subclass of _SingleHead.
      l1_regularization: L1-regularization parameter. Refers to global L1
        regularization (across all examples).
      l2_regularization: L2-regularization parameter. Refers to global L2
        regularization (across all examples).
      num_loss_partitions: number of partitions of the (global) loss function
        optimized by the underlying optimizer (SDCAOptimizer).
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
        labels which are the output of `input_fn` and returns features and
        labels which will be fed into the model.
      partitioner: Variable partitioner for the primal weights (`div`
        partitioning strategy will be used).

    Returns:
      A `_SDCAEstimator` estimator.

    Raises:
      ValueError: if head is not supported by SDCA.
    """
    self._feature_columns = tuple(feature_columns or [])
    assert self._feature_columns

    if not _head_is_valid_for_sdca(head):
      raise ValueError(
          "head type: {} is not supported. Supported head types: "
          "_BinaryLogisticHead, _BinarySvmHead and _RegressionHead.".format(
              type(head)))
    assert head.logits_dimension == 1

    params = {
        "head": head,
        # Pass the materialized tuple: the raw `feature_columns` iterable may
        # be a one-shot iterator that `tuple(...)` above already consumed.
        "feature_columns": self._feature_columns,
        "example_id_column": example_id_column,
        "num_loss_partitions": num_loss_partitions,
        "l1_regularization": l1_regularization,
        "l2_regularization": l2_regularization,
        "weight_column_name": weight_column_name,
        "update_weights_hook": _SdcaUpdateWeightsHook(),
        "partitioner": partitioner,
    }

    super(_SDCAEstimator, self).__init__(
        model_fn=sdca_model_fn,
        model_dir=model_dir,
        config=config,
        params=params,
        feature_engineering_fn=feature_engineering_fn)


class SDCALogisticClassifier(_SDCAEstimator):
  """Logistic regression binary classifier using the SDCA optimizer.

  Example usage:

  ```python
  sparse_column_a = sparse_column_with_hash_bucket(...)
  sparse_column_b = sparse_column_with_hash_bucket(...)

  sparse_feature_a_x_sparse_feature_b = crossed_column(...)

  classifier = SDCALogisticClassifier(
      example_id_column='example_id',
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
      weight_column_name=...,
      l2_regularization=...,
      num_loss_partitions=...,
  )

  # Input builders
  # returns x, y (where y is the label Tensor (with 0/1 values))
  def input_fn_{train, eval}:

  # returns x (features dict)
  def input_fn_test:
    ...
  classifier.fit(input_fn=input_fn_train)
  classifier.evaluate(input_fn=input_fn_eval)
  # Returns predicted classes.
  classifier.predict_classes(input_fn=input_fn_test)
  # Returns predicted probabilities.
  classifier.predict_proba(input_fn=input_fn_test)
  ```

  The input_fn provided to `fit`, `evaluate` and predict_* methods should return
  the following features, otherwise there will be a `KeyError`:
    * A feature with `key=example_id_column` whose value is a `Tensor` of dtype
      string.
    * If `weight_column_name` is not `None`, a feature with
      `key=weight_column_name` whose value is a `Tensor`.
    * For each `column` in `feature_columns`:
      - if `column` is a `SparseColumn`, a feature with `key=column.name` whose
        `value` is a `SparseTensor`
      - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
        whose `value` is a `Tensor`
      - if `column` is a `WeightedSparseColumn`, two features: the first with
        `key` the id column name, the second with `key` the weight column name.
        Both features' `value` must be a `SparseTensor`
  """

  def __init__(self,
               example_id_column,
               feature_columns,
               weight_column_name=None,
               model_dir=None,
               l1_regularization=0.0,
               l2_regularization=1.0,
               num_loss_partitions=None,
               config=None,
               feature_engineering_fn=None,
               partitioner=None):
    """Construct a `SDCALogisticClassifier` object.

    Args:
      example_id_column: A string defining the feature column name representing
        example ids. Used to initialize the underlying SDCA optimizer.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the iterable should derive from `FeatureColumn`.
        Note that the order of the items is ignored at model construction time.
      weight_column_name: A string defining feature column name representing
        weights. It is used to downweight or boost examples during training. It
        will be multiplied by the loss of the example.
      model_dir: Directory to save model parameters, graph etc. This can also be
        used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      l1_regularization: L1-regularization parameter. Refers to global L1
        regularization (across all examples).
      l2_regularization: L2-regularization parameter. Refers to global L2
        regularization (across all examples).
      num_loss_partitions: Number of partitions of the global loss function
        optimized by the underlying optimizer (SDCAOptimizer).
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
        labels which are the output of `input_fn` and returns features and
        labels which will be fed into the model.
      partitioner: Variable partitioner for the primal weights (`div`
        partitioning strategy will be used).

    Returns:
      A `SDCALogisticClassifier` estimator.
    """
    super(SDCALogisticClassifier, self).__init__(
        example_id_column=example_id_column,
        feature_columns=feature_columns,
        weight_column_name=weight_column_name,
        model_dir=model_dir,
        head=head_lib.multi_class_head(
            n_classes=2, weight_column_name=weight_column_name),
        l1_regularization=l1_regularization,
        l2_regularization=l2_regularization,
        num_loss_partitions=num_loss_partitions,
        config=config,
        # Forward the caller's function; it was previously dropped here.
        feature_engineering_fn=feature_engineering_fn,
        partitioner=partitioner)

  def predict_classes(self, input_fn=None):
    """Runs inference to determine the predicted class.

    Args:
      input_fn: The input function providing features.

    Returns:
      A generator of predicted classes for the features provided by input_fn.
    """
    key = prediction_key.PredictionKey.CLASSES
    predictions = super(SDCALogisticClassifier, self).predict(
        input_fn=input_fn, outputs=[key])
    return (pred[key] for pred in predictions)

  def predict_proba(self, input_fn=None):
    """Runs inference to determine the class probability predictions.

    Args:
      input_fn: The input function providing features.

    Returns:
      A generator of predicted class probabilities for the features provided by
        input_fn.
    """
    key = prediction_key.PredictionKey.PROBABILITIES
    predictions = super(SDCALogisticClassifier, self).predict(
        input_fn=input_fn, outputs=[key])
    return (pred[key] for pred in predictions)


class SDCALinearRegressor(_SDCAEstimator):
  """Linear regression model using SDCA to solve the underlying optimization.

  Example usage:

  ```python
  real_column_a = real_valued_column(...)
  sparse_column_b = sparse_column_with_hash_bucket(...)

  regressor = SDCALinearRegressor(
      example_id_column='example_id',
      feature_columns=[real_column_a, sparse_column_b],
      weight_column_name=...,
      l2_regularization=...,
      num_loss_partitions=...,
  )

  # Input builders
  # returns x, y (where y is the label Tensor)
  def input_fn_{train, eval}:

  # returns x (features dict)
  def input_fn_test:
    ...
  regressor.fit(input_fn=input_fn_train)
  regressor.evaluate(input_fn=input_fn_eval)
  regressor.predict_scores(input_fn=input_fn_test) # returns predicted scores.
  ```

  The input_fn provided to `fit`, `evaluate` and predict_* methods should return
  the following features, otherwise there will be a `KeyError`:
    * A feature with `key=example_id_column` whose value is a `Tensor` of dtype
      string.
    * If `weight_column_name` is not `None`, a feature with
      `key=weight_column_name` whose value is a `Tensor`.
    * For each `column` in `feature_columns`:
      - if `column` is a `SparseColumn`, a feature with `key=column.name` whose
        `value` is a `SparseTensor`
      - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
        whose `value` is a `Tensor`
      - if `column` is a `WeightedSparseColumn`, two features: the first with
        `key` the id column name, the second with `key` the weight column name.
        Both features' `value` must be a `SparseTensor`

  """

  def __init__(self,
               example_id_column,
               feature_columns,
               weight_column_name=None,
               model_dir=None,
               l1_regularization=0.0,
               l2_regularization=1.0,
               num_loss_partitions=None,
               config=None,
               feature_engineering_fn=None,
               partitioner=None):
    """Construct a `SDCALinearRegressor` estimator object.


    Args:
      example_id_column: A string defining the feature column name representing
        example ids. Used to initialize the underlying SDCA optimizer.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the iterable should derive from `FeatureColumn`.
        Note that the order of the items is ignored at model construction time.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      model_dir: Directory to save model parameters, graph etc. This can also be
        used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      l1_regularization: L1-regularization parameter. Refers to global L1
        regularization (across all examples).
      l2_regularization: L2-regularization parameter. Refers to global L2
        regularization (across all examples).
      num_loss_partitions: number of partitions of the (global) loss function
        optimized by the underlying optimizer (SDCAOptimizer).
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
        labels which are the output of `input_fn` and returns features and
        labels which will be fed into the model.
      partitioner: Variable partitioner for the primal weights (`div`
        partitioning strategy will be used).

    Returns:
      A `SDCALinearRegressor` estimator.
    """
    super(SDCALinearRegressor, self).__init__(
        example_id_column=example_id_column,
        feature_columns=feature_columns,
        weight_column_name=weight_column_name,
        model_dir=model_dir,
        head=head_lib.regression_head(weight_column_name=weight_column_name),
        l1_regularization=l1_regularization,
        l2_regularization=l2_regularization,
        num_loss_partitions=num_loss_partitions,
        config=config,
        # Forward the caller's function; it was previously dropped here.
        feature_engineering_fn=feature_engineering_fn,
        partitioner=partitioner)

  def predict_scores(self, input_fn):
    """Returns predicted scores for given features.

    Args:
      input_fn: The input function providing features.

    Returns:
      A generator of predicted scores for the features provided by input_fn.
    """
    key = prediction_key.PredictionKey.SCORES
    predictions = super(SDCALinearRegressor, self).predict(
        input_fn=input_fn, outputs=[key])
    return (pred[key] for pred in predictions)