1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """Tests for DNNLinearCombinedEstimators.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import functools 22 import json 23 import tempfile 24 25 import numpy as np 26 27 from tensorflow.contrib.layers.python.layers import feature_column 28 from tensorflow.contrib.learn.python.learn import experiment 29 from tensorflow.contrib.learn.python.learn.datasets import base 30 from tensorflow.contrib.learn.python.learn.estimators import _sklearn 31 from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined 32 from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils 33 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib 34 from tensorflow.contrib.learn.python.learn.estimators import model_fn 35 from tensorflow.contrib.learn.python.learn.estimators import run_config 36 from tensorflow.contrib.learn.python.learn.estimators import test_data 37 from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec 38 from tensorflow.contrib.metrics.python.ops import metric_ops 39 from tensorflow.python.feature_column import feature_column as fc_core 40 from tensorflow.python.framework import constant_op 41 from 
tensorflow.python.framework import dtypes 42 from tensorflow.python.framework import ops 43 from tensorflow.python.framework import sparse_tensor 44 from tensorflow.python.ops import array_ops 45 from tensorflow.python.ops import init_ops 46 from tensorflow.python.ops import math_ops 47 from tensorflow.python.ops.losses import losses 48 from tensorflow.python.platform import test 49 from tensorflow.python.training import adagrad 50 from tensorflow.python.training import ftrl 51 from tensorflow.python.training import input as input_lib 52 from tensorflow.python.training import learning_rate_decay 53 from tensorflow.python.training import monitored_session 54 from tensorflow.python.training import server_lib 55 from tensorflow.python.training import session_run_hook 56 from tensorflow.python.training import sync_replicas_optimizer 57 from tensorflow.python.training import training_util 58 59 60 def _assert_metrics_in_range(keys, metrics): 61 epsilon = 0.00001 # Added for floating point edge cases. 
62 for key in keys: 63 estimator_test_utils.assert_in_range(0.0 - epsilon, 1.0 + epsilon, key, 64 metrics) 65 66 67 class _CheckCallsHead(head_lib.Head): 68 """Head that checks whether head_ops is called.""" 69 70 def __init__(self): 71 self._head_ops_called_times = 0 72 73 @property 74 def logits_dimension(self): 75 return 1 76 77 def create_model_fn_ops( 78 self, mode, features, labels=None, train_op_fn=None, logits=None, 79 logits_input=None, scope=None): 80 """See `_Head`.""" 81 self._head_ops_called_times += 1 82 loss = losses.mean_squared_error(labels, logits) 83 return model_fn.ModelFnOps( 84 mode, 85 predictions={'loss': loss}, 86 loss=loss, 87 train_op=train_op_fn(loss), 88 eval_metric_ops={'loss': loss}) 89 90 @property 91 def head_ops_called_times(self): 92 return self._head_ops_called_times 93 94 95 class _StepCounterHook(session_run_hook.SessionRunHook): 96 """Counts the number of training steps.""" 97 98 def __init__(self): 99 self._steps = 0 100 101 def after_run(self, run_context, run_values): 102 del run_context, run_values 103 self._steps += 1 104 105 @property 106 def steps(self): 107 return self._steps 108 109 110 class EmbeddingMultiplierTest(test.TestCase): 111 """dnn_model_fn tests.""" 112 113 def testRaisesNonEmbeddingColumn(self): 114 one_hot_language = feature_column.one_hot_column( 115 feature_column.sparse_column_with_hash_bucket('language', 10)) 116 117 params = { 118 'dnn_feature_columns': [one_hot_language], 119 'head': head_lib.multi_class_head(2), 120 'dnn_hidden_units': [1], 121 # Set lr mult to 0. to keep embeddings constant. 
122 'embedding_lr_multipliers': { 123 one_hot_language: 0.0 124 }, 125 'dnn_optimizer': 'Adagrad', 126 } 127 features = { 128 'language': 129 sparse_tensor.SparseTensor( 130 values=['en', 'fr', 'zh'], 131 indices=[[0, 0], [1, 0], [2, 0]], 132 dense_shape=[3, 1]), 133 } 134 labels = constant_op.constant([[0], [0], [0]], dtype=dtypes.int32) 135 with self.assertRaisesRegexp(ValueError, 136 'can only be defined for embedding columns'): 137 dnn_linear_combined._dnn_linear_combined_model_fn(features, labels, 138 model_fn.ModeKeys.TRAIN, 139 params) 140 141 def testMultipliesGradient(self): 142 embedding_language = feature_column.embedding_column( 143 feature_column.sparse_column_with_hash_bucket('language', 10), 144 dimension=1, 145 initializer=init_ops.constant_initializer(0.1)) 146 embedding_wire = feature_column.embedding_column( 147 feature_column.sparse_column_with_hash_bucket('wire', 10), 148 dimension=1, 149 initializer=init_ops.constant_initializer(0.1)) 150 151 params = { 152 'dnn_feature_columns': [embedding_language, embedding_wire], 153 'head': head_lib.multi_class_head(2), 154 'dnn_hidden_units': [1], 155 # Set lr mult to 0. to keep language embeddings constant, whereas wire 156 # embeddings will be trained. 
157 'embedding_lr_multipliers': { 158 embedding_language: 0.0 159 }, 160 'dnn_optimizer': 'Adagrad', 161 } 162 with ops.Graph().as_default(): 163 features = { 164 'language': 165 sparse_tensor.SparseTensor( 166 values=['en', 'fr', 'zh'], 167 indices=[[0, 0], [1, 0], [2, 0]], 168 dense_shape=[3, 1]), 169 'wire': 170 sparse_tensor.SparseTensor( 171 values=['omar', 'stringer', 'marlo'], 172 indices=[[0, 0], [1, 0], [2, 0]], 173 dense_shape=[3, 1]), 174 } 175 labels = constant_op.constant([[1], [0], [0]], dtype=dtypes.int32) 176 training_util.create_global_step() 177 model_ops = dnn_linear_combined._dnn_linear_combined_model_fn( 178 features, labels, model_fn.ModeKeys.TRAIN, params) 179 with monitored_session.MonitoredSession() as sess: 180 language_var = dnn_linear_combined._get_embedding_variable( 181 embedding_language, 'dnn', 'dnn/input_from_feature_columns') 182 language_initial_value = sess.run(language_var) 183 for _ in range(2): 184 _, language_value = sess.run([model_ops.train_op, language_var]) 185 186 self.assertAllClose(language_value, language_initial_value) 187 # We could also test that wire_value changed, but that test would be flaky. 
188 189 190 class DNNLinearCombinedEstimatorTest(test.TestCase): 191 192 def testEstimatorContract(self): 193 estimator_test_utils.assert_estimator_contract( 194 self, dnn_linear_combined.DNNLinearCombinedEstimator) 195 196 def testNoFeatureColumns(self): 197 with self.assertRaisesRegexp( 198 ValueError, 199 'Either linear_feature_columns or dnn_feature_columns must be defined'): 200 dnn_linear_combined.DNNLinearCombinedEstimator( 201 head=_CheckCallsHead(), 202 linear_feature_columns=None, 203 dnn_feature_columns=None, 204 dnn_hidden_units=[3, 3]) 205 206 def testCheckCallsHead(self): 207 """Tests binary classification using matrix data as input.""" 208 head = _CheckCallsHead() 209 iris = test_data.prepare_iris_data_for_logistic_regression() 210 cont_features = [ 211 feature_column.real_valued_column('feature', dimension=4)] 212 bucketized_feature = [feature_column.bucketized_column( 213 cont_features[0], test_data.get_quantile_based_buckets(iris.data, 10))] 214 215 estimator = dnn_linear_combined.DNNLinearCombinedEstimator( 216 head, 217 linear_feature_columns=bucketized_feature, 218 dnn_feature_columns=cont_features, 219 dnn_hidden_units=[3, 3]) 220 221 estimator.fit(input_fn=test_data.iris_input_multiclass_fn, steps=10) 222 self.assertEqual(1, head.head_ops_called_times) 223 224 estimator.evaluate(input_fn=test_data.iris_input_multiclass_fn, steps=10) 225 self.assertEqual(2, head.head_ops_called_times) 226 227 estimator.predict(input_fn=test_data.iris_input_multiclass_fn) 228 self.assertEqual(3, head.head_ops_called_times) 229 230 231 class DNNLinearCombinedClassifierTest(test.TestCase): 232 233 def testEstimatorContract(self): 234 estimator_test_utils.assert_estimator_contract( 235 self, dnn_linear_combined.DNNLinearCombinedClassifier) 236 237 def testExperimentIntegration(self): 238 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 239 240 exp = experiment.Experiment( 241 estimator=dnn_linear_combined.DNNLinearCombinedClassifier( 242 
linear_feature_columns=cont_features, 243 dnn_feature_columns=cont_features, 244 dnn_hidden_units=[3, 3]), 245 train_input_fn=test_data.iris_input_logistic_fn, 246 eval_input_fn=test_data.iris_input_logistic_fn) 247 exp.test() 248 249 def testNoFeatureColumns(self): 250 with self.assertRaisesRegexp( 251 ValueError, 252 'Either linear_feature_columns or dnn_feature_columns must be defined'): 253 dnn_linear_combined.DNNLinearCombinedClassifier( 254 linear_feature_columns=None, 255 dnn_feature_columns=None, 256 dnn_hidden_units=[3, 3]) 257 258 def testNoDnnHiddenUnits(self): 259 def _input_fn(): 260 return { 261 'age': 262 constant_op.constant([1]), 263 'language': 264 sparse_tensor.SparseTensor( 265 values=['english'], indices=[[0, 0]], dense_shape=[1, 1]) 266 }, constant_op.constant([[1]]) 267 268 language = feature_column.sparse_column_with_hash_bucket('language', 100) 269 age = feature_column.real_valued_column('age') 270 271 with self.assertRaisesRegexp( 272 ValueError, 273 'dnn_hidden_units must be defined when dnn_feature_columns is ' 274 'specified'): 275 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 276 dnn_feature_columns=[age, language]) 277 classifier.fit(input_fn=_input_fn, steps=2) 278 279 def testSyncReplicasOptimizerUnsupported(self): 280 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 281 282 sync_optimizer = sync_replicas_optimizer.SyncReplicasOptimizer( 283 opt=adagrad.AdagradOptimizer(learning_rate=0.1), 284 replicas_to_aggregate=1, 285 total_num_replicas=1) 286 sync_hook = sync_optimizer.make_session_run_hook(is_chief=True) 287 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 288 n_classes=3, 289 dnn_feature_columns=cont_features, 290 dnn_hidden_units=[3, 3], 291 dnn_optimizer=sync_optimizer) 292 293 with self.assertRaisesRegexp( 294 ValueError, 295 'SyncReplicasOptimizer is not supported in DNNLinearCombined model'): 296 classifier.fit( 297 input_fn=test_data.iris_input_multiclass_fn, 
steps=100, 298 monitors=[sync_hook]) 299 300 def testEmbeddingMultiplier(self): 301 embedding_language = feature_column.embedding_column( 302 feature_column.sparse_column_with_hash_bucket('language', 10), 303 dimension=1, 304 initializer=init_ops.constant_initializer(0.1)) 305 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 306 dnn_feature_columns=[embedding_language], 307 dnn_hidden_units=[3, 3], 308 embedding_lr_multipliers={embedding_language: 0.8}) 309 self.assertEqual({ 310 embedding_language: 0.8 311 }, classifier.params['embedding_lr_multipliers']) 312 313 def testInputPartitionSize(self): 314 def _input_fn_float_label(num_epochs=None): 315 features = { 316 'language': 317 sparse_tensor.SparseTensor( 318 values=input_lib.limit_epochs( 319 ['en', 'fr', 'zh'], num_epochs=num_epochs), 320 indices=[[0, 0], [0, 1], [2, 0]], 321 dense_shape=[3, 2]) 322 } 323 labels = constant_op.constant([[0.8], [0.], [0.2]], dtype=dtypes.float32) 324 return features, labels 325 326 language_column = feature_column.sparse_column_with_hash_bucket( 327 'language', hash_bucket_size=20) 328 feature_columns = [ 329 feature_column.embedding_column(language_column, dimension=1), 330 ] 331 332 # Set num_ps_replica to be 10 and the min slice size to be extremely small, 333 # so as to ensure that there'll be 10 partititions produced. 334 config = run_config.RunConfig(tf_random_seed=1) 335 config._num_ps_replicas = 10 336 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 337 n_classes=2, 338 dnn_feature_columns=feature_columns, 339 dnn_hidden_units=[3, 3], 340 dnn_optimizer='Adagrad', 341 config=config, 342 input_layer_min_slice_size=1) 343 344 # Ensure the param is passed in. 345 self.assertTrue(callable(classifier.params['input_layer_partitioner'])) 346 347 # Ensure the partition count is 10. 
348 classifier.fit(input_fn=_input_fn_float_label, steps=50) 349 partition_count = 0 350 for name in classifier.get_variable_names(): 351 if 'language_embedding' in name and 'Adagrad' in name: 352 partition_count += 1 353 self.assertEqual(10, partition_count) 354 355 def testLogisticRegression_MatrixData(self): 356 """Tests binary classification using matrix data as input.""" 357 iris = test_data.prepare_iris_data_for_logistic_regression() 358 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 359 bucketized_feature = [ 360 feature_column.bucketized_column( 361 cont_features[0], 362 test_data.get_quantile_based_buckets(iris.data, 10)) 363 ] 364 365 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 366 linear_feature_columns=bucketized_feature, 367 dnn_feature_columns=cont_features, 368 dnn_hidden_units=[3, 3]) 369 370 classifier.fit(input_fn=test_data.iris_input_logistic_fn, steps=100) 371 scores = classifier.evaluate( 372 input_fn=test_data.iris_input_logistic_fn, steps=100) 373 _assert_metrics_in_range(('accuracy', 'auc'), scores) 374 375 def testLogisticRegression_TensorData(self): 376 """Tests binary classification using Tensor data as input.""" 377 378 def _input_fn(): 379 iris = test_data.prepare_iris_data_for_logistic_regression() 380 features = {} 381 for i in range(4): 382 # The following shows how to provide the Tensor data for 383 # RealValuedColumns. 384 features.update({ 385 str(i): 386 array_ops.reshape( 387 constant_op.constant( 388 iris.data[:, i], dtype=dtypes.float32), [-1, 1]) 389 }) 390 # The following shows how to provide the SparseTensor data for 391 # a SparseColumn. 
392 features['dummy_sparse_column'] = sparse_tensor.SparseTensor( 393 values=['en', 'fr', 'zh'], 394 indices=[[0, 0], [0, 1], [60, 0]], 395 dense_shape=[len(iris.target), 2]) 396 labels = array_ops.reshape( 397 constant_op.constant( 398 iris.target, dtype=dtypes.int32), [-1, 1]) 399 return features, labels 400 401 iris = test_data.prepare_iris_data_for_logistic_regression() 402 cont_features = [ 403 feature_column.real_valued_column(str(i)) for i in range(4) 404 ] 405 linear_features = [ 406 feature_column.bucketized_column(cont_features[i], 407 test_data.get_quantile_based_buckets( 408 iris.data[:, i], 10)) 409 for i in range(4) 410 ] 411 linear_features.append( 412 feature_column.sparse_column_with_hash_bucket( 413 'dummy_sparse_column', hash_bucket_size=100)) 414 415 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 416 linear_feature_columns=linear_features, 417 dnn_feature_columns=cont_features, 418 dnn_hidden_units=[3, 3]) 419 420 classifier.fit(input_fn=_input_fn, steps=100) 421 scores = classifier.evaluate(input_fn=_input_fn, steps=100) 422 _assert_metrics_in_range(('accuracy', 'auc'), scores) 423 424 def testEstimatorWithCoreFeatureColumns(self): 425 """Tests binary classification using Tensor data as input.""" 426 427 def _input_fn(): 428 iris = test_data.prepare_iris_data_for_logistic_regression() 429 features = {} 430 for i in range(4): 431 # The following shows how to provide the Tensor data for 432 # RealValuedColumns. 433 features.update({ 434 str(i): 435 array_ops.reshape( 436 constant_op.constant(iris.data[:, i], dtype=dtypes.float32), 437 [-1, 1]) 438 }) 439 # The following shows how to provide the SparseTensor data for 440 # a SparseColumn. 
441 features['dummy_sparse_column'] = sparse_tensor.SparseTensor( 442 values=['en', 'fr', 'zh'], 443 indices=[[0, 0], [0, 1], [60, 0]], 444 dense_shape=[len(iris.target), 2]) 445 labels = array_ops.reshape( 446 constant_op.constant(iris.target, dtype=dtypes.int32), [-1, 1]) 447 return features, labels 448 449 iris = test_data.prepare_iris_data_for_logistic_regression() 450 cont_features = [fc_core.numeric_column(str(i)) for i in range(4)] 451 linear_features = [ 452 fc_core.bucketized_column( 453 cont_features[i], 454 sorted(set(test_data.get_quantile_based_buckets( 455 iris.data[:, i], 10)))) for i in range(4) 456 ] 457 linear_features.append( 458 fc_core.categorical_column_with_hash_bucket( 459 'dummy_sparse_column', hash_bucket_size=100)) 460 461 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 462 linear_feature_columns=linear_features, 463 dnn_feature_columns=cont_features, 464 dnn_hidden_units=[3, 3]) 465 466 classifier.fit(input_fn=_input_fn, steps=100) 467 scores = classifier.evaluate(input_fn=_input_fn, steps=100) 468 _assert_metrics_in_range(('accuracy', 'auc'), scores) 469 470 def testTrainWithPartitionedVariables(self): 471 """Tests training with partitioned variables.""" 472 473 def _input_fn(): 474 features = { 475 'language': 476 sparse_tensor.SparseTensor( 477 values=['en', 'fr', 'zh'], 478 indices=[[0, 0], [0, 1], [2, 0]], 479 dense_shape=[3, 2]) 480 } 481 labels = constant_op.constant([[1], [0], [0]]) 482 return features, labels 483 484 sparse_features = [ 485 # The given hash_bucket_size results in variables larger than the 486 # default min_slice_size attribute, so the variables are partitioned. 
487 feature_column.sparse_column_with_hash_bucket( 488 'language', hash_bucket_size=2e7) 489 ] 490 embedding_features = [ 491 feature_column.embedding_column( 492 sparse_features[0], dimension=1) 493 ] 494 495 tf_config = { 496 'cluster': { 497 run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1'] 498 } 499 } 500 with test.mock.patch.dict('os.environ', 501 {'TF_CONFIG': json.dumps(tf_config)}): 502 config = run_config.RunConfig() 503 # Because we did not start a distributed cluster, we need to pass an 504 # empty ClusterSpec, otherwise the device_setter will look for 505 # distributed jobs, such as "/job:ps" which are not present. 506 config._cluster_spec = server_lib.ClusterSpec({}) 507 508 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 509 linear_feature_columns=sparse_features, 510 dnn_feature_columns=embedding_features, 511 dnn_hidden_units=[3, 3], 512 config=config) 513 514 classifier.fit(input_fn=_input_fn, steps=100) 515 scores = classifier.evaluate(input_fn=_input_fn, steps=1) 516 _assert_metrics_in_range(('accuracy', 'auc'), scores) 517 518 def testMultiClass(self): 519 """Tests multi-class classification using matrix data as input. 520 521 Please see testLogisticRegression_TensorData() for how to use Tensor 522 data as input instead. 
523 """ 524 iris = base.load_iris() 525 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 526 bucketized_features = [ 527 feature_column.bucketized_column( 528 cont_features[0], 529 test_data.get_quantile_based_buckets(iris.data, 10)) 530 ] 531 532 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 533 n_classes=3, 534 linear_feature_columns=bucketized_features, 535 dnn_feature_columns=cont_features, 536 dnn_hidden_units=[3, 3]) 537 538 classifier.fit(input_fn=test_data.iris_input_multiclass_fn, steps=100) 539 scores = classifier.evaluate( 540 input_fn=test_data.iris_input_multiclass_fn, steps=100) 541 _assert_metrics_in_range(('accuracy',), scores) 542 543 def testMultiClassLabelKeys(self): 544 """Tests n_classes > 2 with label_keys vocabulary for labels.""" 545 # Byte literals needed for python3 test to pass. 546 label_keys = [b'label0', b'label1', b'label2'] 547 548 def _input_fn(num_epochs=None): 549 features = { 550 'age': 551 input_lib.limit_epochs( 552 constant_op.constant([[.8], [0.2], [.1]]), 553 num_epochs=num_epochs), 554 'language': 555 sparse_tensor.SparseTensor( 556 values=input_lib.limit_epochs( 557 ['en', 'fr', 'zh'], num_epochs=num_epochs), 558 indices=[[0, 0], [0, 1], [2, 0]], 559 dense_shape=[3, 2]) 560 } 561 labels = constant_op.constant( 562 [[label_keys[1]], [label_keys[0]], [label_keys[0]]], 563 dtype=dtypes.string) 564 return features, labels 565 566 language_column = feature_column.sparse_column_with_hash_bucket( 567 'language', hash_bucket_size=20) 568 569 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 570 n_classes=3, 571 linear_feature_columns=[language_column], 572 dnn_feature_columns=[ 573 feature_column.embedding_column( 574 language_column, dimension=1), 575 feature_column.real_valued_column('age') 576 ], 577 dnn_hidden_units=[3, 3], 578 label_keys=label_keys) 579 580 classifier.fit(input_fn=_input_fn, steps=50) 581 582 scores = classifier.evaluate(input_fn=_input_fn, steps=1) 583 
_assert_metrics_in_range(('accuracy',), scores) 584 self.assertIn('loss', scores) 585 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 586 predicted_classes = list( 587 classifier.predict_classes( 588 input_fn=predict_input_fn, as_iterable=True)) 589 self.assertEqual(3, len(predicted_classes)) 590 for pred in predicted_classes: 591 self.assertIn(pred, label_keys) 592 predictions = list( 593 classifier.predict(input_fn=predict_input_fn, as_iterable=True)) 594 self.assertAllEqual(predicted_classes, predictions) 595 596 def testLoss(self): 597 """Tests loss calculation.""" 598 599 def _input_fn_train(): 600 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 601 # The logistic prediction should be (y = 0.25). 602 features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),} 603 labels = constant_op.constant([[1], [0], [0], [0]]) 604 return features, labels 605 606 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 607 n_classes=2, 608 linear_feature_columns=[feature_column.real_valued_column('x')], 609 dnn_feature_columns=[feature_column.real_valued_column('x')], 610 dnn_hidden_units=[3, 3], 611 config=run_config.RunConfig(tf_random_seed=1)) 612 613 classifier.fit(input_fn=_input_fn_train, steps=100) 614 scores = classifier.evaluate(input_fn=_input_fn_train, steps=1) 615 # Cross entropy = -0.25*log(0.25)-0.75*log(0.75) = 0.562 616 self.assertAlmostEqual(0.562, scores['loss'], delta=0.1) 617 618 def testLossWithWeights(self): 619 """Tests loss calculation with weights.""" 620 621 def _input_fn_train(): 622 # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x)) 623 # The logistic prediction should be (y = 0.25). 624 features = { 625 'x': array_ops.ones( 626 shape=[4, 1], dtype=dtypes.float32), 627 'w': constant_op.constant([[1.], [1.], [1.], [1.]]) 628 } 629 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 630 return features, labels 631 632 def _input_fn_eval(): 633 # 4 rows, with different weights. 
634 features = { 635 'x': array_ops.ones( 636 shape=[4, 1], dtype=dtypes.float32), 637 'w': constant_op.constant([[7.], [1.], [1.], [1.]]) 638 } 639 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 640 return features, labels 641 642 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 643 weight_column_name='w', 644 n_classes=2, 645 linear_feature_columns=[feature_column.real_valued_column('x')], 646 dnn_feature_columns=[feature_column.real_valued_column('x')], 647 dnn_hidden_units=[3, 3], 648 config=run_config.RunConfig(tf_random_seed=1)) 649 classifier.fit(input_fn=_input_fn_train, steps=100) 650 scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) 651 # Weighted cross entropy = (-7*log(0.25)-3*log(0.75))/10 = 1.06 652 self.assertAlmostEqual(1.06, scores['loss'], delta=0.1) 653 654 def testTrainWithWeights(self): 655 """Tests training with given weight column.""" 656 657 def _input_fn_train(): 658 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 659 # First row has more weight than others. Model should fit (y=x) better 660 # than (y=Not(x)) due to the relative higher weight of the first row. 661 labels = constant_op.constant([[1], [0], [0], [0]]) 662 features = { 663 'x': array_ops.ones( 664 shape=[4, 1], dtype=dtypes.float32), 665 'w': constant_op.constant([[100.], [3.], [2.], [2.]]) 666 } 667 return features, labels 668 669 def _input_fn_eval(): 670 # Create 4 rows (y = x). 
671 labels = constant_op.constant([[1], [1], [1], [1]]) 672 features = { 673 'x': array_ops.ones( 674 shape=[4, 1], dtype=dtypes.float32), 675 'w': constant_op.constant([[1.], [1.], [1.], [1.]]) 676 } 677 return features, labels 678 679 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 680 weight_column_name='w', 681 linear_feature_columns=[feature_column.real_valued_column('x')], 682 dnn_feature_columns=[feature_column.real_valued_column('x')], 683 dnn_hidden_units=[3, 3], 684 config=run_config.RunConfig(tf_random_seed=1)) 685 classifier.fit(input_fn=_input_fn_train, steps=100) 686 scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) 687 _assert_metrics_in_range(('accuracy',), scores) 688 689 def testCustomOptimizerByObject(self): 690 """Tests binary classification using matrix data as input.""" 691 iris = test_data.prepare_iris_data_for_logistic_regression() 692 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 693 bucketized_features = [ 694 feature_column.bucketized_column( 695 cont_features[0], 696 test_data.get_quantile_based_buckets(iris.data, 10)) 697 ] 698 699 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 700 linear_feature_columns=bucketized_features, 701 linear_optimizer=ftrl.FtrlOptimizer(learning_rate=0.1), 702 dnn_feature_columns=cont_features, 703 dnn_hidden_units=[3, 3], 704 dnn_optimizer=adagrad.AdagradOptimizer(learning_rate=0.1)) 705 706 classifier.fit(input_fn=test_data.iris_input_logistic_fn, steps=100) 707 scores = classifier.evaluate( 708 input_fn=test_data.iris_input_logistic_fn, steps=100) 709 _assert_metrics_in_range(('accuracy',), scores) 710 711 def testCustomOptimizerByString(self): 712 """Tests binary classification using matrix data as input.""" 713 iris = test_data.prepare_iris_data_for_logistic_regression() 714 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 715 bucketized_features = [ 716 feature_column.bucketized_column( 717 
cont_features[0], 718 test_data.get_quantile_based_buckets(iris.data, 10)) 719 ] 720 721 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 722 linear_feature_columns=bucketized_features, 723 linear_optimizer='Ftrl', 724 dnn_feature_columns=cont_features, 725 dnn_hidden_units=[3, 3], 726 dnn_optimizer='Adagrad') 727 728 classifier.fit(input_fn=test_data.iris_input_logistic_fn, steps=100) 729 scores = classifier.evaluate( 730 input_fn=test_data.iris_input_logistic_fn, steps=100) 731 _assert_metrics_in_range(('accuracy',), scores) 732 733 def testCustomOptimizerByFunction(self): 734 """Tests binary classification using matrix data as input.""" 735 iris = test_data.prepare_iris_data_for_logistic_regression() 736 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 737 bucketized_features = [ 738 feature_column.bucketized_column( 739 cont_features[0], 740 test_data.get_quantile_based_buckets(iris.data, 10)) 741 ] 742 743 def _optimizer_exp_decay(): 744 global_step = training_util.get_global_step() 745 learning_rate = learning_rate_decay.exponential_decay( 746 learning_rate=0.1, 747 global_step=global_step, 748 decay_steps=100, 749 decay_rate=0.001) 750 return adagrad.AdagradOptimizer(learning_rate=learning_rate) 751 752 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 753 linear_feature_columns=bucketized_features, 754 linear_optimizer=_optimizer_exp_decay, 755 dnn_feature_columns=cont_features, 756 dnn_hidden_units=[3, 3], 757 dnn_optimizer=_optimizer_exp_decay) 758 759 classifier.fit(input_fn=test_data.iris_input_logistic_fn, steps=100) 760 scores = classifier.evaluate( 761 input_fn=test_data.iris_input_logistic_fn, steps=100) 762 _assert_metrics_in_range(('accuracy',), scores) 763 764 def testPredict(self): 765 """Tests weight column in evaluation.""" 766 767 def _input_fn_train(): 768 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 769 labels = constant_op.constant([[1], [0], [0], [0]]) 770 features = 
{'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32)} 771 return features, labels 772 773 def _input_fn_predict(): 774 y = input_lib.limit_epochs( 775 array_ops.ones( 776 shape=[4, 1], dtype=dtypes.float32), num_epochs=1) 777 features = {'x': y} 778 return features 779 780 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 781 linear_feature_columns=[feature_column.real_valued_column('x')], 782 dnn_feature_columns=[feature_column.real_valued_column('x')], 783 dnn_hidden_units=[3, 3]) 784 785 classifier.fit(input_fn=_input_fn_train, steps=100) 786 787 probs = list(classifier.predict_proba(input_fn=_input_fn_predict)) 788 self.assertAllClose([[0.75, 0.25]] * 4, probs, 0.05) 789 classes = list(classifier.predict_classes(input_fn=_input_fn_predict)) 790 self.assertListEqual([0] * 4, classes) 791 792 def testCustomMetrics(self): 793 """Tests custom evaluation metrics.""" 794 795 def _input_fn(num_epochs=None): 796 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 797 labels = constant_op.constant([[1], [0], [0], [0]]) 798 features = { 799 'x': 800 input_lib.limit_epochs( 801 array_ops.ones( 802 shape=[4, 1], dtype=dtypes.float32), 803 num_epochs=num_epochs) 804 } 805 return features, labels 806 807 def _my_metric_op(predictions, labels): 808 # For the case of binary classification, the 2nd column of "predictions" 809 # denotes the model predictions. 
810 labels = math_ops.to_float(labels) 811 predictions = array_ops.strided_slice( 812 predictions, [0, 1], [-1, 2], end_mask=1) 813 return math_ops.reduce_sum(math_ops.multiply(predictions, labels)) 814 815 classifier = dnn_linear_combined.DNNLinearCombinedClassifier( 816 linear_feature_columns=[feature_column.real_valued_column('x')], 817 dnn_feature_columns=[feature_column.real_valued_column('x')], 818 dnn_hidden_units=[3, 3]) 819 820 classifier.fit(input_fn=_input_fn, steps=100) 821 scores = classifier.evaluate( 822 input_fn=_input_fn, 823 steps=100, 824 metrics={ 825 'my_accuracy': 826 MetricSpec( 827 metric_fn=metric_ops.streaming_accuracy, 828 prediction_key='classes'), 829 'my_precision': 830 MetricSpec( 831 metric_fn=metric_ops.streaming_precision, 832 prediction_key='classes'), 833 'my_metric': 834 MetricSpec( 835 metric_fn=_my_metric_op, prediction_key='probabilities') 836 }) 837 self.assertTrue( 838 set(['loss', 'my_accuracy', 'my_precision', 'my_metric']).issubset( 839 set(scores.keys()))) 840 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 841 predictions = np.array(list(classifier.predict_classes( 842 input_fn=predict_input_fn))) 843 self.assertEqual( 844 _sklearn.accuracy_score([1, 0, 0, 0], predictions), 845 scores['my_accuracy']) 846 847 # Test the case where the 2nd element of the key is neither "classes" nor 848 # "probabilities". 849 with self.assertRaisesRegexp(KeyError, 'bad_type'): 850 classifier.evaluate( 851 input_fn=_input_fn, 852 steps=100, 853 metrics={('bad_name', 'bad_type'): metric_ops.streaming_auc}) 854 855 # Test the case where the tuple of the key doesn't have 2 elements. 856 with self.assertRaises(ValueError): 857 classifier.evaluate( 858 input_fn=_input_fn, 859 steps=100, 860 metrics={ 861 ('bad_length_name', 'classes', 'bad_length'): 862 metric_ops.streaming_accuracy 863 }) 864 865 # Test the case where the prediction_key is neither "classes" nor 866 # "probabilities". 
    # Tail of the preceding custom-metrics test (its `def` line is above this
    # chunk): a metric whose prediction_key does not exist must raise KeyError.
    with self.assertRaisesRegexp(KeyError, 'bad_type'):
      classifier.evaluate(
          input_fn=_input_fn,
          steps=100,
          metrics={
              'bad_name':
                  MetricSpec(
                      metric_fn=metric_ops.streaming_auc,
                      prediction_key='bad_type')
          })

  def testVariableQuery(self):
    """Tests get_variable_names and get_variable_value."""

    def _input_fn_train():
      # Create 4 rows, three (y = x), one (y=Not(x))
      labels = constant_op.constant([[1], [1], [1], [0]])
      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
      return features, labels

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3])

    classifier.fit(input_fn=_input_fn_train, steps=500)
    var_names = classifier.get_variable_names()
    self.assertGreater(len(var_names), 3)
    # Every reported variable name must be retrievable without error.
    for name in var_names:
      classifier.get_variable_value(name)

  def testExport(self):
    """Tests export model for servo."""

    def input_fn():
      return {
          'age':
              constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 100)

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[
            feature_column.real_valued_column('age'),
            language,
        ],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language, dimension=1),
        ],
        dnn_hidden_units=[3, 3])
    classifier.fit(input_fn=input_fn, steps=100)

    export_dir = tempfile.mkdtemp()
    input_feature_key = 'examples'

    def serving_input_fn():
      # Reuse the training features but add a string placeholder for the
      # serialized-example input the serving graph expects.
      features, targets = input_fn()
      features[input_feature_key] = array_ops.placeholder(dtypes.string)
      return features, targets

    classifier.export(
        export_dir,
        serving_input_fn,
        input_feature_key,
        use_deprecated_input_fn=False)

  def testCenteredBias(self):
    """Tests bias is centered or not."""

    def _input_fn_train():
      # Create 4 rows, three (y = x), one (y=Not(x))
      labels = constant_op.constant([[1], [1], [1], [0]])
      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
      return features, labels

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        enable_centered_bias=True)

    classifier.fit(input_fn=_input_fn_train, steps=1000)
    self.assertIn('binary_logistic_head/centered_bias_weight',
                  classifier.get_variable_names())
    # logodds(0.75) = 1.09861228867
    self.assertAlmostEqual(
        1.0986,
        float(classifier.get_variable_value(
            'binary_logistic_head/centered_bias_weight')[0]),
        places=2)

  def testDisableCenteredBias(self):
    """Tests bias is centered or not."""

    def _input_fn_train():
      # Create 4 rows, three (y = x), one (y=Not(x))
      labels = constant_op.constant([[1], [1], [1], [0]])
      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
      return features, labels

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        enable_centered_bias=False)

    classifier.fit(input_fn=_input_fn_train, steps=500)
    self.assertNotIn('centered_bias_weight', classifier.get_variable_names())

  def testGlobalStepLinearOnly(self):
    """Tests global step update for linear-only model."""

    def input_fn():
      return {
          'age': constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 10)
    age = feature_column.real_valued_column('age')

    step_counter = _StepCounterHook()
    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[age, language])
    classifier.fit(input_fn=input_fn, steps=100, monitors=[step_counter])

    self.assertEqual(100, step_counter.steps)

  def testGlobalStepDNNOnly(self):
    """Tests global step update for dnn-only model."""

    def input_fn():
      return {
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 10)

    step_counter = _StepCounterHook()
    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        dnn_feature_columns=[
            feature_column.embedding_column(language, dimension=1)],
        dnn_hidden_units=[3, 3])
    classifier.fit(input_fn=input_fn, steps=100, monitors=[step_counter])

    self.assertEqual(100, step_counter.steps)

  def testGlobalStepDNNLinearCombinedBug(self):
    """Tests global step update for dnn-linear combined model."""

    def input_fn():
      return {
          'age': constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 10)
    age = feature_column.real_valued_column('age')

    step_counter = _StepCounterHook()
    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[age, language],
        dnn_feature_columns=[
            feature_column.embedding_column(language, dimension=1)],
        dnn_hidden_units=[3, 3],
        # Deliberately keep the legacy double-increment behavior so the test
        # documents it; the fixed path is covered by the test below.
        fix_global_step_increment_bug=False)
    classifier.fit(input_fn=input_fn, steps=100, monitors=[step_counter])
    global_step = classifier.get_variable_value('global_step')

    if global_step == 100:
      # Expected is 100, but because of the global step increment bug, is 50.
      # Occasionally, step increments one more time due to a race condition,
      # reaching 51 steps.
      self.assertIn(step_counter.steps, [50, 51])
    else:
      # Occasionally, training stops when global_step == 102, due to a race
      # condition. In addition, occasionally step increments one more time due
      # to a race condition reaching 52 steps.
      self.assertIn(step_counter.steps, [51, 52])

  def testGlobalStepDNNLinearCombinedBugFixed(self):
    """Tests global step update for dnn-linear combined model."""

    def input_fn():
      return {
          'age': constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 10)
    age = feature_column.real_valued_column('age')

    step_counter = _StepCounterHook()
    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[age, language],
        dnn_feature_columns=[
            feature_column.embedding_column(language, dimension=1)],
        dnn_hidden_units=[3, 3],
        fix_global_step_increment_bug=True)
    classifier.fit(input_fn=input_fn, steps=100, monitors=[step_counter])

    self.assertEqual(100, step_counter.steps)

  def testLinearOnly(self):
    """Tests that linear-only instantiation works."""

    def input_fn():
      return {
          'age':
              constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 100)
    age = feature_column.real_valued_column('age')

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[age, language])
    classifier.fit(input_fn=input_fn, steps=100)
    loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    classifier.fit(input_fn=input_fn, steps=200)
    loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss2, loss1)

    # Only linear variables should exist; no DNN variables were created.
    variable_names = classifier.get_variable_names()
    self.assertNotIn('dnn/logits/biases', variable_names)
    self.assertNotIn('dnn/logits/weights', variable_names)
    self.assertIn('linear/bias_weight', variable_names)
    self.assertIn('linear/age/weight', variable_names)
    self.assertIn('linear/language/weights', variable_names)
    # NOTE(review): assertEquals is a deprecated alias of assertEqual.
    self.assertEquals(
        1, len(classifier.get_variable_value('linear/age/weight')))
    self.assertEquals(
        100, len(classifier.get_variable_value('linear/language/weights')))

  def testLinearOnlyOneFeature(self):
    """Tests that linear-only instantiation works for one feature only."""

    def input_fn():
      return {
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 99)

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[language])
    classifier.fit(input_fn=input_fn, steps=100)
    loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    classifier.fit(input_fn=input_fn, steps=200)
    loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
    self.assertLess(loss2, loss1)

    variable_names = classifier.get_variable_names()
    self.assertNotIn('dnn/logits/biases', variable_names)
    self.assertNotIn('dnn/logits/weights', variable_names)
    self.assertIn('linear/bias_weight', variable_names)
    self.assertIn('linear/language/weights', variable_names)
    # NOTE(review): assertEquals is a deprecated alias of assertEqual.
    self.assertEquals(
        1, len(classifier.get_variable_value('linear/bias_weight')))
    self.assertEquals(
        99, len(classifier.get_variable_value('linear/language/weights')))

  def testDNNOnly(self):
    """Tests that DNN-only instantiation works."""
    cont_features = [feature_column.real_valued_column('feature', dimension=4)]

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        n_classes=3, dnn_feature_columns=cont_features, dnn_hidden_units=[3, 3])

    classifier.fit(input_fn=test_data.iris_input_multiclass_fn, steps=1000)
    classifier.evaluate(input_fn=test_data.iris_input_multiclass_fn, steps=100)

    # Only DNN variables should exist; no linear variables were created.
    variable_names = classifier.get_variable_names()
    self.assertIn('dnn/hiddenlayer_0/weights', variable_names)
    self.assertIn('dnn/hiddenlayer_0/biases', variable_names)
    self.assertIn('dnn/hiddenlayer_1/weights', variable_names)
    self.assertIn('dnn/hiddenlayer_1/biases', variable_names)
    self.assertIn('dnn/logits/weights', variable_names)
    self.assertIn('dnn/logits/biases', variable_names)
    self.assertNotIn('linear/bias_weight', variable_names)
    self.assertNotIn('linear/feature_BUCKETIZED/weight', variable_names)

  def testDNNWeightsBiasesNames(self):
    """Tests the names of DNN weights and biases in the checkpoints."""

    def _input_fn_train():
      # Create 4 rows, three (y = x), one (y=Not(x))
      labels = constant_op.constant([[1], [1], [1], [0]])
      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
      return features, labels

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3])

    classifier.fit(input_fn=_input_fn_train, steps=5)
    variable_names = classifier.get_variable_names()
    self.assertIn('dnn/hiddenlayer_0/weights', variable_names)
    self.assertIn('dnn/hiddenlayer_0/biases', variable_names)
    self.assertIn('dnn/hiddenlayer_1/weights', variable_names)
    self.assertIn('dnn/hiddenlayer_1/biases', variable_names)
    self.assertIn('dnn/logits/weights', variable_names)
    self.assertIn('dnn/logits/biases', variable_names)


class DNNLinearCombinedRegressorTest(test.TestCase):
  """Tests for the DNNLinearCombinedRegressor estimator."""

  def testExperimentIntegration(self):
    """Tests that the regressor works inside an Experiment."""
    cont_features = [feature_column.real_valued_column('feature', dimension=4)]

    exp = experiment.Experiment(
        estimator=dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=cont_features,
            dnn_feature_columns=cont_features,
            dnn_hidden_units=[3, 3]),
        train_input_fn=test_data.iris_input_logistic_fn,
        eval_input_fn=test_data.iris_input_logistic_fn)
    exp.test()

  def testEstimatorContract(self):
    """Checks the class exposes the standard Estimator interface."""
    estimator_test_utils.assert_estimator_contract(
        self, dnn_linear_combined.DNNLinearCombinedRegressor)

  def testRegression_MatrixData(self):
    """Tests regression using matrix data as input."""
    cont_features = [feature_column.real_valued_column('feature', dimension=4)]

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=cont_features,
        dnn_feature_columns=cont_features,
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=test_data.iris_input_logistic_fn, steps=10)
    scores = regressor.evaluate(
        input_fn=test_data.iris_input_logistic_fn, steps=1)
    self.assertIn('loss', scores.keys())

  def testRegression_TensorData(self):
    """Tests regression using tensor data as input."""

    def _input_fn():
      # Create 4 rows of (y = x)
      labels = constant_op.constant([[100.], [3.], [2.], [2.]])
      features = {'x': constant_op.constant([[100.], [3.], [2.], [2.]])}
      return features, labels

    classifier = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=10)
    classifier.evaluate(input_fn=_input_fn, steps=1)

  def testLoss(self):
    """Tests loss calculation."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # The algorithm should learn (y = 0.25).
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
      return features, labels

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn_train, steps=100)
    scores = regressor.evaluate(input_fn=_input_fn_train, steps=1)
    # Average square loss = (0.75^2 + 3*0.25^2) / 4 = 0.1875
    self.assertAlmostEqual(0.1875, scores['loss'], delta=0.1)

  def testLossWithWeights(self):
    """Tests loss calculation with weights."""

    def _input_fn_train():
      # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x))
      # The algorithm should learn (y = 0.25).
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    def _input_fn_eval():
      # 4 rows, with different weights.
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[7.], [1.], [1.], [1.]])
      }
      return features, labels

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        weight_column_name='w',
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn_train, steps=100)
    scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
    # Weighted average square loss = (7*0.75^2 + 3*0.25^2) / 10 = 0.4125
    self.assertAlmostEqual(0.4125, scores['loss'], delta=0.1)

  def testTrainWithWeights(self):
    """Tests training with given weight column."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # First row has more weight than others. Model should fit (y=x) better
      # than (y=Not(x)) due to the relative higher weight of the first row.
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[100.], [3.], [2.], [2.]])
      }
      return features, labels

    def _input_fn_eval():
      # Create 4 rows (y = x)
      labels = constant_op.constant([[1.], [1.], [1.], [1.]])
      features = {
          'x': array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        weight_column_name='w',
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn_train, steps=100)
    scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
    # The model should learn (y = x) because of the weights, so the loss should
    # be close to zero.
    self.assertLess(scores['loss'], 0.2)

  def testPredict_AsIterableFalse(self):
    """Tests predict method with as_iterable=False."""
    labels = [1., 0., 0.2]

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant(labels, dtype=dtypes.float32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[
            language_column, feature_column.real_valued_column('age')
        ],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language_column, dimension=1),
            feature_column.real_valued_column('age')
        ],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=10)

    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores.keys())
    regressor.predict_scores(input_fn=_input_fn, as_iterable=False)

  def testPredict_AsIterable(self):
    """Tests predict method with as_iterable=True."""
    labels = [1., 0., 0.2]

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant(labels, dtype=dtypes.float32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[
            language_column, feature_column.real_valued_column('age')
        ],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language_column, dimension=1),
            feature_column.real_valued_column('age')
        ],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=10)

    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores.keys())
    # Limit to one epoch so the returned iterator terminates.
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    regressor.predict_scores(input_fn=predict_input_fn, as_iterable=True)

  def testCustomMetrics(self):
    """Tests custom evaluation metrics."""

    def _input_fn(num_epochs=None):
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x':
              input_lib.limit_epochs(
                  array_ops.ones(
                      shape=[4, 1], dtype=dtypes.float32),
                  num_epochs=num_epochs)
      }
      return features, labels

    def _my_metric_op(predictions, labels):
      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=10)
    scores = regressor.evaluate(
        input_fn=_input_fn,
        steps=1,
        metrics={
            'my_error': metric_ops.streaming_mean_squared_error,
            ('my_metric', 'scores'): _my_metric_op
        })
    self.assertIn('loss', set(scores.keys()))
    self.assertIn('my_error', set(scores.keys()))
    self.assertIn('my_metric', set(scores.keys()))
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions = np.array(list(regressor.predict_scores(
        input_fn=predict_input_fn)))
    # The reported streaming MSE must match sklearn's MSE on the same data.
    self.assertAlmostEqual(
        _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
        scores['my_error'])

    # Tests the case that the 2nd element of the key is not "scores".
    with self.assertRaises(KeyError):
      regressor.evaluate(
          input_fn=_input_fn,
          steps=1,
          metrics={
              ('my_error', 'predictions'):
                  metric_ops.streaming_mean_squared_error
          })

    # Tests the case where the tuple of the key doesn't have 2 elements.
    with self.assertRaises(ValueError):
      regressor.evaluate(
          input_fn=_input_fn,
          steps=1,
          metrics={
              ('bad_length_name', 'scores', 'bad_length'):
                  metric_ops.streaming_mean_squared_error
          })

  def testCustomMetricsWithMetricSpec(self):
    """Tests custom evaluation metrics."""

    def _input_fn(num_epochs=None):
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x':
              input_lib.limit_epochs(
                  array_ops.ones(
                      shape=[4, 1], dtype=dtypes.float32),
                  num_epochs=num_epochs)
      }
      return features, labels

    def _my_metric_op(predictions, labels):
      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=5)
    scores = regressor.evaluate(
        input_fn=_input_fn,
        steps=1,
        metrics={
            'my_error':
                MetricSpec(
                    metric_fn=metric_ops.streaming_mean_squared_error,
                    prediction_key='scores'),
            'my_metric':
                MetricSpec(
                    metric_fn=_my_metric_op, prediction_key='scores')
        })
    self.assertIn('loss', set(scores.keys()))
    self.assertIn('my_error', set(scores.keys()))
    self.assertIn('my_metric', set(scores.keys()))
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions = np.array(list(regressor.predict_scores(
        input_fn=predict_input_fn)))
    self.assertAlmostEqual(
        _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
        scores['my_error'])

    # Tests the case where the prediction_key is not "scores".
    # NOTE(review): assertRaisesRegexp is the deprecated spelling of
    # assertRaisesRegex.
    with self.assertRaisesRegexp(KeyError, 'bad_type'):
      regressor.evaluate(
          input_fn=_input_fn,
          steps=1,
          metrics={
              'bad_name':
                  MetricSpec(
                      metric_fn=metric_ops.streaming_auc,
                      prediction_key='bad_type')
          })

  def testExport(self):
    """Tests export model for servo."""
    labels = [1., 0., 0.2]

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant(labels, dtype=dtypes.float32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[
            language_column, feature_column.real_valued_column('age')
        ],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language_column, dimension=1),
        ],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=10)

    export_dir = tempfile.mkdtemp()
    input_feature_key = 'examples'

    def serving_input_fn():
      # Training features plus a string placeholder for serialized examples.
      features, targets = _input_fn()
      features[input_feature_key] = array_ops.placeholder(dtypes.string)
      return features, targets

    regressor.export(
        export_dir,
        serving_input_fn,
        input_feature_key,
        use_deprecated_input_fn=False)

  def testTrainSaveLoad(self):
    """Tests regression with restarting training / evaluate."""

    def _input_fn(num_epochs=None):
      # Create 4 rows of (y = x)
      labels = constant_op.constant([[100.], [3.], [2.], [2.]])
      features = {
          'x':
              input_lib.limit_epochs(
                  constant_op.constant([[100.], [3.], [2.], [2.]]),
                  num_epochs=num_epochs)
      }
      return features, labels

    model_dir = tempfile.mkdtemp()
    # pylint: disable=g-long-lambda
    new_regressor = lambda: dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        model_dir=model_dir,
        config=run_config.RunConfig(tf_random_seed=1))

    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    regressor = new_regressor()
    regressor.fit(input_fn=_input_fn, steps=10)
    predictions = list(regressor.predict_scores(input_fn=predict_input_fn))
    del regressor

    # A fresh estimator pointed at the same model_dir must restore the
    # checkpoint and reproduce the same predictions.
    regressor = new_regressor()
    predictions2 = list(regressor.predict_scores(input_fn=predict_input_fn))
    self.assertAllClose(predictions, predictions2)

  def testTrainWithPartitionedVariables(self):
    """Tests training with partitioned variables."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)

    # The given hash_bucket_size results in variables larger than the
    # default min_slice_size attribute, so the variables are partitioned.
    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=2e7)

    tf_config = {
        'cluster': {
            run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
        }
    }
    # NOTE(review): the source formatting was mangled here; this assumes only
    # the RunConfig construction happens under the TF_CONFIG patch (RunConfig
    # reads the environment at __init__) — confirm against upstream.
    with test.mock.patch.dict('os.environ',
                              {'TF_CONFIG': json.dumps(tf_config)}):
      config = run_config.RunConfig(tf_random_seed=1)
      # Because we did not start a distributed cluster, we need to pass an
      # empty ClusterSpec, otherwise the device_setter will look for
      # distributed jobs, such as "/job:ps" which are not present.
      config._cluster_spec = server_lib.ClusterSpec({})

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[
            language_column, feature_column.real_valued_column('age')
        ],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language_column, dimension=1),
            feature_column.real_valued_column('age')
        ],
        dnn_hidden_units=[3, 3],
        config=config)

    regressor.fit(input_fn=_input_fn, steps=100)

    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores.keys())

  def testDisableCenteredBias(self):
    """Tests that we can disable centered bias."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[
            language_column, feature_column.real_valued_column('age')
        ],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language_column, dimension=1),
            feature_column.real_valued_column('age')
        ],
        dnn_hidden_units=[3, 3],
        enable_centered_bias=False,
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=100)

    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores.keys())

  def testLinearOnly(self):
    """Tests linear-only instantiation and training."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[
            language_column, feature_column.real_valued_column('age')
        ],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=100)

    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores.keys())

  def testDNNOnly(self):
    """Tests DNN-only instantiation and training."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        dnn_feature_columns=[
            feature_column.embedding_column(
                language_column, dimension=1),
            feature_column.real_valued_column('age')
        ],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=100)

    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores.keys())


class FeatureEngineeringFunctionTest(test.TestCase):
  """Tests feature_engineering_fn."""

  # NOTE(review): despite the name, this test compares an estimator WITH a
  # feature_engineering_fn against one without, and checks that the fn's
  # transformed (10x scaled) data drives the first model's predictions.
  def testNoneFeatureEngineeringFn(self):

    def input_fn():
      # Create 4 rows of (y = x)
      labels = constant_op.constant([[100.], [3.], [2.], [2.]])
      features = {'x': constant_op.constant([[100.], [3.], [2.], [2.]])}
      return features, labels

    def feature_engineering_fn(features, labels):
      # Ignore the incoming data and substitute a 10x-scaled dataset.
      _, _ = features, labels
      labels = constant_op.constant([[1000.], [30.], [20.], [20.]])
      features = {'x': constant_op.constant([[1000.], [30.], [20.], [20.]])}
      return features, labels

    estimator_with_fe_fn = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1),
        feature_engineering_fn=feature_engineering_fn)
    estimator_with_fe_fn.fit(input_fn=input_fn, steps=110)

    estimator_without_fe_fn = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))
    estimator_without_fe_fn.fit(input_fn=input_fn, steps=110)

    # predictions = y
    prediction_with_fe_fn = next(
        estimator_with_fe_fn.predict_scores(
            input_fn=input_fn, as_iterable=True))
    self.assertAlmostEqual(1000., prediction_with_fe_fn, delta=10.0)
    prediction_without_fe_fn = next(
        estimator_without_fe_fn.predict_scores(
            input_fn=input_fn, as_iterable=True))
    self.assertAlmostEqual(100., prediction_without_fe_fn, delta=1.0)


if __name__ == '__main__':
  test.main()