# Home | History | Annotate | Download | only in estimators
      1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Tests for DNNLinearCombinedEstimators."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import functools
     22 import json
     23 import tempfile
     24 
     25 import numpy as np
     26 
     27 from tensorflow.contrib.layers.python.layers import feature_column
     28 from tensorflow.contrib.learn.python.learn import experiment
     29 from tensorflow.contrib.learn.python.learn.datasets import base
     30 from tensorflow.contrib.learn.python.learn.estimators import _sklearn
     31 from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
     32 from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
     33 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
     34 from tensorflow.contrib.learn.python.learn.estimators import model_fn
     35 from tensorflow.contrib.learn.python.learn.estimators import run_config
     36 from tensorflow.contrib.learn.python.learn.estimators import test_data
     37 from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
     38 from tensorflow.contrib.metrics.python.ops import metric_ops
     39 from tensorflow.python.feature_column import feature_column as fc_core
     40 from tensorflow.python.framework import constant_op
     41 from tensorflow.python.framework import dtypes
     42 from tensorflow.python.framework import ops
     43 from tensorflow.python.framework import sparse_tensor
     44 from tensorflow.python.ops import array_ops
     45 from tensorflow.python.ops import init_ops
     46 from tensorflow.python.ops import math_ops
     47 from tensorflow.python.ops.losses import losses
     48 from tensorflow.python.platform import test
     49 from tensorflow.python.training import adagrad
     50 from tensorflow.python.training import ftrl
     51 from tensorflow.python.training import input as input_lib
     52 from tensorflow.python.training import learning_rate_decay
     53 from tensorflow.python.training import monitored_session
     54 from tensorflow.python.training import server_lib
     55 from tensorflow.python.training import session_run_hook
     56 from tensorflow.python.training import sync_replicas_optimizer
     57 from tensorflow.python.training import training_util
     58 
     59 
     60 def _assert_metrics_in_range(keys, metrics):
     61   epsilon = 0.00001  # Added for floating point edge cases.
     62   for key in keys:
     63     estimator_test_utils.assert_in_range(0.0 - epsilon, 1.0 + epsilon, key,
     64                                          metrics)
     65 
     66 
     67 class _CheckCallsHead(head_lib.Head):
     68   """Head that checks whether head_ops is called."""
     69 
     70   def __init__(self):
     71     self._head_ops_called_times = 0
     72 
     73   @property
     74   def logits_dimension(self):
     75     return 1
     76 
     77   def create_model_fn_ops(
     78       self, mode, features, labels=None, train_op_fn=None, logits=None,
     79       logits_input=None, scope=None):
     80     """See `_Head`."""
     81     self._head_ops_called_times += 1
     82     loss = losses.mean_squared_error(labels, logits)
     83     return model_fn.ModelFnOps(
     84         mode,
     85         predictions={'loss': loss},
     86         loss=loss,
     87         train_op=train_op_fn(loss),
     88         eval_metric_ops={'loss': loss})
     89 
     90   @property
     91   def head_ops_called_times(self):
     92     return self._head_ops_called_times
     93 
     94 
     95 class _StepCounterHook(session_run_hook.SessionRunHook):
     96   """Counts the number of training steps."""
     97 
     98   def __init__(self):
     99     self._steps = 0
    100 
    101   def after_run(self, run_context, run_values):
    102     del run_context, run_values
    103     self._steps += 1
    104 
    105   @property
    106   def steps(self):
    107     return self._steps
    108 
    109 
class EmbeddingMultiplierTest(test.TestCase):
  """dnn_model_fn tests."""

  def testRaisesNonEmbeddingColumn(self):
    """Using embedding_lr_multipliers on a non-embedding column must fail."""
    one_hot_language = feature_column.one_hot_column(
        feature_column.sparse_column_with_hash_bucket('language', 10))

    params = {
        'dnn_feature_columns': [one_hot_language],
        'head': head_lib.multi_class_head(2),
        'dnn_hidden_units': [1],
        # Set lr mult to 0. to keep embeddings constant.
        'embedding_lr_multipliers': {
            one_hot_language: 0.0
        },
        'dnn_optimizer': 'Adagrad',
    }
    features = {
        'language':
            sparse_tensor.SparseTensor(
                values=['en', 'fr', 'zh'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1]),
    }
    labels = constant_op.constant([[0], [0], [0]], dtype=dtypes.int32)
    # one_hot_language is a one-hot (not embedding) column, so keying an lr
    # multiplier on it should be rejected while building the model_fn.
    with self.assertRaisesRegexp(ValueError,
                                 'can only be defined for embedding columns'):
      dnn_linear_combined._dnn_linear_combined_model_fn(features, labels,
                                                        model_fn.ModeKeys.TRAIN,
                                                        params)

  def testMultipliesGradient(self):
    """A 0.0 lr multiplier should keep that column's embedding frozen."""
    embedding_language = feature_column.embedding_column(
        feature_column.sparse_column_with_hash_bucket('language', 10),
        dimension=1,
        initializer=init_ops.constant_initializer(0.1))
    embedding_wire = feature_column.embedding_column(
        feature_column.sparse_column_with_hash_bucket('wire', 10),
        dimension=1,
        initializer=init_ops.constant_initializer(0.1))

    params = {
        'dnn_feature_columns': [embedding_language, embedding_wire],
        'head': head_lib.multi_class_head(2),
        'dnn_hidden_units': [1],
        # Set lr mult to 0. to keep language embeddings constant, whereas wire
        # embeddings will be trained.
        'embedding_lr_multipliers': {
            embedding_language: 0.0
        },
        'dnn_optimizer': 'Adagrad',
    }
    with ops.Graph().as_default():
      features = {
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [1, 0], [2, 0]],
                  dense_shape=[3, 1]),
          'wire':
              sparse_tensor.SparseTensor(
                  values=['omar', 'stringer', 'marlo'],
                  indices=[[0, 0], [1, 0], [2, 0]],
                  dense_shape=[3, 1]),
      }
      labels = constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
      training_util.create_global_step()
      model_ops = dnn_linear_combined._dnn_linear_combined_model_fn(
          features, labels, model_fn.ModeKeys.TRAIN, params)
      with monitored_session.MonitoredSession() as sess:
        language_var = dnn_linear_combined._get_embedding_variable(
            embedding_language, 'dnn', 'dnn/input_from_feature_columns')
        language_initial_value = sess.run(language_var)
        # Run two training steps; the language embedding must not move.
        for _ in range(2):
          _, language_value = sess.run([model_ops.train_op, language_var])

    self.assertAllClose(language_value, language_initial_value)
    # We could also test that wire_value changed, but that test would be flaky.
    188 
    189 
    190 class DNNLinearCombinedEstimatorTest(test.TestCase):
    191 
    192   def testEstimatorContract(self):
    193     estimator_test_utils.assert_estimator_contract(
    194         self, dnn_linear_combined.DNNLinearCombinedEstimator)
    195 
    196   def testNoFeatureColumns(self):
    197     with self.assertRaisesRegexp(
    198         ValueError,
    199         'Either linear_feature_columns or dnn_feature_columns must be defined'):
    200       dnn_linear_combined.DNNLinearCombinedEstimator(
    201           head=_CheckCallsHead(),
    202           linear_feature_columns=None,
    203           dnn_feature_columns=None,
    204           dnn_hidden_units=[3, 3])
    205 
    206   def testCheckCallsHead(self):
    207     """Tests binary classification using matrix data as input."""
    208     head = _CheckCallsHead()
    209     iris = test_data.prepare_iris_data_for_logistic_regression()
    210     cont_features = [
    211         feature_column.real_valued_column('feature', dimension=4)]
    212     bucketized_feature = [feature_column.bucketized_column(
    213         cont_features[0], test_data.get_quantile_based_buckets(iris.data, 10))]
    214 
    215     estimator = dnn_linear_combined.DNNLinearCombinedEstimator(
    216         head,
    217         linear_feature_columns=bucketized_feature,
    218         dnn_feature_columns=cont_features,
    219         dnn_hidden_units=[3, 3])
    220 
    221     estimator.fit(input_fn=test_data.iris_input_multiclass_fn, steps=10)
    222     self.assertEqual(1, head.head_ops_called_times)
    223 
    224     estimator.evaluate(input_fn=test_data.iris_input_multiclass_fn, steps=10)
    225     self.assertEqual(2, head.head_ops_called_times)
    226 
    227     estimator.predict(input_fn=test_data.iris_input_multiclass_fn)
    228     self.assertEqual(3, head.head_ops_called_times)
    229 
    230 
    231 class DNNLinearCombinedClassifierTest(test.TestCase):
    232 
    233   def testEstimatorContract(self):
    234     estimator_test_utils.assert_estimator_contract(
    235         self, dnn_linear_combined.DNNLinearCombinedClassifier)
    236 
    237   def testExperimentIntegration(self):
    238     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
    239 
    240     exp = experiment.Experiment(
    241         estimator=dnn_linear_combined.DNNLinearCombinedClassifier(
    242             linear_feature_columns=cont_features,
    243             dnn_feature_columns=cont_features,
    244             dnn_hidden_units=[3, 3]),
    245         train_input_fn=test_data.iris_input_logistic_fn,
    246         eval_input_fn=test_data.iris_input_logistic_fn)
    247     exp.test()
    248 
    249   def testNoFeatureColumns(self):
    250     with self.assertRaisesRegexp(
    251         ValueError,
    252         'Either linear_feature_columns or dnn_feature_columns must be defined'):
    253       dnn_linear_combined.DNNLinearCombinedClassifier(
    254           linear_feature_columns=None,
    255           dnn_feature_columns=None,
    256           dnn_hidden_units=[3, 3])
    257 
    258   def testNoDnnHiddenUnits(self):
    259     def _input_fn():
    260       return {
    261           'age':
    262               constant_op.constant([1]),
    263           'language':
    264               sparse_tensor.SparseTensor(
    265                   values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
    266       }, constant_op.constant([[1]])
    267 
    268     language = feature_column.sparse_column_with_hash_bucket('language', 100)
    269     age = feature_column.real_valued_column('age')
    270 
    271     with self.assertRaisesRegexp(
    272         ValueError,
    273         'dnn_hidden_units must be defined when dnn_feature_columns is '
    274         'specified'):
    275       classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    276           dnn_feature_columns=[age, language])
    277       classifier.fit(input_fn=_input_fn, steps=2)
    278 
    279   def testSyncReplicasOptimizerUnsupported(self):
    280     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
    281 
    282     sync_optimizer = sync_replicas_optimizer.SyncReplicasOptimizer(
    283         opt=adagrad.AdagradOptimizer(learning_rate=0.1),
    284         replicas_to_aggregate=1,
    285         total_num_replicas=1)
    286     sync_hook = sync_optimizer.make_session_run_hook(is_chief=True)
    287     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    288         n_classes=3,
    289         dnn_feature_columns=cont_features,
    290         dnn_hidden_units=[3, 3],
    291         dnn_optimizer=sync_optimizer)
    292 
    293     with self.assertRaisesRegexp(
    294         ValueError,
    295         'SyncReplicasOptimizer is not supported in DNNLinearCombined model'):
    296       classifier.fit(
    297           input_fn=test_data.iris_input_multiclass_fn, steps=100,
    298           monitors=[sync_hook])
    299 
    300   def testEmbeddingMultiplier(self):
    301     embedding_language = feature_column.embedding_column(
    302         feature_column.sparse_column_with_hash_bucket('language', 10),
    303         dimension=1,
    304         initializer=init_ops.constant_initializer(0.1))
    305     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    306         dnn_feature_columns=[embedding_language],
    307         dnn_hidden_units=[3, 3],
    308         embedding_lr_multipliers={embedding_language: 0.8})
    309     self.assertEqual({
    310         embedding_language: 0.8
    311     }, classifier.params['embedding_lr_multipliers'])
    312 
  def testInputPartitionSize(self):
    """input_layer_min_slice_size should control input-layer partitioning."""

    def _input_fn_float_label(num_epochs=None):
      features = {
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      labels = constant_op.constant([[0.8], [0.], [0.2]], dtype=dtypes.float32)
      return features, labels

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        feature_column.embedding_column(language_column, dimension=1),
    ]

    # Set num_ps_replica to be 10 and the min slice size to be extremely small,
    # so as to ensure that there'll be 10 partititions produced.
    # NOTE(review): pokes the private _num_ps_replicas attribute because
    # RunConfig derives it from TF_CONFIG, which this test does not set.
    config = run_config.RunConfig(tf_random_seed=1)
    config._num_ps_replicas = 10
    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        n_classes=2,
        dnn_feature_columns=feature_columns,
        dnn_hidden_units=[3, 3],
        dnn_optimizer='Adagrad',
        config=config,
        input_layer_min_slice_size=1)

    # Ensure the param is passed in.
    self.assertTrue(callable(classifier.params['input_layer_partitioner']))

    # Ensure the partition count is 10.
    # Counts Adagrad slot variables, one per partition of the embedding.
    classifier.fit(input_fn=_input_fn_float_label, steps=50)
    partition_count = 0
    for name in classifier.get_variable_names():
      if 'language_embedding' in name and 'Adagrad' in name:
        partition_count += 1
    self.assertEqual(10, partition_count)
    354 
    355   def testLogisticRegression_MatrixData(self):
    356     """Tests binary classification using matrix data as input."""
    357     iris = test_data.prepare_iris_data_for_logistic_regression()
    358     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
    359     bucketized_feature = [
    360         feature_column.bucketized_column(
    361             cont_features[0],
    362             test_data.get_quantile_based_buckets(iris.data, 10))
    363     ]
    364 
    365     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    366         linear_feature_columns=bucketized_feature,
    367         dnn_feature_columns=cont_features,
    368         dnn_hidden_units=[3, 3])
    369 
    370     classifier.fit(input_fn=test_data.iris_input_logistic_fn, steps=100)
    371     scores = classifier.evaluate(
    372         input_fn=test_data.iris_input_logistic_fn, steps=100)
    373     _assert_metrics_in_range(('accuracy', 'auc'), scores)
    374 
  def testLogisticRegression_TensorData(self):
    """Tests binary classification using Tensor data as input."""

    def _input_fn():
      iris = test_data.prepare_iris_data_for_logistic_regression()
      features = {}
      for i in range(4):
        # The following shows how to provide the Tensor data for
        # RealValuedColumns.
        features.update({
            str(i):
                array_ops.reshape(
                    constant_op.constant(
                        iris.data[:, i], dtype=dtypes.float32), [-1, 1])
        })
      # The following shows how to provide the SparseTensor data for
      # a SparseColumn.
      features['dummy_sparse_column'] = sparse_tensor.SparseTensor(
          values=['en', 'fr', 'zh'],
          indices=[[0, 0], [0, 1], [60, 0]],
          dense_shape=[len(iris.target), 2])
      labels = array_ops.reshape(
          constant_op.constant(
              iris.target, dtype=dtypes.int32), [-1, 1])
      return features, labels

    iris = test_data.prepare_iris_data_for_logistic_regression()
    # One real-valued column per iris measurement, named '0'..'3' to match
    # the feature keys produced by _input_fn.
    cont_features = [
        feature_column.real_valued_column(str(i)) for i in range(4)
    ]
    # Linear part: bucketized versions of the same features plus the sparse
    # column; DNN part: the raw real-valued columns.
    linear_features = [
        feature_column.bucketized_column(cont_features[i],
                                         test_data.get_quantile_based_buckets(
                                             iris.data[:, i], 10))
        for i in range(4)
    ]
    linear_features.append(
        feature_column.sparse_column_with_hash_bucket(
            'dummy_sparse_column', hash_bucket_size=100))

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=linear_features,
        dnn_feature_columns=cont_features,
        dnn_hidden_units=[3, 3])

    classifier.fit(input_fn=_input_fn, steps=100)
    scores = classifier.evaluate(input_fn=_input_fn, steps=100)
    _assert_metrics_in_range(('accuracy', 'auc'), scores)
    423 
  def testEstimatorWithCoreFeatureColumns(self):
    """Tests binary classification using Tensor data as input."""
    # Same scenario as testLogisticRegression_TensorData, but built with the
    # core feature-column API (fc_core) instead of contrib feature columns.

    def _input_fn():
      iris = test_data.prepare_iris_data_for_logistic_regression()
      features = {}
      for i in range(4):
        # The following shows how to provide the Tensor data for
        # RealValuedColumns.
        features.update({
            str(i):
                array_ops.reshape(
                    constant_op.constant(iris.data[:, i], dtype=dtypes.float32),
                    [-1, 1])
        })
      # The following shows how to provide the SparseTensor data for
      # a SparseColumn.
      features['dummy_sparse_column'] = sparse_tensor.SparseTensor(
          values=['en', 'fr', 'zh'],
          indices=[[0, 0], [0, 1], [60, 0]],
          dense_shape=[len(iris.target), 2])
      labels = array_ops.reshape(
          constant_op.constant(iris.target, dtype=dtypes.int32), [-1, 1])
      return features, labels

    iris = test_data.prepare_iris_data_for_logistic_regression()
    cont_features = [fc_core.numeric_column(str(i)) for i in range(4)]
    # fc_core.bucketized_column requires sorted, unique boundaries, hence the
    # sorted(set(...)) around the quantile buckets.
    linear_features = [
        fc_core.bucketized_column(
            cont_features[i],
            sorted(set(test_data.get_quantile_based_buckets(
                iris.data[:, i], 10)))) for i in range(4)
    ]
    linear_features.append(
        fc_core.categorical_column_with_hash_bucket(
            'dummy_sparse_column', hash_bucket_size=100))

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=linear_features,
        dnn_feature_columns=cont_features,
        dnn_hidden_units=[3, 3])

    classifier.fit(input_fn=_input_fn, steps=100)
    scores = classifier.evaluate(input_fn=_input_fn, steps=100)
    _assert_metrics_in_range(('accuracy', 'auc'), scores)
    469 
  def testTrainWithPartitionedVariables(self):
    """Tests training with partitioned variables."""

    def _input_fn():
      features = {
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      labels = constant_op.constant([[1], [0], [0]])
      return features, labels

    sparse_features = [
        # The given hash_bucket_size results in variables larger than the
        # default min_slice_size attribute, so the variables are partitioned.
        feature_column.sparse_column_with_hash_bucket(
            'language', hash_bucket_size=2e7)
    ]
    embedding_features = [
        feature_column.embedding_column(
            sparse_features[0], dimension=1)
    ]

    # Fake a two-PS cluster via TF_CONFIG so that RunConfig reports ps
    # replicas, which is what triggers variable partitioning.
    tf_config = {
        'cluster': {
            run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
        }
    }
    with test.mock.patch.dict('os.environ',
                              {'TF_CONFIG': json.dumps(tf_config)}):
      config = run_config.RunConfig()
      # Because we did not start a distributed cluster, we need to pass an
      # empty ClusterSpec, otherwise the device_setter will look for
      # distributed jobs, such as "/job:ps" which are not present.
      config._cluster_spec = server_lib.ClusterSpec({})

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=sparse_features,
        dnn_feature_columns=embedding_features,
        dnn_hidden_units=[3, 3],
        config=config)

    classifier.fit(input_fn=_input_fn, steps=100)
    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    _assert_metrics_in_range(('accuracy', 'auc'), scores)
    517 
    518   def testMultiClass(self):
    519     """Tests multi-class classification using matrix data as input.
    520 
    521     Please see testLogisticRegression_TensorData() for how to use Tensor
    522     data as input instead.
    523     """
    524     iris = base.load_iris()
    525     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
    526     bucketized_features = [
    527         feature_column.bucketized_column(
    528             cont_features[0],
    529             test_data.get_quantile_based_buckets(iris.data, 10))
    530     ]
    531 
    532     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    533         n_classes=3,
    534         linear_feature_columns=bucketized_features,
    535         dnn_feature_columns=cont_features,
    536         dnn_hidden_units=[3, 3])
    537 
    538     classifier.fit(input_fn=test_data.iris_input_multiclass_fn, steps=100)
    539     scores = classifier.evaluate(
    540         input_fn=test_data.iris_input_multiclass_fn, steps=100)
    541     _assert_metrics_in_range(('accuracy',), scores)
    542 
  def testMultiClassLabelKeys(self):
    """Tests n_classes > 2 with label_keys vocabulary for labels."""
    # Byte literals needed for python3 test to pass.
    label_keys = [b'label0', b'label1', b'label2']

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [0.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      # Labels are string keys from label_keys, not integer class ids.
      labels = constant_op.constant(
          [[label_keys[1]], [label_keys[0]], [label_keys[0]]],
          dtype=dtypes.string)
      return features, labels

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        n_classes=3,
        linear_feature_columns=[language_column],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language_column, dimension=1),
            feature_column.real_valued_column('age')
        ],
        dnn_hidden_units=[3, 3],
        label_keys=label_keys)

    classifier.fit(input_fn=_input_fn, steps=50)

    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    _assert_metrics_in_range(('accuracy',), scores)
    self.assertIn('loss', scores)
    # Limit predict to a single epoch so the iterable terminates.
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predicted_classes = list(
        classifier.predict_classes(
            input_fn=predict_input_fn, as_iterable=True))
    self.assertEqual(3, len(predicted_classes))
    # Predictions must come back as members of the label_keys vocabulary.
    for pred in predicted_classes:
      self.assertIn(pred, label_keys)
    predictions = list(
        classifier.predict(input_fn=predict_input_fn, as_iterable=True))
    self.assertAllEqual(predicted_classes, predictions)
    595 
    596   def testLoss(self):
    597     """Tests loss calculation."""
    598 
    599     def _input_fn_train():
    600       # Create 4 rows, one of them (y = x), three of them (y=Not(x))
    601       # The logistic prediction should be (y = 0.25).
    602       features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
    603       labels = constant_op.constant([[1], [0], [0], [0]])
    604       return features, labels
    605 
    606     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    607         n_classes=2,
    608         linear_feature_columns=[feature_column.real_valued_column('x')],
    609         dnn_feature_columns=[feature_column.real_valued_column('x')],
    610         dnn_hidden_units=[3, 3],
    611         config=run_config.RunConfig(tf_random_seed=1))
    612 
    613     classifier.fit(input_fn=_input_fn_train, steps=100)
    614     scores = classifier.evaluate(input_fn=_input_fn_train, steps=1)
    615     # Cross entropy = -0.25*log(0.25)-0.75*log(0.75) = 0.562
    616     self.assertAlmostEqual(0.562, scores['loss'], delta=0.1)
    617 
    618   def testLossWithWeights(self):
    619     """Tests loss calculation with weights."""
    620 
    621     def _input_fn_train():
    622       # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x))
    623       # The logistic prediction should be (y = 0.25).
    624       features = {
    625           'x': array_ops.ones(
    626               shape=[4, 1], dtype=dtypes.float32),
    627           'w': constant_op.constant([[1.], [1.], [1.], [1.]])
    628       }
    629       labels = constant_op.constant([[1.], [0.], [0.], [0.]])
    630       return features, labels
    631 
    632     def _input_fn_eval():
    633       # 4 rows, with different weights.
    634       features = {
    635           'x': array_ops.ones(
    636               shape=[4, 1], dtype=dtypes.float32),
    637           'w': constant_op.constant([[7.], [1.], [1.], [1.]])
    638       }
    639       labels = constant_op.constant([[1.], [0.], [0.], [0.]])
    640       return features, labels
    641 
    642     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    643         weight_column_name='w',
    644         n_classes=2,
    645         linear_feature_columns=[feature_column.real_valued_column('x')],
    646         dnn_feature_columns=[feature_column.real_valued_column('x')],
    647         dnn_hidden_units=[3, 3],
    648         config=run_config.RunConfig(tf_random_seed=1))
    649     classifier.fit(input_fn=_input_fn_train, steps=100)
    650     scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
    651     # Weighted cross entropy = (-7*log(0.25)-3*log(0.75))/10 = 1.06
    652     self.assertAlmostEqual(1.06, scores['loss'], delta=0.1)
    653 
    654   def testTrainWithWeights(self):
    655     """Tests training with given weight column."""
    656 
    657     def _input_fn_train():
    658       # Create 4 rows, one of them (y = x), three of them (y=Not(x))
    659       # First row has more weight than others. Model should fit (y=x) better
    660       # than (y=Not(x)) due to the relative higher weight of the first row.
    661       labels = constant_op.constant([[1], [0], [0], [0]])
    662       features = {
    663           'x': array_ops.ones(
    664               shape=[4, 1], dtype=dtypes.float32),
    665           'w': constant_op.constant([[100.], [3.], [2.], [2.]])
    666       }
    667       return features, labels
    668 
    669     def _input_fn_eval():
    670       # Create 4 rows (y = x).
    671       labels = constant_op.constant([[1], [1], [1], [1]])
    672       features = {
    673           'x': array_ops.ones(
    674               shape=[4, 1], dtype=dtypes.float32),
    675           'w': constant_op.constant([[1.], [1.], [1.], [1.]])
    676       }
    677       return features, labels
    678 
    679     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    680         weight_column_name='w',
    681         linear_feature_columns=[feature_column.real_valued_column('x')],
    682         dnn_feature_columns=[feature_column.real_valued_column('x')],
    683         dnn_hidden_units=[3, 3],
    684         config=run_config.RunConfig(tf_random_seed=1))
    685     classifier.fit(input_fn=_input_fn_train, steps=100)
    686     scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
    687     _assert_metrics_in_range(('accuracy',), scores)
    688 
    689   def testCustomOptimizerByObject(self):
    690     """Tests binary classification using matrix data as input."""
    691     iris = test_data.prepare_iris_data_for_logistic_regression()
    692     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
    693     bucketized_features = [
    694         feature_column.bucketized_column(
    695             cont_features[0],
    696             test_data.get_quantile_based_buckets(iris.data, 10))
    697     ]
    698 
    699     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    700         linear_feature_columns=bucketized_features,
    701         linear_optimizer=ftrl.FtrlOptimizer(learning_rate=0.1),
    702         dnn_feature_columns=cont_features,
    703         dnn_hidden_units=[3, 3],
    704         dnn_optimizer=adagrad.AdagradOptimizer(learning_rate=0.1))
    705 
    706     classifier.fit(input_fn=test_data.iris_input_logistic_fn, steps=100)
    707     scores = classifier.evaluate(
    708         input_fn=test_data.iris_input_logistic_fn, steps=100)
    709     _assert_metrics_in_range(('accuracy',), scores)
    710 
    711   def testCustomOptimizerByString(self):
    712     """Tests binary classification using matrix data as input."""
    713     iris = test_data.prepare_iris_data_for_logistic_regression()
    714     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
    715     bucketized_features = [
    716         feature_column.bucketized_column(
    717             cont_features[0],
    718             test_data.get_quantile_based_buckets(iris.data, 10))
    719     ]
    720 
    721     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    722         linear_feature_columns=bucketized_features,
    723         linear_optimizer='Ftrl',
    724         dnn_feature_columns=cont_features,
    725         dnn_hidden_units=[3, 3],
    726         dnn_optimizer='Adagrad')
    727 
    728     classifier.fit(input_fn=test_data.iris_input_logistic_fn, steps=100)
    729     scores = classifier.evaluate(
    730         input_fn=test_data.iris_input_logistic_fn, steps=100)
    731     _assert_metrics_in_range(('accuracy',), scores)
    732 
    733   def testCustomOptimizerByFunction(self):
    734     """Tests binary classification using matrix data as input."""
    735     iris = test_data.prepare_iris_data_for_logistic_regression()
    736     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
    737     bucketized_features = [
    738         feature_column.bucketized_column(
    739             cont_features[0],
    740             test_data.get_quantile_based_buckets(iris.data, 10))
    741     ]
    742 
    743     def _optimizer_exp_decay():
    744       global_step = training_util.get_global_step()
    745       learning_rate = learning_rate_decay.exponential_decay(
    746           learning_rate=0.1,
    747           global_step=global_step,
    748           decay_steps=100,
    749           decay_rate=0.001)
    750       return adagrad.AdagradOptimizer(learning_rate=learning_rate)
    751 
    752     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    753         linear_feature_columns=bucketized_features,
    754         linear_optimizer=_optimizer_exp_decay,
    755         dnn_feature_columns=cont_features,
    756         dnn_hidden_units=[3, 3],
    757         dnn_optimizer=_optimizer_exp_decay)
    758 
    759     classifier.fit(input_fn=test_data.iris_input_logistic_fn, steps=100)
    760     scores = classifier.evaluate(
    761         input_fn=test_data.iris_input_logistic_fn, steps=100)
    762     _assert_metrics_in_range(('accuracy',), scores)
    763 
  def testPredict(self):
    """Tests predict_proba and predict_classes on a trained classifier."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32)}
      return features, labels

    def _input_fn_predict():
      # limit_epochs(num_epochs=1) makes prediction stop after one pass
      # over the data instead of looping forever.
      y = input_lib.limit_epochs(
          array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32), num_epochs=1)
      features = {'x': y}
      return features

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3])

    classifier.fit(input_fn=_input_fn_train, steps=100)

    # Training data is 3/4 negative, so P(class=0) should approach 0.75.
    probs = list(classifier.predict_proba(input_fn=_input_fn_predict))
    self.assertAllClose([[0.75, 0.25]] * 4, probs, 0.05)
    classes = list(classifier.predict_classes(input_fn=_input_fn_predict))
    self.assertListEqual([0] * 4, classes)
    791 
  def testCustomMetrics(self):
    """Tests custom evaluation metrics."""

    def _input_fn(num_epochs=None):
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {
          'x':
              input_lib.limit_epochs(
                  array_ops.ones(
                      shape=[4, 1], dtype=dtypes.float32),
                  num_epochs=num_epochs)
      }
      return features, labels

    def _my_metric_op(predictions, labels):
      # For the case of binary classification, the 2nd column of "predictions"
      # denotes the model predictions.
      labels = math_ops.to_float(labels)
      predictions = array_ops.strided_slice(
          predictions, [0, 1], [-1, 2], end_mask=1)
      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[feature_column.real_valued_column('x')],
        dnn_feature_columns=[feature_column.real_valued_column('x')],
        dnn_hidden_units=[3, 3])

    classifier.fit(input_fn=_input_fn, steps=100)
    # Valid case: metrics given as MetricSpec objects with explicit
    # prediction keys.
    scores = classifier.evaluate(
        input_fn=_input_fn,
        steps=100,
        metrics={
            'my_accuracy':
                MetricSpec(
                    metric_fn=metric_ops.streaming_accuracy,
                    prediction_key='classes'),
            'my_precision':
                MetricSpec(
                    metric_fn=metric_ops.streaming_precision,
                    prediction_key='classes'),
            'my_metric':
                MetricSpec(
                    metric_fn=_my_metric_op, prediction_key='probabilities')
        })
    self.assertTrue(
        set(['loss', 'my_accuracy', 'my_precision', 'my_metric']).issubset(
            set(scores.keys())))
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions = np.array(list(classifier.predict_classes(
        input_fn=predict_input_fn)))
    # The streamed accuracy must agree with sklearn-style accuracy computed
    # from the raw predictions.
    self.assertEqual(
        _sklearn.accuracy_score([1, 0, 0, 0], predictions),
        scores['my_accuracy'])

    # Test the case where the 2nd element of the key is neither "classes" nor
    # "probabilities".
    with self.assertRaisesRegexp(KeyError, 'bad_type'):
      classifier.evaluate(
          input_fn=_input_fn,
          steps=100,
          metrics={('bad_name', 'bad_type'): metric_ops.streaming_auc})

    # Test the case where the tuple of the key doesn't have 2 elements.
    with self.assertRaises(ValueError):
      classifier.evaluate(
          input_fn=_input_fn,
          steps=100,
          metrics={
              ('bad_length_name', 'classes', 'bad_length'):
                  metric_ops.streaming_accuracy
          })

    # Test the case where the prediction_key is neither "classes" nor
    # "probabilities".
    with self.assertRaisesRegexp(KeyError, 'bad_type'):
      classifier.evaluate(
          input_fn=_input_fn,
          steps=100,
          metrics={
              'bad_name':
                  MetricSpec(
                      metric_fn=metric_ops.streaming_auc,
                      prediction_key='bad_type')
          })
    877 
    878   def testVariableQuery(self):
    879     """Tests get_variable_names and get_variable_value."""
    880 
    881     def _input_fn_train():
    882       # Create 4 rows, three (y = x), one (y=Not(x))
    883       labels = constant_op.constant([[1], [1], [1], [0]])
    884       features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
    885       return features, labels
    886 
    887     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    888         linear_feature_columns=[feature_column.real_valued_column('x')],
    889         dnn_feature_columns=[feature_column.real_valued_column('x')],
    890         dnn_hidden_units=[3, 3])
    891 
    892     classifier.fit(input_fn=_input_fn_train, steps=500)
    893     var_names = classifier.get_variable_names()
    894     self.assertGreater(len(var_names), 3)
    895     for name in var_names:
    896       classifier.get_variable_value(name)
    897 
  def testExport(self):
    """Tests export model for servo."""

    def input_fn():
      # One example with a dense 'age' feature and a sparse 'language'
      # feature.
      return {
          'age':
              constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 100)

    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[
            feature_column.real_valued_column('age'),
            language,
        ],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language, dimension=1),
        ],
        dnn_hidden_units=[3, 3])
    classifier.fit(input_fn=input_fn, steps=100)

    export_dir = tempfile.mkdtemp()
    input_feature_key = 'examples'

    def serving_input_fn():
      # Serving input reuses the training features and adds a string
      # placeholder under `input_feature_key` for serialized examples.
      features, targets = input_fn()
      features[input_feature_key] = array_ops.placeholder(dtypes.string)
      return features, targets

    # Only checks that export completes without raising.
    classifier.export(
        export_dir,
        serving_input_fn,
        input_feature_key,
        use_deprecated_input_fn=False)
    937 
    938   def testCenteredBias(self):
    939     """Tests bias is centered or not."""
    940 
    941     def _input_fn_train():
    942       # Create 4 rows, three (y = x), one (y=Not(x))
    943       labels = constant_op.constant([[1], [1], [1], [0]])
    944       features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
    945       return features, labels
    946 
    947     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    948         linear_feature_columns=[feature_column.real_valued_column('x')],
    949         dnn_feature_columns=[feature_column.real_valued_column('x')],
    950         dnn_hidden_units=[3, 3],
    951         enable_centered_bias=True)
    952 
    953     classifier.fit(input_fn=_input_fn_train, steps=1000)
    954     self.assertIn('binary_logistic_head/centered_bias_weight',
    955                   classifier.get_variable_names())
    956     # logodds(0.75) = 1.09861228867
    957     self.assertAlmostEqual(
    958         1.0986,
    959         float(classifier.get_variable_value(
    960             'binary_logistic_head/centered_bias_weight')[0]),
    961         places=2)
    962 
    963   def testDisableCenteredBias(self):
    964     """Tests bias is centered or not."""
    965 
    966     def _input_fn_train():
    967       # Create 4 rows, three (y = x), one (y=Not(x))
    968       labels = constant_op.constant([[1], [1], [1], [0]])
    969       features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
    970       return features, labels
    971 
    972     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    973         linear_feature_columns=[feature_column.real_valued_column('x')],
    974         dnn_feature_columns=[feature_column.real_valued_column('x')],
    975         dnn_hidden_units=[3, 3],
    976         enable_centered_bias=False)
    977 
    978     classifier.fit(input_fn=_input_fn_train, steps=500)
    979     self.assertNotIn('centered_bias_weight', classifier.get_variable_names())
    980 
    981   def testGlobalStepLinearOnly(self):
    982     """Tests global step update for linear-only model."""
    983 
    984     def input_fn():
    985       return {
    986           'age': constant_op.constant([1]),
    987           'language':
    988               sparse_tensor.SparseTensor(
    989                   values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
    990       }, constant_op.constant([[1]])
    991 
    992     language = feature_column.sparse_column_with_hash_bucket('language', 10)
    993     age = feature_column.real_valued_column('age')
    994 
    995     step_counter = _StepCounterHook()
    996     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
    997         linear_feature_columns=[age, language])
    998     classifier.fit(input_fn=input_fn, steps=100, monitors=[step_counter])
    999 
   1000     self.assertEqual(100, step_counter.steps)
   1001 
   1002   def testGlobalStepDNNOnly(self):
   1003     """Tests global step update for dnn-only model."""
   1004 
   1005     def input_fn():
   1006       return {
   1007           'language':
   1008               sparse_tensor.SparseTensor(
   1009                   values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
   1010       }, constant_op.constant([[1]])
   1011 
   1012     language = feature_column.sparse_column_with_hash_bucket('language', 10)
   1013 
   1014     step_counter = _StepCounterHook()
   1015     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
   1016         dnn_feature_columns=[
   1017             feature_column.embedding_column(language, dimension=1)],
   1018         dnn_hidden_units=[3, 3])
   1019     classifier.fit(input_fn=input_fn, steps=100, monitors=[step_counter])
   1020 
   1021     self.assertEqual(100, step_counter.steps)
   1022 
  def testGlobalStepDNNLinearCombinedBug(self):
    """Tests global step update for dnn-linear combined model."""

    def input_fn():
      return {
          'age': constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 10)
    age = feature_column.real_valued_column('age')

    step_counter = _StepCounterHook()
    # With fix_global_step_increment_bug=False the legacy behavior is kept:
    # the global step advances roughly twice per counted training step (see
    # the assertions below, ~50 counted steps for 100 global steps).
    classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
        linear_feature_columns=[age, language],
        dnn_feature_columns=[
            feature_column.embedding_column(language, dimension=1)],
        dnn_hidden_units=[3, 3],
        fix_global_step_increment_bug=False)
    classifier.fit(input_fn=input_fn, steps=100, monitors=[step_counter])
    global_step = classifier.get_variable_value('global_step')

    if global_step == 100:
      # Expected is 100, but because of the global step increment bug, is 50.
      # Occasionally, step increments one more time due to a race condition,
      # reaching 51 steps.
      self.assertIn(step_counter.steps, [50, 51])
    else:
      # Occasionally, training stops when global_step == 102, due to a race
      # condition. In addition, occasionally step increments one more time due
      # to a race condition reaching 52 steps.
      self.assertIn(step_counter.steps, [51, 52])
   1057 
   1058   def testGlobalStepDNNLinearCombinedBugFixed(self):
   1059     """Tests global step update for dnn-linear combined model."""
   1060 
   1061     def input_fn():
   1062       return {
   1063           'age': constant_op.constant([1]),
   1064           'language':
   1065               sparse_tensor.SparseTensor(
   1066                   values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
   1067       }, constant_op.constant([[1]])
   1068 
   1069     language = feature_column.sparse_column_with_hash_bucket('language', 10)
   1070     age = feature_column.real_valued_column('age')
   1071 
   1072     step_counter = _StepCounterHook()
   1073     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
   1074         linear_feature_columns=[age, language],
   1075         dnn_feature_columns=[
   1076             feature_column.embedding_column(language, dimension=1)],
   1077         dnn_hidden_units=[3, 3],
   1078         fix_global_step_increment_bug=True)
   1079     classifier.fit(input_fn=input_fn, steps=100, monitors=[step_counter])
   1080 
   1081     self.assertEqual(100, step_counter.steps)
   1082 
   1083   def testLinearOnly(self):
   1084     """Tests that linear-only instantiation works."""
   1085 
   1086     def input_fn():
   1087       return {
   1088           'age':
   1089               constant_op.constant([1]),
   1090           'language':
   1091               sparse_tensor.SparseTensor(
   1092                   values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
   1093       }, constant_op.constant([[1]])
   1094 
   1095     language = feature_column.sparse_column_with_hash_bucket('language', 100)
   1096     age = feature_column.real_valued_column('age')
   1097 
   1098     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
   1099         linear_feature_columns=[age, language])
   1100     classifier.fit(input_fn=input_fn, steps=100)
   1101     loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
   1102     classifier.fit(input_fn=input_fn, steps=200)
   1103     loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
   1104     self.assertLess(loss2, loss1)
   1105 
   1106     variable_names = classifier.get_variable_names()
   1107     self.assertNotIn('dnn/logits/biases', variable_names)
   1108     self.assertNotIn('dnn/logits/weights', variable_names)
   1109     self.assertIn('linear/bias_weight', variable_names)
   1110     self.assertIn('linear/age/weight', variable_names)
   1111     self.assertIn('linear/language/weights', variable_names)
   1112     self.assertEquals(
   1113         1, len(classifier.get_variable_value('linear/age/weight')))
   1114     self.assertEquals(
   1115         100, len(classifier.get_variable_value('linear/language/weights')))
   1116 
   1117   def testLinearOnlyOneFeature(self):
   1118     """Tests that linear-only instantiation works for one feature only."""
   1119 
   1120     def input_fn():
   1121       return {
   1122           'language':
   1123               sparse_tensor.SparseTensor(
   1124                   values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
   1125       }, constant_op.constant([[1]])
   1126 
   1127     language = feature_column.sparse_column_with_hash_bucket('language', 99)
   1128 
   1129     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
   1130         linear_feature_columns=[language])
   1131     classifier.fit(input_fn=input_fn, steps=100)
   1132     loss1 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
   1133     classifier.fit(input_fn=input_fn, steps=200)
   1134     loss2 = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
   1135     self.assertLess(loss2, loss1)
   1136 
   1137     variable_names = classifier.get_variable_names()
   1138     self.assertNotIn('dnn/logits/biases', variable_names)
   1139     self.assertNotIn('dnn/logits/weights', variable_names)
   1140     self.assertIn('linear/bias_weight', variable_names)
   1141     self.assertIn('linear/language/weights', variable_names)
   1142     self.assertEquals(
   1143         1, len(classifier.get_variable_value('linear/bias_weight')))
   1144     self.assertEquals(
   1145         99, len(classifier.get_variable_value('linear/language/weights')))
   1146 
   1147   def testDNNOnly(self):
   1148     """Tests that DNN-only instantiation works."""
   1149     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
   1150 
   1151     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
   1152         n_classes=3, dnn_feature_columns=cont_features, dnn_hidden_units=[3, 3])
   1153 
   1154     classifier.fit(input_fn=test_data.iris_input_multiclass_fn, steps=1000)
   1155     classifier.evaluate(input_fn=test_data.iris_input_multiclass_fn, steps=100)
   1156 
   1157     variable_names = classifier.get_variable_names()
   1158     self.assertIn('dnn/hiddenlayer_0/weights', variable_names)
   1159     self.assertIn('dnn/hiddenlayer_0/biases', variable_names)
   1160     self.assertIn('dnn/hiddenlayer_1/weights', variable_names)
   1161     self.assertIn('dnn/hiddenlayer_1/biases', variable_names)
   1162     self.assertIn('dnn/logits/weights', variable_names)
   1163     self.assertIn('dnn/logits/biases', variable_names)
   1164     self.assertNotIn('linear/bias_weight', variable_names)
   1165     self.assertNotIn('linear/feature_BUCKETIZED/weight', variable_names)
   1166 
   1167   def testDNNWeightsBiasesNames(self):
   1168     """Tests the names of DNN weights and biases in the checkpoints."""
   1169 
   1170     def _input_fn_train():
   1171       # Create 4 rows, three (y = x), one (y=Not(x))
   1172       labels = constant_op.constant([[1], [1], [1], [0]])
   1173       features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
   1174       return features, labels
   1175 
   1176     classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
   1177         linear_feature_columns=[feature_column.real_valued_column('x')],
   1178         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1179         dnn_hidden_units=[3, 3])
   1180 
   1181     classifier.fit(input_fn=_input_fn_train, steps=5)
   1182     variable_names = classifier.get_variable_names()
   1183     self.assertIn('dnn/hiddenlayer_0/weights', variable_names)
   1184     self.assertIn('dnn/hiddenlayer_0/biases', variable_names)
   1185     self.assertIn('dnn/hiddenlayer_1/weights', variable_names)
   1186     self.assertIn('dnn/hiddenlayer_1/biases', variable_names)
   1187     self.assertIn('dnn/logits/weights', variable_names)
   1188     self.assertIn('dnn/logits/biases', variable_names)
   1189 
   1190 
   1191 class DNNLinearCombinedRegressorTest(test.TestCase):
   1192 
   1193   def testExperimentIntegration(self):
   1194     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
   1195 
   1196     exp = experiment.Experiment(
   1197         estimator=dnn_linear_combined.DNNLinearCombinedRegressor(
   1198             linear_feature_columns=cont_features,
   1199             dnn_feature_columns=cont_features,
   1200             dnn_hidden_units=[3, 3]),
   1201         train_input_fn=test_data.iris_input_logistic_fn,
   1202         eval_input_fn=test_data.iris_input_logistic_fn)
   1203     exp.test()
   1204 
  def testEstimatorContract(self):
    # Verifies DNNLinearCombinedRegressor exposes the standard Estimator API.
    estimator_test_utils.assert_estimator_contract(
        self, dnn_linear_combined.DNNLinearCombinedRegressor)
   1208 
   1209   def testRegression_MatrixData(self):
   1210     """Tests regression using matrix data as input."""
   1211     cont_features = [feature_column.real_valued_column('feature', dimension=4)]
   1212 
   1213     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1214         linear_feature_columns=cont_features,
   1215         dnn_feature_columns=cont_features,
   1216         dnn_hidden_units=[3, 3],
   1217         config=run_config.RunConfig(tf_random_seed=1))
   1218 
   1219     regressor.fit(input_fn=test_data.iris_input_logistic_fn, steps=10)
   1220     scores = regressor.evaluate(
   1221         input_fn=test_data.iris_input_logistic_fn, steps=1)
   1222     self.assertIn('loss', scores.keys())
   1223 
   1224   def testRegression_TensorData(self):
   1225     """Tests regression using tensor data as input."""
   1226 
   1227     def _input_fn():
   1228       # Create 4 rows of (y = x)
   1229       labels = constant_op.constant([[100.], [3.], [2.], [2.]])
   1230       features = {'x': constant_op.constant([[100.], [3.], [2.], [2.]])}
   1231       return features, labels
   1232 
   1233     classifier = dnn_linear_combined.DNNLinearCombinedRegressor(
   1234         linear_feature_columns=[feature_column.real_valued_column('x')],
   1235         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1236         dnn_hidden_units=[3, 3],
   1237         config=run_config.RunConfig(tf_random_seed=1))
   1238 
   1239     classifier.fit(input_fn=_input_fn, steps=10)
   1240     classifier.evaluate(input_fn=_input_fn, steps=1)
   1241 
   1242   def testLoss(self):
   1243     """Tests loss calculation."""
   1244 
   1245     def _input_fn_train():
   1246       # Create 4 rows, one of them (y = x), three of them (y=Not(x))
   1247       # The algorithm should learn (y = 0.25).
   1248       labels = constant_op.constant([[1.], [0.], [0.], [0.]])
   1249       features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
   1250       return features, labels
   1251 
   1252     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1253         linear_feature_columns=[feature_column.real_valued_column('x')],
   1254         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1255         dnn_hidden_units=[3, 3],
   1256         config=run_config.RunConfig(tf_random_seed=1))
   1257 
   1258     regressor.fit(input_fn=_input_fn_train, steps=100)
   1259     scores = regressor.evaluate(input_fn=_input_fn_train, steps=1)
   1260     # Average square loss = (0.75^2 + 3*0.25^2) / 4 = 0.1875
   1261     self.assertAlmostEqual(0.1875, scores['loss'], delta=0.1)
   1262 
   1263   def testLossWithWeights(self):
   1264     """Tests loss calculation with weights."""
   1265 
   1266     def _input_fn_train():
   1267       # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x))
   1268       # The algorithm should learn (y = 0.25).
   1269       labels = constant_op.constant([[1.], [0.], [0.], [0.]])
   1270       features = {
   1271           'x': array_ops.ones(
   1272               shape=[4, 1], dtype=dtypes.float32),
   1273           'w': constant_op.constant([[1.], [1.], [1.], [1.]])
   1274       }
   1275       return features, labels
   1276 
   1277     def _input_fn_eval():
   1278       # 4 rows, with different weights.
   1279       labels = constant_op.constant([[1.], [0.], [0.], [0.]])
   1280       features = {
   1281           'x': array_ops.ones(
   1282               shape=[4, 1], dtype=dtypes.float32),
   1283           'w': constant_op.constant([[7.], [1.], [1.], [1.]])
   1284       }
   1285       return features, labels
   1286 
   1287     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1288         weight_column_name='w',
   1289         linear_feature_columns=[feature_column.real_valued_column('x')],
   1290         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1291         dnn_hidden_units=[3, 3],
   1292         config=run_config.RunConfig(tf_random_seed=1))
   1293 
   1294     regressor.fit(input_fn=_input_fn_train, steps=100)
   1295     scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
   1296     # Weighted average square loss = (7*0.75^2 + 3*0.25^2) / 10 = 0.4125
   1297     self.assertAlmostEqual(0.4125, scores['loss'], delta=0.1)
   1298 
   1299   def testTrainWithWeights(self):
   1300     """Tests training with given weight column."""
   1301 
   1302     def _input_fn_train():
   1303       # Create 4 rows, one of them (y = x), three of them (y=Not(x))
   1304       # First row has more weight than others. Model should fit (y=x) better
   1305       # than (y=Not(x)) due to the relative higher weight of the first row.
   1306       labels = constant_op.constant([[1.], [0.], [0.], [0.]])
   1307       features = {
   1308           'x': array_ops.ones(
   1309               shape=[4, 1], dtype=dtypes.float32),
   1310           'w': constant_op.constant([[100.], [3.], [2.], [2.]])
   1311       }
   1312       return features, labels
   1313 
   1314     def _input_fn_eval():
   1315       # Create 4 rows (y = x)
   1316       labels = constant_op.constant([[1.], [1.], [1.], [1.]])
   1317       features = {
   1318           'x': array_ops.ones(
   1319               shape=[4, 1], dtype=dtypes.float32),
   1320           'w': constant_op.constant([[1.], [1.], [1.], [1.]])
   1321       }
   1322       return features, labels
   1323 
   1324     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1325         weight_column_name='w',
   1326         linear_feature_columns=[feature_column.real_valued_column('x')],
   1327         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1328         dnn_hidden_units=[3, 3],
   1329         config=run_config.RunConfig(tf_random_seed=1))
   1330 
   1331     regressor.fit(input_fn=_input_fn_train, steps=100)
   1332     scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
   1333     # The model should learn (y = x) because of the weights, so the loss should
   1334     # be close to zero.
   1335     self.assertLess(scores['loss'], 0.2)
   1336 
  def testPredict_AsIterableFalse(self):
    """Tests predict method with as_iterable=False."""
    labels = [1., 0., 0.2]

    def _input_fn(num_epochs=None):
      # Three examples with a dense 'age' feature and a sparse 'language'
      # feature; limit_epochs bounds the number of passes during predict.
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=['en', 'fr', 'zh'],
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant(labels, dtype=dtypes.float32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)

    regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[
            language_column, feature_column.real_valued_column('age')
        ],
        dnn_feature_columns=[
            feature_column.embedding_column(
                language_column, dimension=1),
            feature_column.real_valued_column('age')
        ],
        dnn_hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=10)

    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores.keys())
    # as_iterable=False returns all predictions at once rather than as a
    # generator; only checks the call does not raise.
    regressor.predict_scores(input_fn=_input_fn, as_iterable=False)
   1375 
   1376   def testPredict_AsIterable(self):
   1377     """Tests predict method with as_iterable=True."""
   1378     labels = [1., 0., 0.2]
   1379 
   1380     def _input_fn(num_epochs=None):
   1381       features = {
   1382           'age':
   1383               input_lib.limit_epochs(
   1384                   constant_op.constant([[0.8], [0.15], [0.]]),
   1385                   num_epochs=num_epochs),
   1386           'language':
   1387               sparse_tensor.SparseTensor(
   1388                   values=['en', 'fr', 'zh'],
   1389                   indices=[[0, 0], [0, 1], [2, 0]],
   1390                   dense_shape=[3, 2])
   1391       }
   1392       return features, constant_op.constant(labels, dtype=dtypes.float32)
   1393 
   1394     language_column = feature_column.sparse_column_with_hash_bucket(
   1395         'language', hash_bucket_size=20)
   1396 
   1397     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1398         linear_feature_columns=[
   1399             language_column, feature_column.real_valued_column('age')
   1400         ],
   1401         dnn_feature_columns=[
   1402             feature_column.embedding_column(
   1403                 language_column, dimension=1),
   1404             feature_column.real_valued_column('age')
   1405         ],
   1406         dnn_hidden_units=[3, 3],
   1407         config=run_config.RunConfig(tf_random_seed=1))
   1408 
   1409     regressor.fit(input_fn=_input_fn, steps=10)
   1410 
   1411     scores = regressor.evaluate(input_fn=_input_fn, steps=1)
   1412     self.assertIn('loss', scores.keys())
   1413     predict_input_fn = functools.partial(_input_fn, num_epochs=1)
   1414     regressor.predict_scores(input_fn=predict_input_fn, as_iterable=True)
   1415 
   1416   def testCustomMetrics(self):
   1417     """Tests custom evaluation metrics."""
   1418 
   1419     def _input_fn(num_epochs=None):
   1420       # Create 4 rows, one of them (y = x), three of them (y=Not(x))
   1421       labels = constant_op.constant([[1.], [0.], [0.], [0.]])
   1422       features = {
   1423           'x':
   1424               input_lib.limit_epochs(
   1425                   array_ops.ones(
   1426                       shape=[4, 1], dtype=dtypes.float32),
   1427                   num_epochs=num_epochs)
   1428       }
   1429       return features, labels
   1430 
   1431     def _my_metric_op(predictions, labels):
   1432       return math_ops.reduce_sum(math_ops.multiply(predictions, labels))
   1433 
   1434     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1435         linear_feature_columns=[feature_column.real_valued_column('x')],
   1436         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1437         dnn_hidden_units=[3, 3],
   1438         config=run_config.RunConfig(tf_random_seed=1))
   1439 
   1440     regressor.fit(input_fn=_input_fn, steps=10)
   1441     scores = regressor.evaluate(
   1442         input_fn=_input_fn,
   1443         steps=1,
   1444         metrics={
   1445             'my_error': metric_ops.streaming_mean_squared_error,
   1446             ('my_metric', 'scores'): _my_metric_op
   1447         })
   1448     self.assertIn('loss', set(scores.keys()))
   1449     self.assertIn('my_error', set(scores.keys()))
   1450     self.assertIn('my_metric', set(scores.keys()))
   1451     predict_input_fn = functools.partial(_input_fn, num_epochs=1)
   1452     predictions = np.array(list(regressor.predict_scores(
   1453         input_fn=predict_input_fn)))
   1454     self.assertAlmostEqual(
   1455         _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
   1456         scores['my_error'])
   1457 
   1458     # Tests the case that the 2nd element of the key is not "scores".
   1459     with self.assertRaises(KeyError):
   1460       regressor.evaluate(
   1461           input_fn=_input_fn,
   1462           steps=1,
   1463           metrics={
   1464               ('my_error', 'predictions'):
   1465                   metric_ops.streaming_mean_squared_error
   1466           })
   1467 
   1468     # Tests the case where the tuple of the key doesn't have 2 elements.
   1469     with self.assertRaises(ValueError):
   1470       regressor.evaluate(
   1471           input_fn=_input_fn,
   1472           steps=1,
   1473           metrics={
   1474               ('bad_length_name', 'scores', 'bad_length'):
   1475                   metric_ops.streaming_mean_squared_error
   1476           })
   1477 
   1478   def testCustomMetricsWithMetricSpec(self):
   1479     """Tests custom evaluation metrics."""
   1480 
   1481     def _input_fn(num_epochs=None):
   1482       # Create 4 rows, one of them (y = x), three of them (y=Not(x))
   1483       labels = constant_op.constant([[1.], [0.], [0.], [0.]])
   1484       features = {
   1485           'x':
   1486               input_lib.limit_epochs(
   1487                   array_ops.ones(
   1488                       shape=[4, 1], dtype=dtypes.float32),
   1489                   num_epochs=num_epochs)
   1490       }
   1491       return features, labels
   1492 
   1493     def _my_metric_op(predictions, labels):
   1494       return math_ops.reduce_sum(math_ops.multiply(predictions, labels))
   1495 
   1496     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1497         linear_feature_columns=[feature_column.real_valued_column('x')],
   1498         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1499         dnn_hidden_units=[3, 3],
   1500         config=run_config.RunConfig(tf_random_seed=1))
   1501 
   1502     regressor.fit(input_fn=_input_fn, steps=5)
   1503     scores = regressor.evaluate(
   1504         input_fn=_input_fn,
   1505         steps=1,
   1506         metrics={
   1507             'my_error':
   1508                 MetricSpec(
   1509                     metric_fn=metric_ops.streaming_mean_squared_error,
   1510                     prediction_key='scores'),
   1511             'my_metric':
   1512                 MetricSpec(
   1513                     metric_fn=_my_metric_op, prediction_key='scores')
   1514         })
   1515     self.assertIn('loss', set(scores.keys()))
   1516     self.assertIn('my_error', set(scores.keys()))
   1517     self.assertIn('my_metric', set(scores.keys()))
   1518     predict_input_fn = functools.partial(_input_fn, num_epochs=1)
   1519     predictions = np.array(list(regressor.predict_scores(
   1520         input_fn=predict_input_fn)))
   1521     self.assertAlmostEqual(
   1522         _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
   1523         scores['my_error'])
   1524 
   1525     # Tests the case where the prediction_key is not "scores".
   1526     with self.assertRaisesRegexp(KeyError, 'bad_type'):
   1527       regressor.evaluate(
   1528           input_fn=_input_fn,
   1529           steps=1,
   1530           metrics={
   1531               'bad_name':
   1532                   MetricSpec(
   1533                       metric_fn=metric_ops.streaming_auc,
   1534                       prediction_key='bad_type')
   1535           })
   1536 
   1537   def testExport(self):
   1538     """Tests export model for servo."""
   1539     labels = [1., 0., 0.2]
   1540 
   1541     def _input_fn(num_epochs=None):
   1542       features = {
   1543           'age':
   1544               input_lib.limit_epochs(
   1545                   constant_op.constant([[0.8], [0.15], [0.]]),
   1546                   num_epochs=num_epochs),
   1547           'language':
   1548               sparse_tensor.SparseTensor(
   1549                   values=['en', 'fr', 'zh'],
   1550                   indices=[[0, 0], [0, 1], [2, 0]],
   1551                   dense_shape=[3, 2])
   1552       }
   1553       return features, constant_op.constant(labels, dtype=dtypes.float32)
   1554 
   1555     language_column = feature_column.sparse_column_with_hash_bucket(
   1556         'language', hash_bucket_size=20)
   1557 
   1558     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1559         linear_feature_columns=[
   1560             language_column, feature_column.real_valued_column('age')
   1561         ],
   1562         dnn_feature_columns=[
   1563             feature_column.embedding_column(
   1564                 language_column, dimension=1),
   1565         ],
   1566         dnn_hidden_units=[3, 3],
   1567         config=run_config.RunConfig(tf_random_seed=1))
   1568 
   1569     regressor.fit(input_fn=_input_fn, steps=10)
   1570 
   1571     export_dir = tempfile.mkdtemp()
   1572     input_feature_key = 'examples'
   1573 
   1574     def serving_input_fn():
   1575       features, targets = _input_fn()
   1576       features[input_feature_key] = array_ops.placeholder(dtypes.string)
   1577       return features, targets
   1578 
   1579     regressor.export(
   1580         export_dir,
   1581         serving_input_fn,
   1582         input_feature_key,
   1583         use_deprecated_input_fn=False)
   1584 
   1585   def testTrainSaveLoad(self):
   1586     """Tests regression with restarting training / evaluate."""
   1587 
   1588     def _input_fn(num_epochs=None):
   1589       # Create 4 rows of (y = x)
   1590       labels = constant_op.constant([[100.], [3.], [2.], [2.]])
   1591       features = {
   1592           'x':
   1593               input_lib.limit_epochs(
   1594                   constant_op.constant([[100.], [3.], [2.], [2.]]),
   1595                   num_epochs=num_epochs)
   1596       }
   1597       return features, labels
   1598 
   1599     model_dir = tempfile.mkdtemp()
   1600     # pylint: disable=g-long-lambda
   1601     new_regressor = lambda: dnn_linear_combined.DNNLinearCombinedRegressor(
   1602         linear_feature_columns=[feature_column.real_valued_column('x')],
   1603         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1604         dnn_hidden_units=[3, 3],
   1605         model_dir=model_dir,
   1606         config=run_config.RunConfig(tf_random_seed=1))
   1607 
   1608     predict_input_fn = functools.partial(_input_fn, num_epochs=1)
   1609     regressor = new_regressor()
   1610     regressor.fit(input_fn=_input_fn, steps=10)
   1611     predictions = list(regressor.predict_scores(input_fn=predict_input_fn))
   1612     del regressor
   1613 
   1614     regressor = new_regressor()
   1615     predictions2 = list(regressor.predict_scores(input_fn=predict_input_fn))
   1616     self.assertAllClose(predictions, predictions2)
   1617 
   1618   def testTrainWithPartitionedVariables(self):
   1619     """Tests training with partitioned variables."""
   1620 
   1621     def _input_fn(num_epochs=None):
   1622       features = {
   1623           'age':
   1624               input_lib.limit_epochs(
   1625                   constant_op.constant([[0.8], [0.15], [0.]]),
   1626                   num_epochs=num_epochs),
   1627           'language':
   1628               sparse_tensor.SparseTensor(
   1629                   values=['en', 'fr', 'zh'],
   1630                   indices=[[0, 0], [0, 1], [2, 0]],
   1631                   dense_shape=[3, 2])
   1632       }
   1633       return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
   1634 
   1635     # The given hash_bucket_size results in variables larger than the
   1636     # default min_slice_size attribute, so the variables are partitioned.
   1637     language_column = feature_column.sparse_column_with_hash_bucket(
   1638         'language', hash_bucket_size=2e7)
   1639 
   1640     tf_config = {
   1641         'cluster': {
   1642             run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
   1643         }
   1644     }
   1645     with test.mock.patch.dict('os.environ',
   1646                               {'TF_CONFIG': json.dumps(tf_config)}):
   1647       config = run_config.RunConfig(tf_random_seed=1)
   1648       # Because we did not start a distributed cluster, we need to pass an
   1649       # empty ClusterSpec, otherwise the device_setter will look for
   1650       # distributed jobs, such as "/job:ps" which are not present.
   1651       config._cluster_spec = server_lib.ClusterSpec({})
   1652 
   1653     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1654         linear_feature_columns=[
   1655             language_column, feature_column.real_valued_column('age')
   1656         ],
   1657         dnn_feature_columns=[
   1658             feature_column.embedding_column(
   1659                 language_column, dimension=1),
   1660             feature_column.real_valued_column('age')
   1661         ],
   1662         dnn_hidden_units=[3, 3],
   1663         config=config)
   1664 
   1665     regressor.fit(input_fn=_input_fn, steps=100)
   1666 
   1667     scores = regressor.evaluate(input_fn=_input_fn, steps=1)
   1668     self.assertIn('loss', scores.keys())
   1669 
   1670   def testDisableCenteredBias(self):
   1671     """Tests that we can disable centered bias."""
   1672 
   1673     def _input_fn(num_epochs=None):
   1674       features = {
   1675           'age':
   1676               input_lib.limit_epochs(
   1677                   constant_op.constant([[0.8], [0.15], [0.]]),
   1678                   num_epochs=num_epochs),
   1679           'language':
   1680               sparse_tensor.SparseTensor(
   1681                   values=['en', 'fr', 'zh'],
   1682                   indices=[[0, 0], [0, 1], [2, 0]],
   1683                   dense_shape=[3, 2])
   1684       }
   1685       return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
   1686 
   1687     language_column = feature_column.sparse_column_with_hash_bucket(
   1688         'language', hash_bucket_size=20)
   1689 
   1690     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1691         linear_feature_columns=[
   1692             language_column, feature_column.real_valued_column('age')
   1693         ],
   1694         dnn_feature_columns=[
   1695             feature_column.embedding_column(
   1696                 language_column, dimension=1),
   1697             feature_column.real_valued_column('age')
   1698         ],
   1699         dnn_hidden_units=[3, 3],
   1700         enable_centered_bias=False,
   1701         config=run_config.RunConfig(tf_random_seed=1))
   1702 
   1703     regressor.fit(input_fn=_input_fn, steps=100)
   1704 
   1705     scores = regressor.evaluate(input_fn=_input_fn, steps=1)
   1706     self.assertIn('loss', scores.keys())
   1707 
   1708   def testLinearOnly(self):
   1709     """Tests linear-only instantiation and training."""
   1710 
   1711     def _input_fn(num_epochs=None):
   1712       features = {
   1713           'age':
   1714               input_lib.limit_epochs(
   1715                   constant_op.constant([[0.8], [0.15], [0.]]),
   1716                   num_epochs=num_epochs),
   1717           'language':
   1718               sparse_tensor.SparseTensor(
   1719                   values=['en', 'fr', 'zh'],
   1720                   indices=[[0, 0], [0, 1], [2, 0]],
   1721                   dense_shape=[3, 2])
   1722       }
   1723       return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
   1724 
   1725     language_column = feature_column.sparse_column_with_hash_bucket(
   1726         'language', hash_bucket_size=20)
   1727 
   1728     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1729         linear_feature_columns=[
   1730             language_column, feature_column.real_valued_column('age')
   1731         ],
   1732         config=run_config.RunConfig(tf_random_seed=1))
   1733 
   1734     regressor.fit(input_fn=_input_fn, steps=100)
   1735 
   1736     scores = regressor.evaluate(input_fn=_input_fn, steps=1)
   1737     self.assertIn('loss', scores.keys())
   1738 
   1739   def testDNNOnly(self):
   1740     """Tests DNN-only instantiation and training."""
   1741 
   1742     def _input_fn(num_epochs=None):
   1743       features = {
   1744           'age':
   1745               input_lib.limit_epochs(
   1746                   constant_op.constant([[0.8], [0.15], [0.]]),
   1747                   num_epochs=num_epochs),
   1748           'language':
   1749               sparse_tensor.SparseTensor(
   1750                   values=['en', 'fr', 'zh'],
   1751                   indices=[[0, 0], [0, 1], [2, 0]],
   1752                   dense_shape=[3, 2])
   1753       }
   1754       return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
   1755 
   1756     language_column = feature_column.sparse_column_with_hash_bucket(
   1757         'language', hash_bucket_size=20)
   1758 
   1759     regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
   1760         dnn_feature_columns=[
   1761             feature_column.embedding_column(
   1762                 language_column, dimension=1),
   1763             feature_column.real_valued_column('age')
   1764         ],
   1765         dnn_hidden_units=[3, 3],
   1766         config=run_config.RunConfig(tf_random_seed=1))
   1767 
   1768     regressor.fit(input_fn=_input_fn, steps=100)
   1769 
   1770     scores = regressor.evaluate(input_fn=_input_fn, steps=1)
   1771     self.assertIn('loss', scores.keys())
   1772 
   1773 
   1774 class FeatureEngineeringFunctionTest(test.TestCase):
   1775   """Tests feature_engineering_fn."""
   1776 
   1777   def testNoneFeatureEngineeringFn(self):
   1778 
   1779     def input_fn():
   1780       # Create 4 rows of (y = x)
   1781       labels = constant_op.constant([[100.], [3.], [2.], [2.]])
   1782       features = {'x': constant_op.constant([[100.], [3.], [2.], [2.]])}
   1783       return features, labels
   1784 
   1785     def feature_engineering_fn(features, labels):
   1786       _, _ = features, labels
   1787       labels = constant_op.constant([[1000.], [30.], [20.], [20.]])
   1788       features = {'x': constant_op.constant([[1000.], [30.], [20.], [20.]])}
   1789       return features, labels
   1790 
   1791     estimator_with_fe_fn = dnn_linear_combined.DNNLinearCombinedRegressor(
   1792         linear_feature_columns=[feature_column.real_valued_column('x')],
   1793         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1794         dnn_hidden_units=[3, 3],
   1795         config=run_config.RunConfig(tf_random_seed=1),
   1796         feature_engineering_fn=feature_engineering_fn)
   1797     estimator_with_fe_fn.fit(input_fn=input_fn, steps=110)
   1798 
   1799     estimator_without_fe_fn = dnn_linear_combined.DNNLinearCombinedRegressor(
   1800         linear_feature_columns=[feature_column.real_valued_column('x')],
   1801         dnn_feature_columns=[feature_column.real_valued_column('x')],
   1802         dnn_hidden_units=[3, 3],
   1803         config=run_config.RunConfig(tf_random_seed=1))
   1804     estimator_without_fe_fn.fit(input_fn=input_fn, steps=110)
   1805 
   1806     # predictions = y
   1807     prediction_with_fe_fn = next(
   1808         estimator_with_fe_fn.predict_scores(
   1809             input_fn=input_fn, as_iterable=True))
   1810     self.assertAlmostEqual(1000., prediction_with_fe_fn, delta=10.0)
   1811     prediction_without_fe_fn = next(
   1812         estimator_without_fe_fn.predict_scores(
   1813             input_fn=input_fn, as_iterable=True))
   1814     self.assertAlmostEqual(100., prediction_without_fe_fn, delta=1.0)
   1815 
   1816 
if __name__ == '__main__':
  # Run all test cases in this file under the TensorFlow test runner.
  test.main()
   1819