Home | History | Annotate | Download | only in canned
      1 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Tests for baseline.py."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import math
     22 import os
     23 import shutil
     24 import tempfile
     25 
     26 import numpy as np
     27 import six
     28 
     29 from tensorflow.core.example import example_pb2
     30 from tensorflow.core.example import feature_pb2
     31 from tensorflow.python.client import session as tf_session
     32 from tensorflow.python.estimator.canned import baseline
     33 from tensorflow.python.estimator.canned import metric_keys
     34 from tensorflow.python.estimator.export import export
     35 from tensorflow.python.estimator.inputs import numpy_io
     36 from tensorflow.python.estimator.inputs import pandas_io
     37 from tensorflow.python.feature_column import feature_column as feature_column_lib
     38 from tensorflow.python.framework import dtypes
     39 from tensorflow.python.framework import ops
     40 from tensorflow.python.ops import check_ops
     41 from tensorflow.python.ops import control_flow_ops
     42 from tensorflow.python.ops import data_flow_ops
     43 from tensorflow.python.ops import math_ops
     44 from tensorflow.python.ops import parsing_ops
     45 from tensorflow.python.ops import state_ops
     46 from tensorflow.python.ops import variable_scope
     47 from tensorflow.python.ops import variables
     48 from tensorflow.python.platform import gfile
     49 from tensorflow.python.platform import test
     50 from tensorflow.python.summary.writer import writer_cache
     51 from tensorflow.python.training import checkpoint_utils
     52 from tensorflow.python.training import input as input_lib
     53 from tensorflow.python.training import optimizer
     54 from tensorflow.python.training import queue_runner
     55 from tensorflow.python.training import saver
     56 
     57 
     58 try:
     59   # pylint: disable=g-import-not-at-top
     60   import pandas as pd
     61   HAS_PANDAS = True
     62 except IOError:
     63   # Pandas writes a temporary file during import. If it fails, don't use pandas.
     64   HAS_PANDAS = False
     65 except ImportError:
     66   HAS_PANDAS = False
     67 
     68 # pylint rules which are disabled by default for test files.
     69 # pylint: disable=invalid-name,protected-access,missing-docstring
     70 
     71 # Names of variables created by model.
     72 BIAS_NAME = 'baseline/bias'
     73 
     74 
     75 def assert_close(expected, actual, rtol=1e-04, name='assert_close'):
     76   with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
     77     expected = ops.convert_to_tensor(expected, name='expected')
     78     actual = ops.convert_to_tensor(actual, name='actual')
     79     rdiff = math_ops.abs(expected - actual, 'diff') / math_ops.abs(expected)
     80     rtol = ops.convert_to_tensor(rtol, name='rtol')
     81     return check_ops.assert_less(
     82         rdiff,
     83         rtol,
     84         data=('Condition expected =~ actual did not hold element-wise:'
     85               'expected = ', expected, 'actual = ', actual, 'rdiff = ', rdiff,
     86               'rtol = ', rtol,),
     87         name=scope)
     88 
     89 
     90 def save_variables_to_ckpt(model_dir):
     91   init_all_op = [variables.global_variables_initializer()]
     92   with tf_session.Session() as sess:
     93     sess.run(init_all_op)
     94     saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
     95 
     96 
     97 def queue_parsed_features(feature_map):
     98   tensors_to_enqueue = []
     99   keys = []
    100   for key, tensor in six.iteritems(feature_map):
    101     keys.append(key)
    102     tensors_to_enqueue.append(tensor)
    103   queue_dtypes = [x.dtype for x in tensors_to_enqueue]
    104   input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
    105   queue_runner.add_queue_runner(
    106       queue_runner.QueueRunner(input_queue,
    107                                [input_queue.enqueue(tensors_to_enqueue)]))
    108   dequeued_tensors = input_queue.dequeue()
    109   return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
    110 
    111 
    112 def sorted_key_dict(unsorted_dict):
    113   return {k: unsorted_dict[k] for k in sorted(unsorted_dict)}
    114 
    115 
    116 def sigmoid(x):
    117   return 1 / (1 + np.exp(-1.0 * x))
    118 
    119 
    120 def _baseline_regressor_fn(*args, **kwargs):
    121   return baseline.BaselineRegressor(*args, **kwargs)
    122 
    123 
    124 def _baseline_classifier_fn(*args, **kwargs):
    125   return baseline.BaselineClassifier(*args, **kwargs)
    126 
    127 
    128 # Tests for Baseline Regressor.
    129 
    130 
    131 # TODO(b/36813849): Add tests with dynamic shape inputs using placeholders.
    132 class BaselineRegressorEvaluationTest(test.TestCase):
    133 
    134   def setUp(self):
    135     self._model_dir = tempfile.mkdtemp()
    136 
    137   def tearDown(self):
    138     if self._model_dir:
    139       writer_cache.FileWriterCache.clear()
    140       shutil.rmtree(self._model_dir)
    141 
    142   def test_evaluation_for_simple_data(self):
    143     with ops.Graph().as_default():
    144       variables.Variable([13.0], name=BIAS_NAME)
    145       variables.Variable(
    146           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    147       save_variables_to_ckpt(self._model_dir)
    148 
    149     baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
    150     eval_metrics = baseline_regressor.evaluate(
    151         input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=1)
    152 
    153     # Logit is bias = 13, while label is 10. Loss is 3**2 = 9.
    154     self.assertDictEqual({
    155         metric_keys.MetricKeys.LOSS: 9.,
    156         metric_keys.MetricKeys.LOSS_MEAN: 9.,
    157         ops.GraphKeys.GLOBAL_STEP: 100
    158     }, eval_metrics)
    159 
    160   def test_evaluation_batch(self):
    161     """Tests evaluation for batch_size==2."""
    162     with ops.Graph().as_default():
    163       variables.Variable([13.0], name=BIAS_NAME)
    164       variables.Variable(
    165           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    166       save_variables_to_ckpt(self._model_dir)
    167 
    168     baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
    169     eval_metrics = baseline_regressor.evaluate(
    170         input_fn=lambda: ({'age': ((1,), (1,))}, ((10.,), (10.,))), steps=1)
    171 
    172     # Logit is bias = 13, while label is 10.
    173     # Loss per example is 3**2 = 9.
    174     # Training loss is the sum over batch = 9 + 9 = 18
    175     # Average loss is the average over batch = 9
    176     self.assertDictEqual({
    177         metric_keys.MetricKeys.LOSS: 18.,
    178         metric_keys.MetricKeys.LOSS_MEAN: 9.,
    179         ops.GraphKeys.GLOBAL_STEP: 100
    180     }, eval_metrics)
    181 
    182   def test_evaluation_weights(self):
    183     """Tests evaluation with weights."""
    184     with ops.Graph().as_default():
    185       variables.Variable([13.0], name=BIAS_NAME)
    186       variables.Variable(
    187           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    188       save_variables_to_ckpt(self._model_dir)
    189 
    190     def _input_fn():
    191       features = {'age': ((1,), (1,)), 'weights': ((1.,), (2.,))}
    192       labels = ((10.,), (10.,))
    193       return features, labels
    194 
    195     baseline_regressor = _baseline_regressor_fn(
    196         weight_column='weights',
    197         model_dir=self._model_dir)
    198     eval_metrics = baseline_regressor.evaluate(input_fn=_input_fn, steps=1)
    199 
    200     # Logit is bias = 13, while label is 10.
    201     # Loss per example is 3**2 = 9.
    202     # Training loss is the weighted sum over batch = 9 + 2*9 = 27
    203     # average loss is the weighted average = 9 + 2*9 / (1 + 2) = 9
    204     self.assertDictEqual({
    205         metric_keys.MetricKeys.LOSS: 27.,
    206         metric_keys.MetricKeys.LOSS_MEAN: 9.,
    207         ops.GraphKeys.GLOBAL_STEP: 100
    208     }, eval_metrics)
    209 
    210   def test_evaluation_for_multi_dimensions(self):
    211     label_dim = 2
    212     with ops.Graph().as_default():
    213       variables.Variable([46.0, 58.0], name=BIAS_NAME)
    214       variables.Variable(100, name='global_step', dtype=dtypes.int64)
    215       save_variables_to_ckpt(self._model_dir)
    216 
    217     baseline_regressor = _baseline_regressor_fn(
    218         label_dimension=label_dim,
    219         model_dir=self._model_dir)
    220     input_fn = numpy_io.numpy_input_fn(
    221         x={
    222             'age': np.array([[2., 4., 5.]]),
    223         },
    224         y=np.array([[46., 58.]]),
    225         batch_size=1,
    226         num_epochs=None,
    227         shuffle=False)
    228     eval_metrics = baseline_regressor.evaluate(input_fn=input_fn, steps=1)
    229 
    230     self.assertItemsEqual(
    231         (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
    232          ops.GraphKeys.GLOBAL_STEP), eval_metrics.keys())
    233 
    234     # Logit is bias which is [46, 58]
    235     self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
    236 
    237 
    238 class BaselineRegressorPredictTest(test.TestCase):
    239 
    240   def setUp(self):
    241     self._model_dir = tempfile.mkdtemp()
    242 
    243   def tearDown(self):
    244     if self._model_dir:
    245       writer_cache.FileWriterCache.clear()
    246       shutil.rmtree(self._model_dir)
    247 
    248   def test_1d(self):
    249     """Tests predict when all variables are one-dimensional."""
    250     with ops.Graph().as_default():
    251       variables.Variable([.2], name=BIAS_NAME)
    252       variables.Variable(100, name='global_step', dtype=dtypes.int64)
    253       save_variables_to_ckpt(self._model_dir)
    254 
    255     baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
    256 
    257     predict_input_fn = numpy_io.numpy_input_fn(
    258         x={'x': np.array([[2.]])},
    259         y=None,
    260         batch_size=1,
    261         num_epochs=1,
    262         shuffle=False)
    263     predictions = baseline_regressor.predict(input_fn=predict_input_fn)
    264     predicted_scores = list([x['predictions'] for x in predictions])
    265     # x * weight + bias = 2. * 10. + .2 = 20.2
    266     self.assertAllClose([[.2]], predicted_scores)
    267 
    268   def testMultiDim(self):
    269     """Tests predict when all variables are multi-dimenstional."""
    270     batch_size = 2
    271     label_dimension = 3
    272     with ops.Graph().as_default():
    273       variables.Variable(  # shape=[label_dimension]
    274           [.2, .4, .6], name=BIAS_NAME)
    275       variables.Variable(100, name='global_step', dtype=dtypes.int64)
    276       save_variables_to_ckpt(self._model_dir)
    277 
    278     baseline_regressor = _baseline_regressor_fn(
    279         label_dimension=label_dimension,
    280         model_dir=self._model_dir)
    281 
    282     predict_input_fn = numpy_io.numpy_input_fn(
    283         # x shape=[batch_size, x_dim]
    284         x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
    285         y=None,
    286         batch_size=batch_size,
    287         num_epochs=1,
    288         shuffle=False)
    289     predictions = baseline_regressor.predict(input_fn=predict_input_fn)
    290     predicted_scores = list([x['predictions'] for x in predictions])
    291     # score = bias, shape=[batch_size, label_dimension]
    292     self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]],
    293                         predicted_scores)
    294 
    295 
    296 class BaselineRegressorIntegrationTest(test.TestCase):
    297 
    298   def setUp(self):
    299     self._model_dir = tempfile.mkdtemp()
    300 
    301   def tearDown(self):
    302     if self._model_dir:
    303       writer_cache.FileWriterCache.clear()
    304       shutil.rmtree(self._model_dir)
    305 
    306   def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
    307                           input_dimension, label_dimension, prediction_length):
    308     feature_columns = [
    309         feature_column_lib.numeric_column('x', shape=(input_dimension,))
    310     ]
    311     est = _baseline_regressor_fn(
    312         label_dimension=label_dimension,
    313         model_dir=self._model_dir)
    314 
    315     # TRAIN
    316     # learn y = x
    317     est.train(train_input_fn, steps=200)
    318 
    319     # EVALUTE
    320     scores = est.evaluate(eval_input_fn)
    321     self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
    322     self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
    323 
    324     # PREDICT
    325     predictions = np.array(
    326         [x['predictions'] for x in est.predict(predict_input_fn)])
    327     self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
    328 
    329     # EXPORT
    330     feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
    331     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
    332         feature_spec)
    333     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
    334                                        serving_input_receiver_fn)
    335     self.assertTrue(gfile.Exists(export_dir))
    336 
    337   def test_numpy_input_fn(self):
    338     """Tests complete flow with numpy_input_fn."""
    339     label_dimension = 2
    340     input_dimension = label_dimension
    341     batch_size = 10
    342     prediction_length = batch_size
    343     data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    344     data = data.reshape(batch_size, label_dimension)
    345 
    346     train_input_fn = numpy_io.numpy_input_fn(
    347         x={'x': data},
    348         y=data,
    349         batch_size=batch_size,
    350         num_epochs=None,
    351         shuffle=True)
    352     eval_input_fn = numpy_io.numpy_input_fn(
    353         x={'x': data},
    354         y=data,
    355         batch_size=batch_size,
    356         num_epochs=1,
    357         shuffle=False)
    358     predict_input_fn = numpy_io.numpy_input_fn(
    359         x={'x': data},
    360         y=None,
    361         batch_size=batch_size,
    362         num_epochs=1,
    363         shuffle=False)
    364 
    365     self._test_complete_flow(
    366         train_input_fn=train_input_fn,
    367         eval_input_fn=eval_input_fn,
    368         predict_input_fn=predict_input_fn,
    369         input_dimension=input_dimension,
    370         label_dimension=label_dimension,
    371         prediction_length=prediction_length)
    372 
    373   def test_pandas_input_fn(self):
    374     """Tests complete flow with pandas_input_fn."""
    375     if not HAS_PANDAS:
    376       return
    377 
    378     # Pandas DataFrame natually supports 1 dim data only.
    379     label_dimension = 1
    380     input_dimension = label_dimension
    381     batch_size = 10
    382     data = np.array([1., 2., 3., 4.], dtype=np.float32)
    383     x = pd.DataFrame({'x': data})
    384     y = pd.Series(data)
    385     prediction_length = 4
    386 
    387     train_input_fn = pandas_io.pandas_input_fn(
    388         x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
    389     eval_input_fn = pandas_io.pandas_input_fn(
    390         x=x, y=y, batch_size=batch_size, shuffle=False)
    391     predict_input_fn = pandas_io.pandas_input_fn(
    392         x=x, batch_size=batch_size, shuffle=False)
    393 
    394     self._test_complete_flow(
    395         train_input_fn=train_input_fn,
    396         eval_input_fn=eval_input_fn,
    397         predict_input_fn=predict_input_fn,
    398         input_dimension=input_dimension,
    399         label_dimension=label_dimension,
    400         prediction_length=prediction_length)
    401 
    402   def test_input_fn_from_parse_example(self):
    403     """Tests complete flow with input_fn constructed from parse_example."""
    404     label_dimension = 2
    405     input_dimension = label_dimension
    406     batch_size = 10
    407     prediction_length = batch_size
    408     data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    409     data = data.reshape(batch_size, label_dimension)
    410 
    411     serialized_examples = []
    412     for datum in data:
    413       example = example_pb2.Example(features=feature_pb2.Features(
    414           feature={
    415               'x':
    416                   feature_pb2.Feature(float_list=feature_pb2.FloatList(
    417                       value=datum)),
    418               'y':
    419                   feature_pb2.Feature(float_list=feature_pb2.FloatList(
    420                       value=datum[:label_dimension])),
    421           }))
    422       serialized_examples.append(example.SerializeToString())
    423 
    424     feature_spec = {
    425         'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
    426         'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
    427     }
    428 
    429     def _train_input_fn():
    430       feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
    431       features = queue_parsed_features(feature_map)
    432       labels = features.pop('y')
    433       return features, labels
    434 
    435     def _eval_input_fn():
    436       feature_map = parsing_ops.parse_example(
    437           input_lib.limit_epochs(serialized_examples, num_epochs=1),
    438           feature_spec)
    439       features = queue_parsed_features(feature_map)
    440       labels = features.pop('y')
    441       return features, labels
    442 
    443     def _predict_input_fn():
    444       feature_map = parsing_ops.parse_example(
    445           input_lib.limit_epochs(serialized_examples, num_epochs=1),
    446           feature_spec)
    447       features = queue_parsed_features(feature_map)
    448       features.pop('y')
    449       return features, None
    450 
    451     self._test_complete_flow(
    452         train_input_fn=_train_input_fn,
    453         eval_input_fn=_eval_input_fn,
    454         predict_input_fn=_predict_input_fn,
    455         input_dimension=input_dimension,
    456         label_dimension=label_dimension,
    457         prediction_length=prediction_length)
    458 
    459 
    460 class BaselineRegressorTrainingTest(test.TestCase):
    461 
    462   def setUp(self):
    463     self._model_dir = tempfile.mkdtemp()
    464 
    465   def tearDown(self):
    466     if self._model_dir:
    467       writer_cache.FileWriterCache.clear()
    468       shutil.rmtree(self._model_dir)
    469 
    470   def _mock_optimizer(self, expected_loss=None):
    471     expected_var_names = [
    472         '%s:0' % BIAS_NAME
    473     ]
    474 
    475     def _minimize(loss, global_step=None, var_list=None):
    476       trainable_vars = var_list or ops.get_collection(
    477           ops.GraphKeys.TRAINABLE_VARIABLES)
    478       self.assertItemsEqual(expected_var_names,
    479                             [var.name for var in trainable_vars])
    480 
    481       # Verify loss. We can't check the value directly, so we add an assert op.
    482       self.assertEquals(0, loss.shape.ndims)
    483       if expected_loss is None:
    484         if global_step is not None:
    485           return state_ops.assign_add(global_step, 1).op
    486         return control_flow_ops.no_op()
    487       assert_loss = assert_close(
    488           math_ops.to_float(expected_loss, name='expected'),
    489           loss,
    490           name='assert_loss')
    491       with ops.control_dependencies((assert_loss,)):
    492         if global_step is not None:
    493           return state_ops.assign_add(global_step, 1).op
    494         return control_flow_ops.no_op()
    495 
    496     mock_optimizer = test.mock.NonCallableMock(
    497         spec=optimizer.Optimizer,
    498         wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
    499     mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
    500 
    501     # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
    502     # So, return mock_optimizer itself for deepcopy.
    503     mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
    504     return mock_optimizer
    505 
    506   def _assert_checkpoint(self,
    507                          label_dimension,
    508                          expected_global_step,
    509                          expected_bias=None):
    510     shapes = {
    511         name: shape
    512         for (name, shape) in checkpoint_utils.list_variables(self._model_dir)
    513     }
    514 
    515     self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
    516     self.assertEqual(expected_global_step,
    517                      checkpoint_utils.load_variable(self._model_dir,
    518                                                     ops.GraphKeys.GLOBAL_STEP))
    519 
    520     self.assertEqual([label_dimension], shapes[BIAS_NAME])
    521     if expected_bias is not None:
    522       self.assertEqual(expected_bias,
    523                        checkpoint_utils.load_variable(self._model_dir,
    524                                                       BIAS_NAME))
    525 
    526   def testFromScratchWithDefaultOptimizer(self):
    527     # Create BaselineRegressor.
    528     label = 5.
    529     age = 17
    530     baseline_regressor = _baseline_regressor_fn(model_dir=self._model_dir)
    531 
    532     # Train for a few steps, and validate final checkpoint.
    533     num_steps = 10
    534     baseline_regressor.train(
    535         input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    536     self._assert_checkpoint(label_dimension=1, expected_global_step=num_steps)
    537 
    538   def testTrainWithOneDimLabel(self):
    539     label_dimension = 1
    540     batch_size = 20
    541     est = _baseline_regressor_fn(
    542         label_dimension=label_dimension,
    543         model_dir=self._model_dir)
    544     data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
    545     self.assertEqual((batch_size,), data_rank_1.shape)
    546 
    547     train_input_fn = numpy_io.numpy_input_fn(
    548         x={'age': data_rank_1},
    549         y=data_rank_1,
    550         batch_size=batch_size,
    551         num_epochs=None,
    552         shuffle=True)
    553     est.train(train_input_fn, steps=200)
    554     self._assert_checkpoint(label_dimension=1, expected_global_step=200)
    555 
    556   def testTrainWithOneDimWeight(self):
    557     label_dimension = 1
    558     batch_size = 20
    559     est = _baseline_regressor_fn(
    560         label_dimension=label_dimension,
    561         weight_column='w',
    562         model_dir=self._model_dir)
    563 
    564     data_rank_1 = np.linspace(0., 2., batch_size, dtype=np.float32)
    565     self.assertEqual((batch_size,), data_rank_1.shape)
    566 
    567     train_input_fn = numpy_io.numpy_input_fn(
    568         x={'age': data_rank_1,
    569            'w': data_rank_1},
    570         y=data_rank_1,
    571         batch_size=batch_size,
    572         num_epochs=None,
    573         shuffle=True)
    574     est.train(train_input_fn, steps=200)
    575     self._assert_checkpoint(label_dimension=1, expected_global_step=200)
    576 
    577   def testFromScratch(self):
    578     # Create BaselineRegressor.
    579     label = 5.
    580     age = 17
    581     # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
    582     mock_optimizer = self._mock_optimizer(expected_loss=25.)
    583     baseline_regressor = _baseline_regressor_fn(
    584         model_dir=self._model_dir,
    585         optimizer=mock_optimizer)
    586     self.assertEqual(0, mock_optimizer.minimize.call_count)
    587 
    588     # Train for a few steps, and validate optimizer and final checkpoint.
    589     num_steps = 10
    590     baseline_regressor.train(
    591         input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    592     self.assertEqual(1, mock_optimizer.minimize.call_count)
    593     self._assert_checkpoint(
    594         label_dimension=1,
    595         expected_global_step=num_steps,
    596         expected_bias=[0.])
    597 
    598   def testFromCheckpoint(self):
    599     # Create initial checkpoint.
    600     bias = 7.0
    601     initial_global_step = 100
    602     with ops.Graph().as_default():
    603       variables.Variable([bias], name=BIAS_NAME)
    604       variables.Variable(
    605           initial_global_step,
    606           name=ops.GraphKeys.GLOBAL_STEP,
    607           dtype=dtypes.int64)
    608       save_variables_to_ckpt(self._model_dir)
    609 
    610     # logits = bias = 6.
    611     # loss = (logits - label)^2 = (7 - 5)^2 = 4
    612     mock_optimizer = self._mock_optimizer(expected_loss=4.)
    613     baseline_regressor = _baseline_regressor_fn(
    614         model_dir=self._model_dir,
    615         optimizer=mock_optimizer)
    616     self.assertEqual(0, mock_optimizer.minimize.call_count)
    617 
    618     # Train for a few steps, and validate optimizer and final checkpoint.
    619     num_steps = 10
    620     baseline_regressor.train(
    621         input_fn=lambda: ({'age': ((17,),)}, ((5.,),)), steps=num_steps)
    622     self.assertEqual(1, mock_optimizer.minimize.call_count)
    623     self._assert_checkpoint(
    624         label_dimension=1,
    625         expected_global_step=initial_global_step + num_steps,
    626         expected_bias=[bias])
    627 
    628   def testFromCheckpointMultiBatch(self):
    629     # Create initial checkpoint.
    630     bias = 5.0
    631     initial_global_step = 100
    632     with ops.Graph().as_default():
    633       variables.Variable([bias], name=BIAS_NAME)
    634       variables.Variable(
    635           initial_global_step,
    636           name=ops.GraphKeys.GLOBAL_STEP,
    637           dtype=dtypes.int64)
    638       save_variables_to_ckpt(self._model_dir)
    639 
    640     # logits = bias
    641     # logits[0] = 5.
    642     # logits[1] = 5.
    643     # loss = sum(logits - label)^2 = (5 - 5)^2 + (5 - 3)^2 = 4
    644     mock_optimizer = self._mock_optimizer(expected_loss=4.)
    645     baseline_regressor = _baseline_regressor_fn(
    646         model_dir=self._model_dir,
    647         optimizer=mock_optimizer)
    648     self.assertEqual(0, mock_optimizer.minimize.call_count)
    649 
    650     # Train for a few steps, and validate optimizer and final checkpoint.
    651     num_steps = 10
    652     baseline_regressor.train(
    653         input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))),
    654         steps=num_steps)
    655     self.assertEqual(1, mock_optimizer.minimize.call_count)
    656     self._assert_checkpoint(
    657         label_dimension=1,
    658         expected_global_step=initial_global_step + num_steps,
    659         expected_bias=bias)
    660 
    661 
    662 # Tests for Baseline Classifier.
    663 
    664 
    665 class BaselineClassifierTrainingTest(test.TestCase):
    666 
    667   def setUp(self):
    668     self._model_dir = tempfile.mkdtemp()
    669 
    670   def tearDown(self):
    671     if self._model_dir:
    672       shutil.rmtree(self._model_dir)
    673 
    674   def _mock_optimizer(self, expected_loss=None):
    675     expected_var_names = [
    676         '%s:0' % BIAS_NAME
    677     ]
    678 
    679     def _minimize(loss, global_step):
    680       trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
    681       self.assertItemsEqual(
    682           expected_var_names,
    683           [var.name for var in trainable_vars])
    684 
    685       # Verify loss. We can't check the value directly, so we add an assert op.
    686       self.assertEquals(0, loss.shape.ndims)
    687       if expected_loss is None:
    688         return state_ops.assign_add(global_step, 1).op
    689       assert_loss = assert_close(
    690           math_ops.to_float(expected_loss, name='expected'),
    691           loss,
    692           name='assert_loss')
    693       with ops.control_dependencies((assert_loss,)):
    694         return state_ops.assign_add(global_step, 1).op
    695 
    696     mock_optimizer = test.mock.NonCallableMock(
    697         spec=optimizer.Optimizer,
    698         wraps=optimizer.Optimizer(use_locking=False, name='my_optimizer'))
    699     mock_optimizer.minimize = test.mock.MagicMock(wraps=_minimize)
    700 
    701     # NOTE: Estimator.params performs a deepcopy, which wreaks havoc with mocks.
    702     # So, return mock_optimizer itself for deepcopy.
    703     mock_optimizer.__deepcopy__ = lambda _: mock_optimizer
    704     return mock_optimizer
    705 
    706   def _assert_checkpoint(
    707       self, n_classes, expected_global_step, expected_bias=None):
    708     logits_dimension = n_classes if n_classes > 2 else 1
    709 
    710     shapes = {
    711         name: shape for (name, shape) in
    712         checkpoint_utils.list_variables(self._model_dir)
    713     }
    714 
    715     self.assertEqual([], shapes[ops.GraphKeys.GLOBAL_STEP])
    716     self.assertEqual(
    717         expected_global_step,
    718         checkpoint_utils.load_variable(
    719             self._model_dir, ops.GraphKeys.GLOBAL_STEP))
    720 
    721     self.assertEqual([logits_dimension], shapes[BIAS_NAME])
    722     if expected_bias is not None:
    723       self.assertAllEqual(expected_bias,
    724                           checkpoint_utils.load_variable(
    725                               self._model_dir, BIAS_NAME))
    726 
    727   def _testFromScratchWithDefaultOptimizer(self, n_classes):
    728     label = 0
    729     age = 17
    730     est = baseline.BaselineClassifier(
    731         n_classes=n_classes,
    732         model_dir=self._model_dir)
    733 
    734     # Train for a few steps, and validate final checkpoint.
    735     num_steps = 10
    736     est.train(
    737         input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    738     self._assert_checkpoint(n_classes, num_steps)
    739 
    740   def testBinaryClassesFromScratchWithDefaultOptimizer(self):
    741     self._testFromScratchWithDefaultOptimizer(n_classes=2)
    742 
    743   def testMultiClassesFromScratchWithDefaultOptimizer(self):
    744     self._testFromScratchWithDefaultOptimizer(n_classes=4)
    745 
    746   def _testTrainWithTwoDimsLabel(self, n_classes):
    747     batch_size = 20
    748 
    749     est = baseline.BaselineClassifier(
    750         n_classes=n_classes,
    751         model_dir=self._model_dir)
    752     data_rank_1 = np.array([0, 1])
    753     data_rank_2 = np.array([[0], [1]])
    754     self.assertEqual((2,), data_rank_1.shape)
    755     self.assertEqual((2, 1), data_rank_2.shape)
    756 
    757     train_input_fn = numpy_io.numpy_input_fn(
    758         x={'age': data_rank_1},
    759         y=data_rank_2,
    760         batch_size=batch_size,
    761         num_epochs=None,
    762         shuffle=True)
    763     est.train(train_input_fn, steps=200)
    764     self._assert_checkpoint(n_classes, 200)
    765 
    766   def testBinaryClassesTrainWithTwoDimsLabel(self):
    767     self._testTrainWithTwoDimsLabel(n_classes=2)
    768 
    769   def testMultiClassesTrainWithTwoDimsLabel(self):
    770     self._testTrainWithTwoDimsLabel(n_classes=4)
    771 
    772   def _testTrainWithOneDimLabel(self, n_classes):
    773     batch_size = 20
    774 
    775     est = baseline.BaselineClassifier(
    776         n_classes=n_classes,
    777         model_dir=self._model_dir)
    778     data_rank_1 = np.array([0, 1])
    779     self.assertEqual((2,), data_rank_1.shape)
    780 
    781     train_input_fn = numpy_io.numpy_input_fn(
    782         x={'age': data_rank_1},
    783         y=data_rank_1,
    784         batch_size=batch_size,
    785         num_epochs=None,
    786         shuffle=True)
    787     est.train(train_input_fn, steps=200)
    788     self._assert_checkpoint(n_classes, 200)
    789 
    790   def testBinaryClassesTrainWithOneDimLabel(self):
    791     self._testTrainWithOneDimLabel(n_classes=2)
    792 
    793   def testMultiClassesTrainWithOneDimLabel(self):
    794     self._testTrainWithOneDimLabel(n_classes=4)
    795 
    796   def _testTrainWithTwoDimsWeight(self, n_classes):
    797     batch_size = 20
    798 
    799     est = baseline.BaselineClassifier(
    800         weight_column='w',
    801         n_classes=n_classes,
    802         model_dir=self._model_dir)
    803     data_rank_1 = np.array([0, 1])
    804     data_rank_2 = np.array([[0], [1]])
    805     self.assertEqual((2,), data_rank_1.shape)
    806     self.assertEqual((2, 1), data_rank_2.shape)
    807 
    808     train_input_fn = numpy_io.numpy_input_fn(
    809         x={'age': data_rank_1, 'w': data_rank_2}, y=data_rank_1,
    810         batch_size=batch_size, num_epochs=None,
    811         shuffle=True)
    812     est.train(train_input_fn, steps=200)
    813     self._assert_checkpoint(n_classes, 200)
    814 
    815   def testBinaryClassesTrainWithTwoDimsWeight(self):
    816     self._testTrainWithTwoDimsWeight(n_classes=2)
    817 
    818   def testMultiClassesTrainWithTwoDimsWeight(self):
    819     self._testTrainWithTwoDimsWeight(n_classes=4)
    820 
    821   def _testTrainWithOneDimWeight(self, n_classes):
    822     batch_size = 20
    823 
    824     est = baseline.BaselineClassifier(
    825         weight_column='w',
    826         n_classes=n_classes,
    827         model_dir=self._model_dir)
    828     data_rank_1 = np.array([0, 1])
    829     self.assertEqual((2,), data_rank_1.shape)
    830 
    831     train_input_fn = numpy_io.numpy_input_fn(
    832         x={'age': data_rank_1, 'w': data_rank_1}, y=data_rank_1,
    833         batch_size=batch_size, num_epochs=None,
    834         shuffle=True)
    835     est.train(train_input_fn, steps=200)
    836     self._assert_checkpoint(n_classes, 200)
    837 
    838   def testBinaryClassesTrainWithOneDimWeight(self):
    839     self._testTrainWithOneDimWeight(n_classes=2)
    840 
    841   def testMultiClassesTrainWithOneDimWeight(self):
    842     self._testTrainWithOneDimWeight(n_classes=4)
    843 
    844   def _testFromScratch(self, n_classes):
    845     label = 1
    846     age = 17
    847     # For binary classifier:
    848     #   loss = sigmoid_cross_entropy(logits, label) where logits=0 (weights are
    849     #   all zero initially) and label = 1 so,
    850     #      loss = 1 * -log ( sigmoid(logits) ) = 0.69315
    851     # For multi class classifier:
    852     #   loss = cross_entropy(logits, label) where logits are all 0s (weights are
    853     #   all zero initially) and label = 1 so,
    854     #      loss = 1 * -log ( 1.0 / n_classes )
    855     # For this particular test case, as logits are same, the formula
    856     # 1 * -log ( 1.0 / n_classes ) covers both binary and multi class cases.
    857     mock_optimizer = self._mock_optimizer(
    858         expected_loss=-1 * math.log(1.0/n_classes))
    859 
    860     est = baseline.BaselineClassifier(
    861         n_classes=n_classes,
    862         optimizer=mock_optimizer,
    863         model_dir=self._model_dir)
    864     self.assertEqual(0, mock_optimizer.minimize.call_count)
    865 
    866     # Train for a few steps, and validate optimizer and final checkpoint.
    867     num_steps = 10
    868     est.train(
    869         input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    870     self.assertEqual(1, mock_optimizer.minimize.call_count)
    871     self._assert_checkpoint(
    872         n_classes,
    873         expected_global_step=num_steps,
    874         expected_bias=[0.] if n_classes == 2 else [.0] * n_classes)
    875 
    876   def testBinaryClassesFromScratch(self):
    877     self._testFromScratch(n_classes=2)
    878 
    879   def testMultiClassesFromScratch(self):
    880     self._testFromScratch(n_classes=4)
    881 
    882   def _testFromCheckpoint(self, n_classes):
    883     # Create initial checkpoint.
    884     label = 1
    885     age = 17
    886     bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
    887     initial_global_step = 100
    888     with ops.Graph().as_default():
    889       variables.Variable(bias, name=BIAS_NAME)
    890       variables.Variable(
    891           initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
    892           dtype=dtypes.int64)
    893       save_variables_to_ckpt(self._model_dir)
    894 
    895     # For binary classifier:
    896     #   logits = bias = -1.
    897     #   loss = sigmoid_cross_entropy(logits, label)
    898     #   so, loss = 1 * -log ( sigmoid(-1) ) = 1.3133
    899     # For multi class classifier:
    900     #   loss = cross_entropy(logits, label)
    901     #   where logits = bias and label = 1
    902     #   so, loss = 1 * -log ( softmax(logits)[1] )
    903     if n_classes == 2:
    904       expected_loss = 1.3133
    905     else:
    906       logits = bias
    907       logits_exp = np.exp(logits)
    908       softmax = logits_exp / logits_exp.sum()
    909       expected_loss = -1 * math.log(softmax[label])
    910 
    911     mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
    912 
    913     est = baseline.BaselineClassifier(
    914         n_classes=n_classes,
    915         optimizer=mock_optimizer,
    916         model_dir=self._model_dir)
    917     self.assertEqual(0, mock_optimizer.minimize.call_count)
    918 
    919     # Train for a few steps, and validate optimizer and final checkpoint.
    920     num_steps = 10
    921     est.train(
    922         input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    923     self.assertEqual(1, mock_optimizer.minimize.call_count)
    924     self._assert_checkpoint(
    925         n_classes,
    926         expected_global_step=initial_global_step + num_steps,
    927         expected_bias=bias)
    928 
    929   def testBinaryClassesFromCheckpoint(self):
    930     self._testFromCheckpoint(n_classes=2)
    931 
    932   def testMultiClassesFromCheckpoint(self):
    933     self._testFromCheckpoint(n_classes=4)
    934 
    935   def _testFromCheckpointFloatLabels(self, n_classes):
    936     """Tests float labels for binary classification."""
    937     # Create initial checkpoint.
    938     if n_classes > 2:
    939       return
    940     label = 0.8
    941     age = 17
    942     bias = [-1.0]
    943     initial_global_step = 100
    944     with ops.Graph().as_default():
    945       variables.Variable(bias, name=BIAS_NAME)
    946       variables.Variable(
    947           initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
    948           dtype=dtypes.int64)
    949       save_variables_to_ckpt(self._model_dir)
    950 
    951     # logits = bias = -1.
    952     # loss = sigmoid_cross_entropy(logits, label)
    953     # => loss = -0.8 * log(sigmoid(-1)) -0.2 * log(sigmoid(+1)) = 1.1132617
    954     mock_optimizer = self._mock_optimizer(expected_loss=1.1132617)
    955 
    956     est = baseline.BaselineClassifier(
    957         n_classes=n_classes,
    958         optimizer=mock_optimizer,
    959         model_dir=self._model_dir)
    960     self.assertEqual(0, mock_optimizer.minimize.call_count)
    961 
    962     # Train for a few steps, and validate optimizer and final checkpoint.
    963     num_steps = 10
    964     est.train(
    965         input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=num_steps)
    966     self.assertEqual(1, mock_optimizer.minimize.call_count)
    967 
    968   def testBinaryClassesFromCheckpointFloatLabels(self):
    969     self._testFromCheckpointFloatLabels(n_classes=2)
    970 
    971   def testMultiClassesFromCheckpointFloatLabels(self):
    972     self._testFromCheckpointFloatLabels(n_classes=4)
    973 
    974   def _testFromCheckpointMultiBatch(self, n_classes):
    975     # Create initial checkpoint.
    976     label = [1, 0]
    977     age = [17, 18.5]
    978     # For binary case, the expected weight has shape (1,1). For multi class
    979     # case, the shape is (1, n_classes). In order to test the weights, set
    980     # weights as 2.0 * range(n_classes).
    981     bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
    982     initial_global_step = 100
    983     with ops.Graph().as_default():
    984       variables.Variable(bias, name=BIAS_NAME)
    985       variables.Variable(
    986           initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
    987           dtype=dtypes.int64)
    988       save_variables_to_ckpt(self._model_dir)
    989 
    990     # For binary classifier:
    991     #   logits = bias
    992     #   logits[0] = -1.
    993     #   logits[1] = -1.
    994     #   loss = sigmoid_cross_entropy(logits, label)
    995     #   so, loss[0] = 1 * -log ( sigmoid(-1) ) = 1.3133
    996     #       loss[1] = (1 - 0) * -log ( 1- sigmoid(-1) ) = 0.3132
    997     # For multi class classifier:
    998     #   loss = cross_entropy(logits, label)
    999     #   where logits = bias and label = [1, 0]
   1000     #   so, loss = 1 * -log ( softmax(logits)[label] )
   1001     if n_classes == 2:
   1002       expected_loss = (1.3133 + 0.3132)
   1003     else:
   1004       # Expand logits since batch_size=2
   1005       logits = bias * np.ones(shape=(2, 1))
   1006       logits_exp = np.exp(logits)
   1007       softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
   1008       softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
   1009       expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
   1010       expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
   1011       expected_loss = expected_loss_0 + expected_loss_1
   1012 
   1013     mock_optimizer = self._mock_optimizer(expected_loss=expected_loss)
   1014 
   1015     est = baseline.BaselineClassifier(
   1016         n_classes=n_classes,
   1017         optimizer=mock_optimizer,
   1018         model_dir=self._model_dir)
   1019     self.assertEqual(0, mock_optimizer.minimize.call_count)
   1020 
   1021     # Train for a few steps, and validate optimizer and final checkpoint.
   1022     num_steps = 10
   1023     est.train(
   1024         input_fn=lambda: ({'age': (age)}, (label)),
   1025         steps=num_steps)
   1026     self.assertEqual(1, mock_optimizer.minimize.call_count)
   1027     self._assert_checkpoint(
   1028         n_classes,
   1029         expected_global_step=initial_global_step + num_steps,
   1030         expected_bias=bias)
   1031 
   1032   def testBinaryClassesFromCheckpointMultiBatch(self):
   1033     self._testFromCheckpointMultiBatch(n_classes=2)
   1034 
   1035   def testMultiClassesFromCheckpointMultiBatch(self):
   1036     self._testFromCheckpointMultiBatch(n_classes=4)
   1037 
   1038 
   1039 class BaselineClassifierEvaluationTest(test.TestCase):
   1040 
   1041   def setUp(self):
   1042     self._model_dir = tempfile.mkdtemp()
   1043 
   1044   def tearDown(self):
   1045     if self._model_dir:
   1046       shutil.rmtree(self._model_dir)
   1047 
   1048   def _test_evaluation_for_simple_data(self, n_classes):
   1049     label = 1
   1050     age = 1.
   1051 
   1052     bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
   1053 
   1054     with ops.Graph().as_default():
   1055       variables.Variable(bias, name=BIAS_NAME)
   1056       variables.Variable(
   1057           100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
   1058       save_variables_to_ckpt(self._model_dir)
   1059 
   1060     est = _baseline_classifier_fn(
   1061         n_classes=n_classes,
   1062         model_dir=self._model_dir)
   1063     eval_metrics = est.evaluate(
   1064         input_fn=lambda: ({'age': ((age,),)}, ((label,),)), steps=1)
   1065 
   1066     if n_classes == 2:
   1067       # Binary classes: loss = -log(sigmoid(-1)) = 1.3133
   1068       # Prediction = sigmoid(-1) = 0.2689
   1069       expected_metrics = {
   1070           metric_keys.MetricKeys.LOSS: 1.3133,
   1071           ops.GraphKeys.GLOBAL_STEP: 100,
   1072           metric_keys.MetricKeys.LOSS_MEAN: 1.3133,
   1073           metric_keys.MetricKeys.ACCURACY: 0.,
   1074           metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689,
   1075           metric_keys.MetricKeys.LABEL_MEAN: 1.,
   1076           metric_keys.MetricKeys.ACCURACY_BASELINE: 1,
   1077           metric_keys.MetricKeys.AUC: 0.,
   1078           metric_keys.MetricKeys.AUC_PR: 0.5,
   1079       }
   1080     else:
   1081       # Multi classes: loss = 1 * -log ( softmax(logits)[label] )
   1082       logits = bias
   1083       logits_exp = np.exp(logits)
   1084       softmax = logits_exp / logits_exp.sum()
   1085       expected_loss = -1 * math.log(softmax[label])
   1086 
   1087       expected_metrics = {
   1088           metric_keys.MetricKeys.LOSS: expected_loss,
   1089           ops.GraphKeys.GLOBAL_STEP: 100,
   1090           metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
   1091           metric_keys.MetricKeys.ACCURACY: 0.,
   1092       }
   1093 
   1094     self.assertAllClose(sorted_key_dict(expected_metrics),
   1095                         sorted_key_dict(eval_metrics), rtol=1e-3)
   1096 
   1097   def test_binary_classes_evaluation_for_simple_data(self):
   1098     self._test_evaluation_for_simple_data(n_classes=2)
   1099 
   1100   def test_multi_classes_evaluation_for_simple_data(self):
   1101     self._test_evaluation_for_simple_data(n_classes=4)
   1102 
   1103   def _test_evaluation_batch(self, n_classes):
   1104     """Tests evaluation for batch_size==2."""
   1105     label = [1, 0]
   1106     age = [17., 18.]
   1107     bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
   1108     initial_global_step = 100
   1109     with ops.Graph().as_default():
   1110       variables.Variable(bias, name=BIAS_NAME)
   1111       variables.Variable(
   1112           initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
   1113           dtype=dtypes.int64)
   1114       save_variables_to_ckpt(self._model_dir)
   1115 
   1116     est = _baseline_classifier_fn(
   1117         n_classes=n_classes,
   1118         model_dir=self._model_dir)
   1119     eval_metrics = est.evaluate(
   1120         input_fn=lambda: ({'age': (age)}, (label)), steps=1)
   1121 
   1122     if n_classes == 2:
   1123       # Logits are (-1., -1.) labels are (1, 0).
   1124       # Loss is
   1125       #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
   1126       #   loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132
   1127       # Prediction = sigmoid(-1) = 0.2689
   1128       expected_loss = 1.3133 + 0.3132
   1129 
   1130       expected_metrics = {
   1131           metric_keys.MetricKeys.LOSS: expected_loss,
   1132           ops.GraphKeys.GLOBAL_STEP: 100,
   1133           metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
   1134           metric_keys.MetricKeys.ACCURACY: 0.5,
   1135           metric_keys.MetricKeys.PREDICTION_MEAN: 0.2689,
   1136           metric_keys.MetricKeys.LABEL_MEAN: 0.5,
   1137           metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
   1138           metric_keys.MetricKeys.AUC: 0.5,
   1139           metric_keys.MetricKeys.AUC_PR: 0.25,
   1140       }
   1141     else:
   1142       # Expand logits since batch_size=2
   1143       logits = bias * np.ones(shape=(2, 1))
   1144       logits_exp = np.exp(logits)
   1145       softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
   1146       softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
   1147       expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
   1148       expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
   1149       expected_loss = expected_loss_0 + expected_loss_1
   1150 
   1151       expected_metrics = {
   1152           metric_keys.MetricKeys.LOSS: expected_loss,
   1153           ops.GraphKeys.GLOBAL_STEP: 100,
   1154           metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
   1155           metric_keys.MetricKeys.ACCURACY: 0.5,
   1156       }
   1157 
   1158     self.assertAllClose(sorted_key_dict(expected_metrics),
   1159                         sorted_key_dict(eval_metrics), rtol=1e-3)
   1160 
   1161   def test_binary_classes_evaluation_batch(self):
   1162     self._test_evaluation_batch(n_classes=2)
   1163 
   1164   def test_multi_classes_evaluation_batch(self):
   1165     self._test_evaluation_batch(n_classes=4)
   1166 
   1167   def _test_evaluation_weights(self, n_classes):
   1168     """Tests evaluation with weights."""
   1169 
   1170     label = [1, 0]
   1171     age = [17., 18.]
   1172     weights = [1., 2.]
   1173     # For binary case, the expected weight has shape (1,1). For multi class
   1174     # case, the shape is (1, n_classes). In order to test the weights, set
   1175     # weights as 2.0 * range(n_classes).
   1176     bias = [-1.0] if n_classes == 2 else [-1.0] * n_classes
   1177     initial_global_step = 100
   1178     with ops.Graph().as_default():
   1179       variables.Variable(bias, name=BIAS_NAME)
   1180       variables.Variable(
   1181           initial_global_step, name=ops.GraphKeys.GLOBAL_STEP,
   1182           dtype=dtypes.int64)
   1183       save_variables_to_ckpt(self._model_dir)
   1184 
   1185     est = _baseline_classifier_fn(
   1186         n_classes=n_classes,
   1187         weight_column='w',
   1188         model_dir=self._model_dir)
   1189     eval_metrics = est.evaluate(
   1190         input_fn=lambda: ({'age': (age), 'w': (weights)}, (label)), steps=1)
   1191 
   1192     if n_classes == 2:
   1193       # Logits are (-1., -1.) labels are (1, 0).
   1194       # Loss is
   1195       #   loss for row 1: 1 * -log(sigmoid(-1)) = 1.3133
   1196       #   loss for row 2: (1 - 0) * -log(1 - sigmoid(-1)) = 0.3132
   1197       #   weights = [1., 2.]
   1198       expected_loss = 1.3133 * 1. + 0.3132 * 2.
   1199       loss_mean = expected_loss / (1.0 + 2.0)
   1200       label_mean = np.average(label, weights=weights)
   1201       logits = [-1, -1]
   1202       logistics = sigmoid(np.array(logits))
   1203       predictions_mean = np.average(logistics, weights=weights)
   1204 
   1205       expected_metrics = {
   1206           metric_keys.MetricKeys.LOSS: expected_loss,
   1207           ops.GraphKeys.GLOBAL_STEP: 100,
   1208           metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
   1209           metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.),
   1210           metric_keys.MetricKeys.PREDICTION_MEAN: predictions_mean,
   1211           metric_keys.MetricKeys.LABEL_MEAN: label_mean,
   1212           metric_keys.MetricKeys.ACCURACY_BASELINE: (
   1213               max(label_mean, 1-label_mean)),
   1214           metric_keys.MetricKeys.AUC: 0.5,
   1215           metric_keys.MetricKeys.AUC_PR: 0.16666645,
   1216       }
   1217     else:
   1218       # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] )
   1219       # Expand logits since batch_size=2
   1220       logits = bias * np.ones(shape=(2, 1))
   1221       logits_exp = np.exp(logits)
   1222       softmax_row_0 = logits_exp[0] / logits_exp[0].sum()
   1223       softmax_row_1 = logits_exp[1] / logits_exp[1].sum()
   1224       expected_loss_0 = -1 * math.log(softmax_row_0[label[0]])
   1225       expected_loss_1 = -1 * math.log(softmax_row_1[label[1]])
   1226       loss_mean = np.average([expected_loss_0, expected_loss_1],
   1227                              weights=weights)
   1228       expected_loss = loss_mean * np.sum(weights)
   1229 
   1230       expected_metrics = {
   1231           metric_keys.MetricKeys.LOSS: expected_loss,
   1232           ops.GraphKeys.GLOBAL_STEP: 100,
   1233           metric_keys.MetricKeys.LOSS_MEAN: loss_mean,
   1234           metric_keys.MetricKeys.ACCURACY: 2. / (1. + 2.),
   1235       }
   1236 
   1237     self.assertAllClose(sorted_key_dict(expected_metrics),
   1238                         sorted_key_dict(eval_metrics), rtol=1e-3)
   1239 
   1240   def test_binary_classes_evaluation_weights(self):
   1241     self._test_evaluation_weights(n_classes=2)
   1242 
   1243   def test_multi_classes_evaluation_weights(self):
   1244     self._test_evaluation_weights(n_classes=4)
   1245 
   1246 
   1247 class BaselineClassifierPredictTest(test.TestCase):
   1248 
   1249   def setUp(self):
   1250     self._model_dir = tempfile.mkdtemp()
   1251 
   1252   def tearDown(self):
   1253     if self._model_dir:
   1254       shutil.rmtree(self._model_dir)
   1255 
   1256   def _testPredictions(self, n_classes, label_vocabulary, label_output_fn):
   1257     """Tests predict when all variables are one-dimensional."""
   1258     age = 1.
   1259 
   1260     bias = [10.0] if n_classes == 2 else [10.0] * n_classes
   1261 
   1262     with ops.Graph().as_default():
   1263       variables.Variable(bias, name=BIAS_NAME)
   1264       variables.Variable(100, name='global_step', dtype=dtypes.int64)
   1265       save_variables_to_ckpt(self._model_dir)
   1266 
   1267     est = _baseline_classifier_fn(
   1268         label_vocabulary=label_vocabulary,
   1269         n_classes=n_classes,
   1270         model_dir=self._model_dir)
   1271 
   1272     predict_input_fn = numpy_io.numpy_input_fn(
   1273         x={'age': np.array([[age]])},
   1274         y=None,
   1275         batch_size=1,
   1276         num_epochs=1,
   1277         shuffle=False)
   1278     predictions = list(est.predict(input_fn=predict_input_fn))
   1279 
   1280     if n_classes == 2:
   1281       scalar_logits = bias[0]
   1282       two_classes_logits = [0, scalar_logits]
   1283       two_classes_logits_exp = np.exp(two_classes_logits)
   1284       softmax = two_classes_logits_exp / two_classes_logits_exp.sum()
   1285 
   1286       expected_predictions = {
   1287           'class_ids': [1],
   1288           'classes': [label_output_fn(1)],
   1289           'logistic': [sigmoid(np.array(scalar_logits))],
   1290           'logits': [scalar_logits],
   1291           'probabilities': softmax,
   1292       }
   1293     else:
   1294       onedim_logits = np.array(bias)
   1295       class_ids = onedim_logits.argmax()
   1296       logits_exp = np.exp(onedim_logits)
   1297       softmax = logits_exp / logits_exp.sum()
   1298       expected_predictions = {
   1299           'class_ids': [class_ids],
   1300           'classes': [label_output_fn(class_ids)],
   1301           'logits': onedim_logits,
   1302           'probabilities': softmax,
   1303       }
   1304 
   1305     self.assertEqual(1, len(predictions))
   1306     # assertAllClose cannot handle byte type.
   1307     self.assertEqual(expected_predictions['classes'], predictions[0]['classes'])
   1308     expected_predictions.pop('classes')
   1309     predictions[0].pop('classes')
   1310     self.assertAllClose(sorted_key_dict(expected_predictions),
   1311                         sorted_key_dict(predictions[0]))
   1312 
   1313   def testBinaryClassesWithoutLabelVocabulary(self):
   1314     n_classes = 2
   1315     self._testPredictions(n_classes,
   1316                           label_vocabulary=None,
   1317                           label_output_fn=lambda x: ('%s' % x).encode())
   1318 
   1319   def testBinaryClassesWithLabelVocabulary(self):
   1320     n_classes = 2
   1321     self._testPredictions(
   1322         n_classes,
   1323         label_vocabulary=['class_vocab_{}'.format(i)
   1324                           for i in range(n_classes)],
   1325         label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
   1326 
   1327   def testMultiClassesWithoutLabelVocabulary(self):
   1328     n_classes = 4
   1329     self._testPredictions(
   1330         n_classes,
   1331         label_vocabulary=None,
   1332         label_output_fn=lambda x: ('%s' % x).encode())
   1333 
   1334   def testMultiClassesWithLabelVocabulary(self):
   1335     n_classes = 4
   1336     self._testPredictions(
   1337         n_classes,
   1338         label_vocabulary=['class_vocab_{}'.format(i)
   1339                           for i in range(n_classes)],
   1340         label_output_fn=lambda x: ('class_vocab_%s' % x).encode())
   1341 
   1342 
   1343 class BaselineClassifierIntegrationTest(test.TestCase):
   1344 
   1345   def setUp(self):
   1346     self._model_dir = tempfile.mkdtemp()
   1347 
   1348   def tearDown(self):
   1349     if self._model_dir:
   1350       shutil.rmtree(self._model_dir)
   1351 
   1352   def _test_complete_flow(self, n_classes, train_input_fn, eval_input_fn,
   1353                           predict_input_fn, input_dimension, prediction_length):
   1354     feature_columns = [
   1355         feature_column_lib.numeric_column('x', shape=(input_dimension,))
   1356     ]
   1357     est = _baseline_classifier_fn(
   1358         n_classes=n_classes,
   1359         model_dir=self._model_dir)
   1360 
   1361     # TRAIN
   1362     # learn y = x
   1363     est.train(train_input_fn, steps=200)
   1364 
   1365     # EVALUTE
   1366     scores = est.evaluate(eval_input_fn)
   1367     self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
   1368     self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
   1369 
   1370     # PREDICT
   1371     predictions = np.array(
   1372         [x['classes'] for x in est.predict(predict_input_fn)])
   1373     self.assertAllEqual((prediction_length, 1), predictions.shape)
   1374 
   1375     # EXPORT
   1376     feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
   1377     serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
   1378         feature_spec)
   1379     export_dir = est.export_savedmodel(tempfile.mkdtemp(),
   1380                                        serving_input_receiver_fn)
   1381     self.assertTrue(gfile.Exists(export_dir))
   1382 
   1383   def _test_numpy_input_fn(self, n_classes):
   1384     """Tests complete flow with numpy_input_fn."""
   1385     input_dimension = 4
   1386     batch_size = 10
   1387     prediction_length = batch_size
   1388     data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
   1389     data = data.reshape(batch_size, input_dimension)
   1390     target = np.array([1] * batch_size)
   1391 
   1392     train_input_fn = numpy_io.numpy_input_fn(
   1393         x={'x': data},
   1394         y=target,
   1395         batch_size=batch_size,
   1396         num_epochs=None,
   1397         shuffle=True)
   1398     eval_input_fn = numpy_io.numpy_input_fn(
   1399         x={'x': data},
   1400         y=target,
   1401         batch_size=batch_size,
   1402         num_epochs=1,
   1403         shuffle=False)
   1404     predict_input_fn = numpy_io.numpy_input_fn(
   1405         x={'x': data},
   1406         y=None,
   1407         batch_size=batch_size,
   1408         num_epochs=1,
   1409         shuffle=False)
   1410 
   1411     self._test_complete_flow(
   1412         n_classes=n_classes,
   1413         train_input_fn=train_input_fn,
   1414         eval_input_fn=eval_input_fn,
   1415         predict_input_fn=predict_input_fn,
   1416         input_dimension=input_dimension,
   1417         prediction_length=prediction_length)
   1418 
   1419   def test_binary_classes_numpy_input_fn(self):
   1420     self._test_numpy_input_fn(n_classes=2)
   1421 
   1422   def test_multi_classes_numpy_input_fn(self):
   1423     self._test_numpy_input_fn(n_classes=4)
   1424 
   1425   def _test_pandas_input_fn(self, n_classes):
   1426     """Tests complete flow with pandas_input_fn."""
   1427     if not HAS_PANDAS:
   1428       return
   1429 
   1430     # Pandas DataFrame natually supports 1 dim data only.
   1431     input_dimension = 1
   1432     batch_size = 10
   1433     data = np.array([1., 2., 3., 4.], dtype=np.float32)
   1434     target = np.array([1, 0, 1, 0], dtype=np.int32)
   1435     x = pd.DataFrame({'x': data})
   1436     y = pd.Series(target)
   1437     prediction_length = 4
   1438 
   1439     train_input_fn = pandas_io.pandas_input_fn(
   1440         x=x, y=y, batch_size=batch_size, num_epochs=None, shuffle=True)
   1441     eval_input_fn = pandas_io.pandas_input_fn(
   1442         x=x, y=y, batch_size=batch_size, shuffle=False)
   1443     predict_input_fn = pandas_io.pandas_input_fn(
   1444         x=x, batch_size=batch_size, shuffle=False)
   1445 
   1446     self._test_complete_flow(
   1447         n_classes=n_classes,
   1448         train_input_fn=train_input_fn,
   1449         eval_input_fn=eval_input_fn,
   1450         predict_input_fn=predict_input_fn,
   1451         input_dimension=input_dimension,
   1452         prediction_length=prediction_length)
   1453 
   1454   def test_binary_classes_pandas_input_fn(self):
   1455     self._test_pandas_input_fn(n_classes=2)
   1456 
   1457   def test_multi_classes_pandas_input_fn(self):
   1458     self._test_pandas_input_fn(n_classes=4)
   1459 
   1460   def _test_input_fn_from_parse_example(self, n_classes):
   1461     """Tests complete flow with input_fn constructed from parse_example."""
   1462     input_dimension = 2
   1463     batch_size = 10
   1464     prediction_length = batch_size
   1465     data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
   1466     data = data.reshape(batch_size, input_dimension)
   1467     target = np.array([1] * batch_size, dtype=np.int64)
   1468 
   1469     serialized_examples = []
   1470     for x, y in zip(data, target):
   1471       example = example_pb2.Example(features=feature_pb2.Features(
   1472           feature={
   1473               'x':
   1474                   feature_pb2.Feature(float_list=feature_pb2.FloatList(
   1475                       value=x)),
   1476               'y':
   1477                   feature_pb2.Feature(int64_list=feature_pb2.Int64List(
   1478                       value=[y])),
   1479           }))
   1480       serialized_examples.append(example.SerializeToString())
   1481 
   1482     feature_spec = {
   1483         'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
   1484         'y': parsing_ops.FixedLenFeature([1], dtypes.int64),
   1485     }
   1486 
   1487     def _train_input_fn():
   1488       feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
   1489       features = queue_parsed_features(feature_map)
   1490       labels = features.pop('y')
   1491       return features, labels
   1492 
   1493     def _eval_input_fn():
   1494       feature_map = parsing_ops.parse_example(
   1495           input_lib.limit_epochs(serialized_examples, num_epochs=1),
   1496           feature_spec)
   1497       features = queue_parsed_features(feature_map)
   1498       labels = features.pop('y')
   1499       return features, labels
   1500 
   1501     def _predict_input_fn():
   1502       feature_map = parsing_ops.parse_example(
   1503           input_lib.limit_epochs(serialized_examples, num_epochs=1),
   1504           feature_spec)
   1505       features = queue_parsed_features(feature_map)
   1506       features.pop('y')
   1507       return features, None
   1508 
   1509     self._test_complete_flow(
   1510         n_classes=n_classes,
   1511         train_input_fn=_train_input_fn,
   1512         eval_input_fn=_eval_input_fn,
   1513         predict_input_fn=_predict_input_fn,
   1514         input_dimension=input_dimension,
   1515         prediction_length=prediction_length)
   1516 
   1517   def test_binary_classes_input_fn_from_parse_example(self):
   1518     self._test_input_fn_from_parse_example(n_classes=2)
   1519 
   1520   def test_multi_classes_input_fn_from_parse_example(self):
   1521     self._test_input_fn_from_parse_example(n_classes=4)
   1522 
   1523 
   1524 # Tests for Baseline logit_fn.
   1525 
   1526 
   1527 class BaselineLogitFnTest(test.TestCase):
   1528 
   1529   def test_basic_logit_correctness(self):
   1530     """baseline_logit_fn simply returns the bias variable."""
   1531     with ops.Graph().as_default():
   1532       logit_fn = baseline._baseline_logit_fn_builder(num_outputs=2)
   1533       logits = logit_fn(features={'age': [[23.], [31.]]})
   1534       with variable_scope.variable_scope('baseline', reuse=True):
   1535         bias_var = variable_scope.get_variable('bias')
   1536       with tf_session.Session() as sess:
   1537         sess.run([variables.global_variables_initializer()])
   1538         self.assertAllClose([[0., 0.], [0., 0.]], logits.eval())
   1539         sess.run(bias_var.assign([10., 5.]))
   1540         self.assertAllClose([[10., 5.], [10., 5.]], logits.eval())
   1541 
   1542 
   1543 if __name__ == '__main__':
   1544   test.main()
   1545 
   1546