Home | History | Annotate | Download | only in slim
      1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Contains functions for evaluation and summarization of metrics.
     16 
     17 The evaluation.py module contains helper functions for evaluating TensorFlow
     18 modules using a variety of metrics and summarizing the results.
     19 
     20 **********************
     21 * Evaluating Metrics *
     22 **********************
     23 
     24 In the simplest use case, we use a model to create the predictions, then specify
     25 the metrics and finally call the `evaluation` method:
     26 
     27   # Create model and obtain the predictions:
     28   images, labels = LoadData(...)
     29   predictions = MyModel(images)
     30 
     31   # Choose the metrics to compute:
     32   names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
     33       "accuracy": slim.metrics.accuracy(predictions, labels),
     34       "mse": slim.metrics.mean_squared_error(predictions, labels),
     35   })
     36 
     37   initial_op = tf.group(
     38       tf.global_variables_initializer(),
     39       tf.local_variables_initializer())
     40 
     41   with tf.Session() as sess:
     42     metric_values = slim.evaluation(
     43         sess,
     44         num_evals=1,
     45         initial_op=initial_op,
     46         eval_op=names_to_updates.values(),
      47         final_op=names_to_values.values())
     48 
     49     for metric, value in zip(names_to_values.keys(), metric_values):
     50       logging.info('Metric %s has value: %f', metric, value)
     51 
     52 ************************************************
     53 * Evaluating a Checkpointed Model with Metrics *
     54 ************************************************
     55 
     56 Often, one wants to evaluate a model checkpoint saved on disk. This can be
     57 performed once or repeatedly on a set schedule.
     58 
     59 To evaluate a particular model, users define zero or more metrics and zero or
     60 more summaries and call the evaluation_loop method:
     61 
     62   # Create model and obtain the predictions:
     63   images, labels = LoadData(...)
     64   predictions = MyModel(images)
     65 
     66   # Choose the metrics to compute:
     67   names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
     68       "accuracy": slim.metrics.accuracy(predictions, labels),
     69       "mse": slim.metrics.mean_squared_error(predictions, labels),
     70   })
     71 
     72   # Define the summaries to write:
      73   for metric_name, metric_value in names_to_values.items():
     74     tf.summary.scalar(metric_name, metric_value)
     75 
     76   checkpoint_dir = '/tmp/my_model_dir/'
     77   log_dir = '/tmp/my_model_eval/'
     78 
     79   # We'll evaluate 1000 batches:
     80   num_evals = 1000
     81 
     82   # Evaluate every 10 minutes:
     83   slim.evaluation_loop(
     84       '',
     85       checkpoint_dir,
      86       log_dir,
     87       num_evals=num_evals,
     88       eval_op=names_to_updates.values(),
     89       summary_op=tf.contrib.deprecated.merge_summary(summary_ops),
     90       eval_interval_secs=600)
     91 
     92 **************************************************
     93 * Evaluating a Checkpointed Model with Summaries *
     94 **************************************************
     95 
     96 At times, an evaluation can be performed without metrics at all but rather
     97 with only summaries. The user need only leave out the 'eval_op' argument:
     98 
     99   # Create model and obtain the predictions:
    100   images, labels = LoadData(...)
    101   predictions = MyModel(images)
    102 
    103   # Define the summaries to write:
    104   tf.summary.scalar(...)
    105   tf.summary.histogram(...)
    106 
    107   checkpoint_dir = '/tmp/my_model_dir/'
    108   log_dir = '/tmp/my_model_eval/'
    109 
    110   # Evaluate once every 10 minutes.
    111   slim.evaluation_loop(
     112       '',
     113       checkpoint_dir,
     114       log_dir,
    115       num_evals=1,
    116       summary_op=tf.contrib.deprecated.merge_summary(summary_ops),
    117       eval_interval_secs=600)
    118 
    119 """
    120 
    121 from __future__ import absolute_import
    122 from __future__ import division
    123 from __future__ import print_function
    124 
    125 from tensorflow.contrib.training.python.training import evaluation
    126 from tensorflow.python.summary import summary
    127 from tensorflow.python.training import monitored_session
    128 from tensorflow.python.training import saver as tf_saver
    129 
# Public API of this module: the two entry points defined below plus the
# checkpoint-watching helpers re-exported from contrib.training.
     130 __all__ = [
     131     'evaluate_once',
     132     'evaluation_loop',
     133     'wait_for_new_checkpoint',
     134     'checkpoints_iterator',
     135 ]
     136 
# Re-export the checkpoint helpers so callers can reach them via this module.
     137 wait_for_new_checkpoint = evaluation.wait_for_new_checkpoint
     138 checkpoints_iterator = evaluation.checkpoints_iterator
     139 
# Sentinel default for `summary_op`: distinguishes "caller passed nothing"
# (use tf.summary.merge_all()) from an explicit `summary_op=None`
# (disable summaries entirely).
     140 _USE_DEFAULT = 0
    141 
    142 
    143 def evaluate_once(master,
    144                   checkpoint_path,
    145                   logdir,
    146                   num_evals=1,
    147                   initial_op=None,
    148                   initial_op_feed_dict=None,
    149                   eval_op=None,
    150                   eval_op_feed_dict=None,
    151                   final_op=None,
    152                   final_op_feed_dict=None,
    153                   summary_op=_USE_DEFAULT,
    154                   summary_op_feed_dict=None,
    155                   variables_to_restore=None,
    156                   session_config=None,
    157                   hooks=None):
    158   """Evaluates the model at the given checkpoint path.
    159 
    160   Args:
    161     master: The BNS address of the TensorFlow master.
    162     checkpoint_path: The path to a checkpoint to use for evaluation.
    163     logdir: The directory where the TensorFlow summaries are written to.
    164     num_evals: The number of times to run `eval_op`.
    165     initial_op: An operation run at the beginning of evaluation.
    166     initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
    167     eval_op: A operation run `num_evals` times.
    168     eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    169     final_op: An operation to execute after all of the `eval_op` executions. The
    170       value of `final_op` is returned.
    171     final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    172     summary_op: The summary_op to evaluate after running TF-Slims metric ops. By
    173       default the summary_op is set to tf.summary.merge_all().
    174     summary_op_feed_dict: An optional feed dictionary to use when running the
    175       `summary_op`.
    176     variables_to_restore: A list of TensorFlow variables to restore during
    177       evaluation. If the argument is left as `None` then
    178       slim.variables.GetVariablesToRestore() is used.
    179     session_config: An instance of `tf.ConfigProto` that will be used to
    180       configure the `Session`. If left as `None`, the default will be used.
    181     hooks: A list of additional `SessionRunHook` objects to pass during the
    182       evaluation.
    183 
    184   Returns:
    185     The value of `final_op` or `None` if `final_op` is `None`.
    186   """
    187   if summary_op == _USE_DEFAULT:
    188     summary_op = summary.merge_all()
    189 
    190   all_hooks = [evaluation.StopAfterNEvalsHook(num_evals),]
    191 
    192   if summary_op is not None:
    193     all_hooks.append(evaluation.SummaryAtEndHook(
    194         log_dir=logdir, summary_op=summary_op, feed_dict=summary_op_feed_dict))
    195   if hooks is not None:
    196     all_hooks.extend(hooks)
    197 
    198   saver = None
    199   if variables_to_restore is not None:
    200     saver = tf_saver.Saver(variables_to_restore)
    201 
    202   return evaluation.evaluate_once(
    203       checkpoint_path,
    204       master=master,
    205       scaffold=monitored_session.Scaffold(
    206           init_op=initial_op, init_feed_dict=initial_op_feed_dict, saver=saver),
    207       eval_ops=eval_op,
    208       feed_dict=eval_op_feed_dict,
    209       final_ops=final_op,
    210       final_ops_feed_dict=final_op_feed_dict,
    211       hooks=all_hooks,
    212       config=session_config)
    213 
    214 
    215 def evaluation_loop(master,
    216                     checkpoint_dir,
    217                     logdir,
    218                     num_evals=1,
    219                     initial_op=None,
    220                     initial_op_feed_dict=None,
    221                     init_fn=None,
    222                     eval_op=None,
    223                     eval_op_feed_dict=None,
    224                     final_op=None,
    225                     final_op_feed_dict=None,
    226                     summary_op=_USE_DEFAULT,
    227                     summary_op_feed_dict=None,
    228                     variables_to_restore=None,
    229                     eval_interval_secs=60,
    230                     max_number_of_evaluations=None,
    231                     session_config=None,
    232                     timeout=None,
    233                     hooks=None):
    234   """Runs TF-Slim's Evaluation Loop.
    235 
    236   Args:
    237     master: The BNS address of the TensorFlow master.
    238     checkpoint_dir: The directory where checkpoints are stored.
    239     logdir: The directory where the TensorFlow summaries are written to.
    240     num_evals: The number of times to run `eval_op`.
    241     initial_op: An operation run at the beginning of evaluation.
    242     initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
    243     init_fn: An optional callable to be executed after `init_op` is called. The
    244       callable must accept one argument, the session being initialized.
    245     eval_op: A operation run `num_evals` times.
    246     eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    247     final_op: An operation to execute after all of the `eval_op` executions. The
    248       value of `final_op` is returned.
    249     final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    250     summary_op: The summary_op to evaluate after running TF-Slims metric ops. By
    251       default the summary_op is set to tf.summary.merge_all().
    252     summary_op_feed_dict: An optional feed dictionary to use when running the
    253       `summary_op`.
    254     variables_to_restore: A list of TensorFlow variables to restore during
    255       evaluation. If the argument is left as `None` then
    256       slim.variables.GetVariablesToRestore() is used.
    257     eval_interval_secs: The minimum number of seconds between evaluations.
    258     max_number_of_evaluations: the max number of iterations of the evaluation.
    259       If the value is left as 'None', the evaluation continues indefinitely.
    260     session_config: An instance of `tf.ConfigProto` that will be used to
    261       configure the `Session`. If left as `None`, the default will be used.
    262     timeout: The maximum amount of time to wait between checkpoints. If left as
    263       `None`, then the process will wait indefinitely.
    264     hooks: A list of additional `SessionRunHook` objects to pass during
    265       repeated evaluations.
    266 
    267   Returns:
    268     The value of `final_op` or `None` if `final_op` is `None`.
    269   """
    270   if summary_op == _USE_DEFAULT:
    271     summary_op = summary.merge_all()
    272 
    273   all_hooks = [evaluation.StopAfterNEvalsHook(num_evals),]
    274 
    275   if summary_op is not None:
    276     all_hooks.append(evaluation.SummaryAtEndHook(
    277         log_dir=logdir, summary_op=summary_op, feed_dict=summary_op_feed_dict))
    278 
    279   if hooks is not None:
    280     # Add custom hooks if provided.
    281     all_hooks.extend(hooks)
    282 
    283   saver = None
    284   if variables_to_restore is not None:
    285     saver = tf_saver.Saver(variables_to_restore)
    286 
    287   return evaluation.evaluate_repeatedly(
    288       checkpoint_dir,
    289       master=master,
    290       scaffold=monitored_session.Scaffold(
    291           init_op=initial_op, init_feed_dict=initial_op_feed_dict,
    292           init_fn=init_fn, saver=saver),
    293       eval_ops=eval_op,
    294       feed_dict=eval_op_feed_dict,
    295       final_ops=final_op,
    296       final_ops_feed_dict=final_op_feed_dict,
    297       eval_interval_secs=eval_interval_secs,
    298       hooks=all_hooks,
    299       config=session_config,
    300       max_number_of_evaluations=max_number_of_evaluations,
    301       timeout=timeout)
    302