# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions for evaluation and summarization of metrics.

The evaluation.py module contains helper functions for evaluating TensorFlow
modules using a variety of metrics and summarizing the results.

**********************
* Evaluating Metrics *
**********************

In the simplest use case, we use a model to create the predictions, then specify
the metrics and finally call the `evaluation` method:

  # Create model and obtain the predictions:
  images, labels = LoadData(...)
  predictions = MyModel(images)

  # Choose the metrics to compute:
  names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
      "accuracy": slim.metrics.accuracy(predictions, labels),
      "mse": slim.metrics.mean_squared_error(predictions, labels),
  })

  initial_op = tf.group(
      tf.global_variables_initializer(),
      tf.local_variables_initializer())

  with tf.Session() as sess:
    metric_values = slim.evaluation(
        sess,
        num_evals=1,
        initial_op=initial_op,
        eval_op=names_to_updates.values(),
        final_op=names_to_values.values())

    for metric, value in zip(names_to_values.keys(), metric_values):
      logging.info('Metric %s has value: %f', metric, value)

************************************************
* Evaluating a Checkpointed Model with Metrics *
************************************************

Often, one wants to evaluate a model checkpoint saved on disk. This can be
performed once or repeatedly on a set schedule.

To evaluate a particular model, users define zero or more metrics and zero or
more summaries and call the evaluation_loop method:

  # Create model and obtain the predictions:
  images, labels = LoadData(...)
  predictions = MyModel(images)

  # Choose the metrics to compute:
  names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
      "accuracy": slim.metrics.accuracy(predictions, labels),
      "mse": slim.metrics.mean_squared_error(predictions, labels),
  })

  # Define the summaries to write:
  summary_ops = []
  for metric_name, metric_value in names_to_values.items():
    summary_ops.append(tf.summary.scalar(metric_name, metric_value))

  checkpoint_dir = '/tmp/my_model_dir/'
  log_dir = '/tmp/my_model_eval/'

  # We'll evaluate 1000 batches:
  num_evals = 1000

  # Evaluate every 10 minutes:
  slim.evaluation_loop(
      '',
      checkpoint_dir,
      log_dir,
      num_evals=num_evals,
      eval_op=names_to_updates.values(),
      summary_op=tf.summary.merge(summary_ops),
      eval_interval_secs=600)

**************************************************
* Evaluating a Checkpointed Model with Summaries *
**************************************************

At times, an evaluation can be performed without metrics at all but rather
with only summaries. The user need only leave out the 'eval_op' argument:

  # Create model and obtain the predictions:
  images, labels = LoadData(...)
  predictions = MyModel(images)

  # Define the summaries to write:
  summary_ops = [
      tf.summary.scalar(...),
      tf.summary.histogram(...),
  ]

  checkpoint_dir = '/tmp/my_model_dir/'
  log_dir = '/tmp/my_model_eval/'

  # Evaluate once every 10 minutes.
  slim.evaluation_loop(
      '',
      checkpoint_dir,
      log_dir,
      num_evals=1,
      summary_op=tf.summary.merge(summary_ops),
      eval_interval_secs=600)
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.training.python.training import evaluation
from tensorflow.python.summary import summary
from tensorflow.python.training import monitored_session
from tensorflow.python.training import saver as tf_saver

__all__ = [
    'evaluate_once',
    'evaluation_loop',
    'wait_for_new_checkpoint',
    'checkpoints_iterator',
]

# Re-exported for backward compatibility with the old slim API surface.
wait_for_new_checkpoint = evaluation.wait_for_new_checkpoint
checkpoints_iterator = evaluation.checkpoints_iterator

# Sentinel distinguishing "caller did not pass summary_op" from an explicit
# `summary_op=None` (which disables summaries entirely).
_USE_DEFAULT = 0


def evaluate_once(master,
                  checkpoint_path,
                  logdir,
                  num_evals=1,
                  initial_op=None,
                  initial_op_feed_dict=None,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None,
                  hooks=None):
  """Evaluates the model at the given checkpoint path.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_path: The path to a checkpoint to use for evaluation.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    initial_op: An operation run at the beginning of evaluation.
    initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions. The
      value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
      By default the summary_op is set to tf.summary.merge_all().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    hooks: A list of additional `SessionRunHook` objects to pass during the
      evaluation.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
  # `_USE_DEFAULT` (not `None`) marks "unspecified": substitute merge_all().
  if summary_op == _USE_DEFAULT:
    summary_op = summary.merge_all()

  all_hooks = [evaluation.StopAfterNEvalsHook(num_evals),]

  # Only attach the summary hook when there is actually a summary to write;
  # an explicit summary_op=None disables summaries.
  if summary_op is not None:
    all_hooks.append(evaluation.SummaryAtEndHook(
        log_dir=logdir, summary_op=summary_op, feed_dict=summary_op_feed_dict))
  if hooks is not None:
    all_hooks.extend(hooks)

  saver = None
  if variables_to_restore is not None:
    saver = tf_saver.Saver(variables_to_restore)

  return evaluation.evaluate_once(
      checkpoint_path,
      master=master,
      scaffold=monitored_session.Scaffold(
          init_op=initial_op, init_feed_dict=initial_op_feed_dict, saver=saver),
      eval_ops=eval_op,
      feed_dict=eval_op_feed_dict,
      final_ops=final_op,
      final_ops_feed_dict=final_op_feed_dict,
      hooks=all_hooks,
      config=session_config)


def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    initial_op=None,
                    initial_op_feed_dict=None,
                    init_fn=None,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None,
                    session_config=None,
                    timeout=None,
                    hooks=None):
  """Runs TF-Slim's Evaluation Loop.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_dir: The directory where checkpoints are stored.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    initial_op: An operation run at the beginning of evaluation.
    initial_op_feed_dict: A feed dictionary to use when executing `initial_op`.
    init_fn: An optional callable to be executed after `init_op` is called. The
      callable must accept one argument, the session being initialized.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions. The
      value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops.
      By default the summary_op is set to tf.summary.merge_all().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    eval_interval_secs: The minimum number of seconds between evaluations.
    max_number_of_evaluations: the max number of iterations of the evaluation.
      If the value is left as 'None', the evaluation continues indefinitely.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.
    timeout: The maximum amount of time to wait between checkpoints. If left as
      `None`, then the process will wait indefinitely.
    hooks: A list of additional `SessionRunHook` objects to pass during
      repeated evaluations.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
  # `_USE_DEFAULT` (not `None`) marks "unspecified": substitute merge_all().
  if summary_op == _USE_DEFAULT:
    summary_op = summary.merge_all()

  all_hooks = [evaluation.StopAfterNEvalsHook(num_evals),]

  # Only attach the summary hook when there is actually a summary to write;
  # an explicit summary_op=None disables summaries.
  if summary_op is not None:
    all_hooks.append(evaluation.SummaryAtEndHook(
        log_dir=logdir, summary_op=summary_op, feed_dict=summary_op_feed_dict))

  if hooks is not None:
    # Add custom hooks if provided.
    all_hooks.extend(hooks)

  saver = None
  if variables_to_restore is not None:
    saver = tf_saver.Saver(variables_to_restore)

  return evaluation.evaluate_repeatedly(
      checkpoint_dir,
      master=master,
      scaffold=monitored_session.Scaffold(
          init_op=initial_op, init_feed_dict=initial_op_feed_dict,
          init_fn=init_fn, saver=saver),
      eval_ops=eval_op,
      feed_dict=eval_op_feed_dict,
      final_ops=final_op,
      final_ops_feed_dict=final_op_feed_dict,
      eval_interval_secs=eval_interval_secs,
      hooks=all_hooks,
      config=session_config,
      max_number_of_evaluations=max_number_of_evaluations,
      timeout=timeout)