1 # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """TensorFlow Debugger: Tools for debugging gradients.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import re 22 import uuid 23 24 import six 25 26 from tensorflow.python.debug.lib import debug_data 27 from tensorflow.python.debug.lib import debug_graphs 28 from tensorflow.python.framework import ops 29 from tensorflow.python.ops import gen_array_ops 30 from tensorflow.python.ops import variables 31 32 _GRADIENT_DEBUG_TAG = "gradient_debug_" 33 34 _gradient_debuggers = {} 35 36 37 def _tensor_to_grad_debug_op_name(tensor, grad_debugger_uuid): 38 op_name, slot = debug_graphs.parse_node_or_tensor_name(tensor.name) 39 return "%s_%d/%s%s" % (op_name, slot, _GRADIENT_DEBUG_TAG, grad_debugger_uuid) 40 41 42 def _parse_grad_debug_op_name(op_name): 43 """Parse the name of a debug gradient op. 44 45 Args: 46 op_name: the name of the debug gradient op. 47 48 Returns: 49 1) The UUID of the GradientsDebugger that created the debug gradient op. 50 2) Name of the original tensor whose gradient is debugged by the debug 51 gradient op. 52 """ 53 name_items = op_name.split("/") 54 assert len(name_items) > 1 55 assert name_items[-1].startswith(_GRADIENT_DEBUG_TAG) 56 57 grad_debugger_uuid = name_items[-1][len(_GRADIENT_DEBUG_TAG):] 58 if "_" in grad_debugger_uuid: 59 grad_debugger_uuid = grad_debugger_uuid[:grad_debugger_uuid.index("_")] 60 orig_tensor_slot = int(name_items[-2][name_items[-2].rfind("_") + 1:]) 61 orig_base_op_name = name_items[-2][:name_items[-2].rfind("_")] 62 orig_tensor_name = ("/".join(name_items[:-2] + [orig_base_op_name]) + 63 ":%d" % orig_tensor_slot) 64 65 return grad_debugger_uuid, orig_tensor_name 66 67 68 class GradientsDebugger(object): 69 """Gradients Debugger. 70 71 Allows retrieval of gradient tensors created by TensorFlow's automatic 72 differentiation algorithm, i.e., @{tf.gradients} and optimizer classes that 73 use it. 74 """ 75 # TODO(cais): Add examples code in the doc string? 76 77 def __init__(self, y_tensor=None): 78 """Constructor of GradientsDebugger. 79 80 Args: 81 y_tensor: optional: the `tf.Tensor` to be differentiated, i.e., the tensor 82 on the numerator of the differentiation. 83 """ 84 85 self._uuid = uuid.uuid4().hex 86 _gradient_debuggers[self._uuid] = self 87 88 # A dict mapping x-tensor names to gradient tensor. x-tensor refers to the 89 # independent tf.Tensor, i.e., the tensor on the denominator of the 90 # differentiation. 91 self._gradient_tensors = {} 92 self._y_tensor = y_tensor 93 94 self._graph = None 95 if y_tensor: 96 self._graph = y_tensor.graph 97 98 self._is_active_context = False 99 100 @property 101 def y_tensor(self): 102 return self._y_tensor 103 104 @property 105 def graph(self): 106 return self._graph 107 108 def __enter__(self): 109 self._is_active_context = True 110 111 def __exit__(self, unused_type, unused_value, unused_traceback): 112 self._is_active_context = False 113 114 def identify_gradient(self, input_tensor): 115 """Create a debug identity tensor that registers and forwards gradients. 116 117 The side effect of this method is that when gradient tensor(s) are created 118 with respect to the any paths that include the `input_tensor`, the gradient 119 tensor(s) with repsect to `input_tensor` will be registered with this 120 this `GradientsDebugger` instance and can later be retrieved, with the 121 methods `gradient_tensor` and `gradient_tensors`. 122 123 Example: 124 125 ```python 126 x = tf.Variable(1.0) 127 y = tf.add(x, x) 128 129 grad_debugger = tf_debug.GradientsDebugger() 130 debug_y = grad_debugger.identify_gradient(y) 131 z = tf.square(debug_y) 132 133 # Create a train op under the grad_debugger context. 134 with grad_debugger: 135 train_op = tf.train.GradientDescentOptimizer(z) 136 137 # Now we can reflect through grad_debugger to get the gradient tensor 138 # with respect to y. 139 y_grad = grad_debugger.gradient_tensor(y) 140 ``` 141 142 Args: 143 input_tensor: the input `tf.Tensor` object whose related gradient tensors 144 are to be reigstered with this `GradientsDebugger` instance when they 145 are created, e.g., during @{tf.gradients} calls or the construction 146 of optimization (training) op that uses @{tf.gradients}. 147 148 Returns: 149 A forwarded identity of `input_tensor`, as a `tf.Tensor`. 150 151 Raises: 152 ValueError: If an op with name that duplicates the gradient-debugging op 153 already exists in the graph (highly unlikely). 154 """ 155 # TODO(cais): Allow overriding gradient. 156 # TODO(cais): Implement value_stack. 157 grad_debug_op_name = _tensor_to_grad_debug_op_name(input_tensor, self._uuid) 158 # pylint: disable=protected-access 159 identity_op = (gen_array_ops._debug_gradient_ref_identity 160 if input_tensor.dtype._is_ref_dtype 161 else gen_array_ops._debug_gradient_identity) 162 debug_grad_identity = identity_op(input_tensor, name=grad_debug_op_name) 163 # pylint: enable=protected-access 164 assert debug_grad_identity.dtype == input_tensor.dtype 165 if debug_grad_identity.op.name != grad_debug_op_name: 166 raise ValueError( 167 "The graph already contains an op named %s" % grad_debug_op_name) 168 return debug_grad_identity 169 170 def watch_gradients_by_tensors(self, graph, tensors): 171 """Watch gradient tensors by x-tensor(s). 172 173 The side effect of this method is that when gradient tensor(s) are created 174 with respect to the any paths that include the `x_tensor`s, the gradient 175 tensor(s) with repsect to the tensor will be registered with this 176 this `GradientsDebugger` instance and can later be retrieved, with the 177 methods `gradient_tensor` and `gradient_tensors`. 178 179 Unlike the method `identify_gradient`, this method is used to retrieve 180 gradient tensors after the construction of the forward subgraph has 181 completed (but before the construction of the backward subgraph). 182 183 This method is the same as `watch_gradients_by_x_tensor_names` except that 184 the tensors are specified by the Python `tf.Tensor` or `tf.Variable` 185 objects, instead by name patterns. 186 187 Example: 188 189 ```python 190 x = tf.Variable(1.0) 191 y = tf.add(x, x, name="y") 192 z = tf.square(debug_y) 193 194 # Create a train op under the grad_debugger context. 195 grad_debugger = tf_debug.GradientsDebugger() 196 with grad_debugger.watch_gradients_by_tensors(y): 197 train_op = tf.train.GradientDescentOptimizer(z) 198 199 # Now we can reflect through grad_debugger to get the gradient tensor 200 # with respect to y. 201 y_grad = grad_debugger.gradient_tensor(y) 202 # or 203 y_grad = grad_debugger.gradient_tensor("y:0") 204 ``` 205 206 Args: 207 graph: the `tf.Graph` to watch the gradients on. 208 tensors: a `tf.Tensor` or `tf.Variable` object, or a list of such objects. 209 210 Returns: 211 The GradientsDebugger instance itself. 212 """ 213 214 if not isinstance(tensors, list): 215 tensors = [tensors] 216 217 tensor_name_regex = [] 218 for tensor in tensors: 219 tensor_name_regex.append(re.escape(tensor.name) + "$") 220 tensor_name_regex = "(" + "|".join(tensor_name_regex) + ")" 221 return self.watch_gradients_by_tensor_names(graph, tensor_name_regex) 222 223 def watch_gradients_by_tensor_names(self, graph, tensor_name_regex): 224 """Watch gradient tensors by name(s) of the x-tensor(s). 225 226 The side effect of this method is that when gradient tensor(s) are created 227 with respect to the x-tensors, the gradient tensor(s) will be registered 228 with this `GradientsDebugger` instance and can later be retrieved. 229 230 Unlike the `identify_gradient` method, this method is used after the 231 construction of the forward graph has completed. Unlike the 232 `watch_gradients_by_tensor` method, this method does not use handles to the 233 tensors of interest; it uses their names. 234 235 This method is the same as `watch_gradients_by_tensors` except that the 236 x-tensors are specified by name patterns, instead of `tf.Tensor` or 237 `tf.Variable` objects. 238 239 Example: 240 241 ```python 242 x = tf.Variable(1.0, name="x") 243 y = tf.add(x, x, name="y") 244 z = tf.square(debug_y) 245 246 # Create a train op under the grad_debugger context. 247 grad_debugger = tf_debug.GradientsDebugger() 248 with grad_debugger.watch_gradients_by_tensor_names(r"(x|y):0$"): 249 train_op = tf.train.GradientDescentOptimizer(z) 250 251 # Now we can reflect through grad_debugger to get the gradient tensor 252 # with respect to x and y. 253 x_grad = grad_debugger.gradient_tensor("x:0") 254 y_grad = grad_debugger.gradient_tensor("y:0") 255 ``` 256 257 Args: 258 graph: the `tf.Graph` to watch the gradients on. 259 tensor_name_regex: the regular-expression pattern of the name(s) of the 260 x-tensor(s) to watch. x-tensor refers to the tensors on the denominator 261 of the differentiation. 262 263 Returns: 264 The GradientsDebugger instance itself. 265 """ 266 tensor_name_pattern = re.compile(tensor_name_regex) 267 with graph.as_default(): 268 for op in graph.get_operations(): 269 for output in op.outputs: 270 if tensor_name_pattern.match(output.name): 271 debug_op = self.identify_gradient(output) 272 273 # Make a copy of output.consumers() since we'll modify the consumers 274 # TODO(skyewm): this is unnecessary once the C API is enabled 275 for consumer in list(output.consumers()): 276 if consumer == debug_op.op: 277 continue 278 279 # Locate the slot index of the original input. 280 for i, consumer_input in enumerate(consumer.inputs): 281 if consumer_input == output: 282 consumer._update_input(i, debug_op) # pylint: disable=protected-access 283 return self 284 285 def _check_same_graph(self, tensor): 286 if self._graph is None: 287 self._graph = tensor.graph 288 elif self._graph != tensor.graph: 289 raise ValueError( 290 "The graph of the value (%s) is not the same as the graph %s" % 291 (tensor.graph, self._graph)) 292 293 def register_gradient_tensor(self, 294 x_tensor_name, 295 gradient_tensor): 296 """Register the gradient tensor for an x-tensor. 297 298 Args: 299 x_tensor_name: (`str`) the name of the independent `tf.Tensor`, i.e., 300 the tensor on the denominator of the differentiation. 301 gradient_tensor: the gradient `tf.Tensor`. 302 """ 303 if len(_gradient_debuggers) == 1 or self._is_active_context: 304 self._check_same_graph(gradient_tensor) 305 self._gradient_tensors[x_tensor_name] = gradient_tensor 306 307 def gradient_tensor(self, x_tensor): 308 """Get the gradient tensor of an x-tensor. 309 310 Args: 311 x_tensor: (`tf.Tensor`, `tf.Variable` or `str`) The x-tensor object or its 312 name. x-tensor refers to the independent `tf.Tensor`, i.e., the tensor 313 on the denominator of the differentiation. 314 315 Returns: 316 If found, the gradient tensor. 317 318 Raises: 319 TypeError: If `x_tensor` is not a `tf.Tensor`, `tf.Variable` or `str`. 320 LookupError: If the `x_tensor` has not been registered with a gradient 321 tensor. 322 """ 323 x_tensor_name = self._get_tensor_name(x_tensor) 324 if x_tensor_name not in self._gradient_tensors: 325 raise LookupError( 326 "This GradientsDebugger has not received any gradient tensor for " 327 "x-tensor %s" % x_tensor_name) 328 return self._gradient_tensors[x_tensor_name] 329 330 def gradient_tensors(self): 331 """Get the gradient tensors that this object is aware of. 332 333 Returns: 334 A dict mapping x-tensor names to gradient tensor objects. x-tensor refers 335 to the tensors on the denominator of the differentation. 336 """ 337 return self._gradient_tensors 338 339 def _get_tensor_name(self, tensor): 340 if isinstance(tensor, (ops.Tensor, variables.Variable)): 341 return tensor.name 342 elif isinstance(tensor, six.string_types): 343 return tensor 344 else: 345 raise TypeError( 346 "x_tensor must be a str or tf.Tensor or tf.Variable, " 347 "but instead has type %s" % type(tensor)) 348 349 350 def clear_gradient_debuggers(): 351 """Clear all globally registered gradient debuggers.""" 352 _gradient_debuggers.clear() 353 354 355 @ops.RegisterGradient("DebugGradientIdentity") 356 def _identify_gradient_grad(op, dy): 357 """Gradient function for the DebugIdentity op.""" 358 # TODO(cais): Allow overriding gradient. 359 grad_debugger_uuid, orig_tensor_name = _parse_grad_debug_op_name(op.name) 360 grad_debugger = _gradient_debuggers[grad_debugger_uuid] 361 grad_debugger.register_gradient_tensor(orig_tensor_name, dy) 362 return dy 363 364 365 @ops.RegisterGradient("DebugGradientRefIdentity") 366 def _identify_gradient_grad_ref(op, dy): 367 """Gradient function for the DebugIdentity op.""" 368 return _identify_gradient_grad(op, dy) 369 370 371 def gradient_values_from_dump(grad_debugger, x_tensor, dump): 372 """Find gradient values from a `DebugDumpDir` object. 373 374 Args: 375 grad_debugger: the `tf_debug.GradientsDebugger` instance to be used. 376 x_tensor: (`tf.Tensor`, `tf.Variable` or `str`) The x-tensor object or its 377 name. x-tensor refers to the independent `tf.Tensor`, i.e., the tensor 378 on the denominator of the differentiation. 379 dump: A `tfdbg.DebugDumpDir` object. 380 381 Returns: 382 If this `GradientsDebugger` instance has the gradient tensor of `x_tensor` 383 registered: a list of `numpy.ndarray` representing the value of the 384 gradient tensor from `dump`. The list could be empty, if the gradient 385 tensor is not executed in the `tf.Session.run()` call that generated 386 the `dump`. The list could also contain multiple values of the gradient 387 tensor, e.g., if gradient tensor is computed repeatedly in a 388 `tf.while_loop` during the run that generated the `dump`. 389 390 Raises: 391 LookupError: If this `GradientsDebugger` instance does not have the 392 gradient tensor of `x_tensor` registered. 393 ValueError: If this `GradientsDebugger` has a `tf.Graph` object that 394 does not match the `tf.Graph` object of the `dump`. 395 TypeError: If `x_tensor` is not a `tf.Tensor`, `tf.Variable` or `str`. 396 """ 397 # TODO(cais): Use this method in LocalCLIDebugWrapperSession to present the 398 # gradient tensors to the TFDBG CLI. 399 400 # If possible, verify that the Python graph of the dump and that of this 401 # GradientsDebugger match. 402 if (dump.python_graph and grad_debugger.graph and 403 dump.python_graph != grad_debugger.graph): 404 raise ValueError( 405 "This GradientsDebugger instance has a graph (%s) that differs from " 406 "the graph of the DebugDumpDir object (%s)." % 407 (grad_debugger.graph, dump.python_graph)) 408 409 gradient_tensor = grad_debugger.gradient_tensor(x_tensor) 410 node_name, output_slot = debug_graphs.parse_node_or_tensor_name( 411 gradient_tensor.name) 412 413 try: 414 return dump.get_tensors(node_name, output_slot, "DebugIdentity") 415 except debug_data.WatchKeyDoesNotExistInDebugDumpDirError: 416 return [] 417