# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorFlow Debugger: Tools for debugging gradients."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import re
import uuid

import six

from tensorflow.python.debug.lib import debug_data
from tensorflow.python.debug.lib import debug_graphs
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import variables

_GRADIENT_DEBUG_TAG = "gradient_debug_"

_gradient_debuggers = {}


def _tensor_to_grad_debug_op_name(tensor, grad_debugger_uuid):
  op_name, slot = debug_graphs.parse_node_or_tensor_name(tensor.name)
  return "%s_%d/%s%s" % (op_name, slot, _GRADIENT_DEBUG_TAG, grad_debugger_uuid)


def _parse_grad_debug_op_name(op_name):
  """Parse the name of a debug gradient op.

  Args:
    op_name: the name of the debug gradient op.

  Returns:
    1) The UUID of the GradientsDebugger that created the debug gradient op.
    2) Name of the original tensor whose gradient is debugged by the debug
       gradient op.
  """
  name_items = op_name.split("/")
  assert len(name_items) > 1
  assert name_items[-1].startswith(_GRADIENT_DEBUG_TAG)

  grad_debugger_uuid = name_items[-1][len(_GRADIENT_DEBUG_TAG):]
  if "_" in grad_debugger_uuid:
    grad_debugger_uuid = grad_debugger_uuid[:grad_debugger_uuid.index("_")]
  orig_tensor_slot = int(name_items[-2][name_items[-2].rfind("_") + 1:])
  orig_base_op_name = name_items[-2][:name_items[-2].rfind("_")]
  orig_tensor_name = ("/".join(name_items[:-2] + [orig_base_op_name]) +
                      ":%d" % orig_tensor_slot)

  return grad_debugger_uuid, orig_tensor_name


class GradientsDebugger(object):
  """Gradients Debugger.

  Allows retrieval of gradient tensors created by TensorFlow's automatic
  differentiation algorithm, i.e., @{tf.gradients} and optimizer classes that
  use it.
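
  Example (a minimal sketch of typical usage; assumes `y` is a `tf.Tensor` in
  the default graph whose gradient is of interest):

  ```python
  grad_debugger = tf_debug.GradientsDebugger()
  # Insert a debug identity between y and its consumers, so the gradient
  # with respect to y gets registered when the backward graph is built.
  debug_y = grad_debugger.identify_gradient(y)
  z = tf.square(debug_y)
  with grad_debugger:
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(z)
  y_grad = grad_debugger.gradient_tensor(y)
  ```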
     74   """
     75   # TODO(cais): Add examples code in the doc string?
     76 
     77   def __init__(self, y_tensor=None):
     78     """Constructor of GradientsDebugger.
     79 
     80     Args:
     81       y_tensor: optional: the `tf.Tensor` to be differentiated, i.e., the tensor
     82         on the numerator of the differentiation.
     83     """
     84 
     85     self._uuid = uuid.uuid4().hex
     86     _gradient_debuggers[self._uuid] = self
     87 
     88     # A dict mapping x-tensor names to gradient tensor. x-tensor refers to the
     89     # independent tf.Tensor, i.e., the tensor on the denominator of the
     90     # differentiation.
     91     self._gradient_tensors = {}
     92     self._y_tensor = y_tensor
     93 
     94     self._graph = None
     95     if y_tensor:
     96       self._graph = y_tensor.graph
     97 
     98     self._is_active_context = False
     99 
    100   @property
    101   def y_tensor(self):
    102     return self._y_tensor
    103 
    104   @property
    105   def graph(self):
    106     return self._graph
    107 
    108   def __enter__(self):
    109     self._is_active_context = True
    110 
    111   def __exit__(self, unused_type, unused_value, unused_traceback):
    112     self._is_active_context = False
    113 
    114   def identify_gradient(self, input_tensor):
    115     """Create a debug identity tensor that registers and forwards gradients.

    The side effect of this method is that when gradient tensor(s) are created
    with respect to any paths that include the `input_tensor`, the gradient
    tensor(s) with respect to `input_tensor` will be registered with this
    `GradientsDebugger` instance and can later be retrieved with the
    methods `gradient_tensor` and `gradient_tensors`.

    Example:

    ```python
    x = tf.Variable(1.0)
    y = tf.add(x, x)

    grad_debugger = tf_debug.GradientsDebugger()
    debug_y = grad_debugger.identify_gradient(y)
    z = tf.square(debug_y)

    # Create a train op under the grad_debugger context.
    with grad_debugger:
      train_op = tf.train.GradientDescentOptimizer(0.1).minimize(z)

    # Now we can reflect through grad_debugger to get the gradient tensor
    # with respect to y.
    y_grad = grad_debugger.gradient_tensor(y)
    ```

    Args:
      input_tensor: the input `tf.Tensor` object whose related gradient tensors
        are to be registered with this `GradientsDebugger` instance when they
        are created, e.g., during @{tf.gradients} calls or the construction
        of an optimization (training) op that uses @{tf.gradients}.

    Returns:
      A forwarded identity of `input_tensor`, as a `tf.Tensor`.

    Raises:
      ValueError: If an op whose name duplicates that of the gradient-debugging
        op already exists in the graph (highly unlikely).
    """
    # TODO(cais): Allow overriding gradient.
    # TODO(cais): Implement value_stack.
    grad_debug_op_name = _tensor_to_grad_debug_op_name(input_tensor, self._uuid)
    # pylint: disable=protected-access
    identity_op = (gen_array_ops._debug_gradient_ref_identity
                   if input_tensor.dtype._is_ref_dtype
                   else gen_array_ops._debug_gradient_identity)
    debug_grad_identity = identity_op(input_tensor, name=grad_debug_op_name)
    # pylint: enable=protected-access
    assert debug_grad_identity.dtype == input_tensor.dtype
    if debug_grad_identity.op.name != grad_debug_op_name:
      raise ValueError(
          "The graph already contains an op named %s" % grad_debug_op_name)
    return debug_grad_identity

  def watch_gradients_by_tensors(self, graph, tensors):
    """Watch gradient tensors by x-tensor(s).

    The side effect of this method is that when gradient tensor(s) are created
    with respect to any paths that include the given `tensors`, the gradient
    tensor(s) with respect to those tensors will be registered with this
    `GradientsDebugger` instance and can later be retrieved with the
    methods `gradient_tensor` and `gradient_tensors`.

    Unlike the method `identify_gradient`, this method is used to retrieve
    gradient tensors after the construction of the forward subgraph has
    completed (but before the construction of the backward subgraph).

    This method is the same as `watch_gradients_by_tensor_names` except that
    the tensors are specified by the Python `tf.Tensor` or `tf.Variable`
    objects, instead of by name patterns.

    Example:

    ```python
    x = tf.Variable(1.0)
    y = tf.add(x, x, name="y")
    z = tf.square(y)

    # Create a train op under the grad_debugger context.
    grad_debugger = tf_debug.GradientsDebugger()
    with grad_debugger.watch_gradients_by_tensors(tf.get_default_graph(), y):
      train_op = tf.train.GradientDescentOptimizer(0.1).minimize(z)

    # Now we can reflect through grad_debugger to get the gradient tensor
    # with respect to y.
    y_grad = grad_debugger.gradient_tensor(y)
    # or
    y_grad = grad_debugger.gradient_tensor("y:0")
    ```

    Args:
      graph: the `tf.Graph` to watch the gradients on.
      tensors: a `tf.Tensor` or `tf.Variable` object, or a list of such objects.

    Returns:
      The GradientsDebugger instance itself.
    """

    if not isinstance(tensors, list):
      tensors = [tensors]

    tensor_name_regex = []
    for tensor in tensors:
      tensor_name_regex.append(re.escape(tensor.name) + "$")
    tensor_name_regex = "(" + "|".join(tensor_name_regex) + ")"
    return self.watch_gradients_by_tensor_names(graph, tensor_name_regex)

  def watch_gradients_by_tensor_names(self, graph, tensor_name_regex):
    """Watch gradient tensors by name(s) of the x-tensor(s).

    The side effect of this method is that when gradient tensor(s) are created
    with respect to the x-tensors, the gradient tensor(s) will be registered
    with this `GradientsDebugger` instance and can later be retrieved.
    Unlike the `identify_gradient` method, this method is used after the
    construction of the forward graph has completed. Unlike the
    `watch_gradients_by_tensors` method, this method does not use handles to
    the tensors of interest; it uses their names.

    This method is the same as `watch_gradients_by_tensors` except that the
    x-tensors are specified by name patterns, instead of `tf.Tensor` or
    `tf.Variable` objects.

    Example:

    ```python
    x = tf.Variable(1.0, name="x")
    y = tf.add(x, x, name="y")
    z = tf.square(y)

    # Create a train op under the grad_debugger context.
    grad_debugger = tf_debug.GradientsDebugger()
    with grad_debugger.watch_gradients_by_tensor_names(
        tf.get_default_graph(), r"(x|y):0$"):
      train_op = tf.train.GradientDescentOptimizer(0.1).minimize(z)

    # Now we can reflect through grad_debugger to get the gradient tensor
    # with respect to x and y.
    x_grad = grad_debugger.gradient_tensor("x:0")
    y_grad = grad_debugger.gradient_tensor("y:0")
    ```

    Args:
      graph: the `tf.Graph` to watch the gradients on.
      tensor_name_regex: the regular-expression pattern of the name(s) of the
        x-tensor(s) to watch. x-tensor refers to the tensors on the denominator
        of the differentiation.

    Returns:
      The GradientsDebugger instance itself.
    """
    tensor_name_pattern = re.compile(tensor_name_regex)
    with graph.as_default():
      for op in graph.get_operations():
        for output in op.outputs:
          if tensor_name_pattern.match(output.name):
            debug_op = self.identify_gradient(output)

            # Make a copy of output.consumers() since we'll modify the consumers
            # TODO(skyewm): this is unnecessary once the C API is enabled
            for consumer in list(output.consumers()):
              if consumer == debug_op.op:
                continue

              # Locate the slot index of the original input.
              for i, consumer_input in enumerate(consumer.inputs):
                if consumer_input == output:
                  consumer._update_input(i, debug_op)  # pylint: disable=protected-access
    return self

  def _check_same_graph(self, tensor):
    if self._graph is None:
      self._graph = tensor.graph
    elif self._graph != tensor.graph:
      raise ValueError(
          "The graph of the value (%s) is not the same as the graph %s" %
          (tensor.graph, self._graph))

  def register_gradient_tensor(self,
                               x_tensor_name,
                               gradient_tensor):
    """Register the gradient tensor for an x-tensor.

    Args:
      x_tensor_name: (`str`) the name of the independent `tf.Tensor`, i.e.,
        the tensor on the denominator of the differentiation.
      gradient_tensor: the gradient `tf.Tensor`.
    """
    if len(_gradient_debuggers) == 1 or self._is_active_context:
      self._check_same_graph(gradient_tensor)
      self._gradient_tensors[x_tensor_name] = gradient_tensor

  def gradient_tensor(self, x_tensor):
    """Get the gradient tensor of an x-tensor.

    Args:
      x_tensor: (`tf.Tensor`, `tf.Variable` or `str`) The x-tensor object or its
        name. x-tensor refers to the independent `tf.Tensor`, i.e., the tensor
        on the denominator of the differentiation.

    Returns:
      If found, the gradient tensor.

    Raises:
      TypeError: If `x_tensor` is not a `tf.Tensor`, `tf.Variable` or `str`.
      LookupError: If the `x_tensor` has not been registered with a gradient
        tensor.
    """
    x_tensor_name = self._get_tensor_name(x_tensor)
    if x_tensor_name not in self._gradient_tensors:
      raise LookupError(
          "This GradientsDebugger has not received any gradient tensor for "
          "x-tensor %s" % x_tensor_name)
    return self._gradient_tensors[x_tensor_name]

  def gradient_tensors(self):
    """Get the gradient tensors that this object is aware of.

    Returns:
      A dict mapping x-tensor names to gradient tensor objects. x-tensor refers
      to the tensors on the denominator of the differentiation.
    336     """
    337     return self._gradient_tensors
    338 
    339   def _get_tensor_name(self, tensor):
    340     if isinstance(tensor, (ops.Tensor, variables.Variable)):
    341       return tensor.name
    342     elif isinstance(tensor, six.string_types):
    343       return tensor
    344     else:
    345       raise TypeError(
    346           "x_tensor must be a str or tf.Tensor or tf.Variable, "
    347           "but instead has type %s" % type(tensor))
    348 
    349 
    350 def clear_gradient_debuggers():
    351   """Clear all globally registered gradient debuggers."""
    352   _gradient_debuggers.clear()
    353 
    354 
    355 @ops.RegisterGradient("DebugGradientIdentity")
    356 def _identify_gradient_grad(op, dy):
    357   """Gradient function for the DebugIdentity op."""
  # TODO(cais): Allow overriding gradient.
  grad_debugger_uuid, orig_tensor_name = _parse_grad_debug_op_name(op.name)
  grad_debugger = _gradient_debuggers[grad_debugger_uuid]
  grad_debugger.register_gradient_tensor(orig_tensor_name, dy)
  return dy


@ops.RegisterGradient("DebugGradientRefIdentity")
def _identify_gradient_grad_ref(op, dy):
    367   """Gradient function for the DebugIdentity op."""
  return _identify_gradient_grad(op, dy)


def gradient_values_from_dump(grad_debugger, x_tensor, dump):
  """Find gradient values from a `DebugDumpDir` object.

  Args:
    grad_debugger: the `tf_debug.GradientsDebugger` instance to be used.
    x_tensor: (`tf.Tensor`, `tf.Variable` or `str`) The x-tensor object or its
      name. x-tensor refers to the independent `tf.Tensor`, i.e., the tensor
      on the denominator of the differentiation.
    dump: A `tfdbg.DebugDumpDir` object.

  Returns:
    If this `GradientsDebugger` instance has the gradient tensor of `x_tensor`
      registered: a list of `numpy.ndarray` representing the values of the
      gradient tensor from `dump`. The list could be empty if the gradient
      tensor was not executed in the `tf.Session.run()` call that generated
      the `dump`. The list could also contain multiple values of the gradient
      tensor, e.g., if the gradient tensor is computed repeatedly in a
      `tf.while_loop` during the run that generated the `dump`.

  Raises:
    LookupError: If this `GradientsDebugger` instance does not have the
      gradient tensor of `x_tensor` registered.
    ValueError: If this `GradientsDebugger` has a `tf.Graph` object that
      does not match the `tf.Graph` object of the `dump`.
    TypeError: If `x_tensor` is not a `tf.Tensor`, `tf.Variable` or `str`.
  """
  # TODO(cais): Use this method in LocalCLIDebugWrapperSession to present the
  # gradient tensors to the TFDBG CLI.

  # If possible, verify that the Python graph of the dump and that of this
  # GradientsDebugger match.
  if (dump.python_graph and grad_debugger.graph and
      dump.python_graph != grad_debugger.graph):
    raise ValueError(
        "This GradientsDebugger instance has a graph (%s) that differs from "
        "the graph of the DebugDumpDir object (%s)." %
        (grad_debugger.graph, dump.python_graph))

  gradient_tensor = grad_debugger.gradient_tensor(x_tensor)
  node_name, output_slot = debug_graphs.parse_node_or_tensor_name(
      gradient_tensor.name)

  try:
    return dump.get_tensors(node_name, output_slot, "DebugIdentity")
  except debug_data.WatchKeyDoesNotExistInDebugDumpDirError:
    return []
    417