Home | History | Annotate | Download | only in util
      1 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Functions to compute receptive field of a fully-convolutional network."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import numpy as np
     22 from tensorflow.contrib.receptive_field.python.util import graph_compute_order
     23 from tensorflow.contrib.receptive_field.python.util import parse_layer_parameters
     24 from tensorflow.python.framework import ops as framework_ops
     25 from tensorflow.python.platform import tf_logging as logging
     26 
     27 
     28 def _get_rf_size_node_input(stride, kernel_size, rf_size_output):
     29   """Computes RF size at the input of a given layer.
     30 
     31   Args:
     32     stride: Stride of given layer (integer).
     33     kernel_size: Kernel size of given layer (integer).
     34     rf_size_output: RF size at output of given layer (integer).
     35 
     36   Returns:
     37     rf_size_input: RF size at input of given layer (integer).
     38   """
     39   return stride * rf_size_output + kernel_size - stride
     40 
     41 
     42 def _get_effective_stride_node_input(stride, effective_stride_output):
     43   """Computes effective stride at the input of a given layer.
     44 
     45   Args:
     46     stride: Stride of given layer (integer).
     47     effective_stride_output: Effective stride at output of given layer
     48       (integer).
     49 
     50   Returns:
     51     effective_stride_input: Effective stride at input of given layer
     52       (integer).
     53   """
     54   return stride * effective_stride_output
     55 
     56 
     57 def _get_effective_padding_node_input(stride, padding,
     58                                       effective_padding_output):
     59   """Computes effective padding at the input of a given layer.
     60 
     61   Args:
     62     stride: Stride of given layer (integer).
     63     padding: Padding of given layer (integer).
     64     effective_padding_output: Effective padding at output of given layer
     65       (integer).
     66 
     67   Returns:
     68     effective_padding_input: Effective padding at input of given layer
     69       (integer).
     70   """
     71   return stride * effective_padding_output + padding
     72 
     73 
     74 class ReceptiveField(object):
     75   """Receptive field of a convolutional neural network.
     76 
     77   Args:
     78     size: Receptive field size.
     79     stride: Effective stride.
     80     padding: Effective padding.
     81   """
     82 
     83   def __init__(self, size, stride, padding):
     84     self.size = np.asarray(size)
     85     self.stride = np.asarray(stride)
     86     self.padding = np.asarray(padding)
     87 
     88   def compute_input_center_coordinates(self, y, axis=None):
     89     """Computes the center of the receptive field that generated a feature.
     90 
     91     Args:
     92       y: An array of feature coordinates with shape `(..., d)`, where `d` is the
     93         number of dimensions of the coordinates.
     94       axis: The dimensions for which to compute the input center coordinates. If
     95         `None` (the default), compute the input center coordinates for all
     96         dimensions.
     97 
     98     Returns:
     99       x: Center of the receptive field that generated the features, at the input
    100         of the network.
    101 
    102     Raises:
    103       ValueError: If the number of dimensions of the feature coordinates does
    104         not match the number of elements in `axis`.
    105     """
    106     # Use all dimensions.
    107     if axis is None:
    108       axis = range(self.size.size)
    109     # Ensure axis is a list because tuples have different indexing behavior.
    110     axis = list(axis)
    111     y = np.asarray(y)
    112     if y.shape[-1] != len(axis):
    113       raise ValueError("Dimensionality of the feature coordinates `y` (%d) "
    114                        "does not match dimensionality of `axis` (%d)" %
    115                        (y.shape[-1], len(axis)))
    116     return -self.padding[axis] + y * self.stride[axis] + (
    117         self.size[axis] - 1) / 2
    118 
    119   def compute_feature_coordinates(self, x, axis=None):
    120     """Computes the position of a feature given the center of a receptive field.
    121 
    122     Args:
    123       x: An array of input center coordinates with shape `(..., d)`, where `d`
    124         is the number of dimensions of the coordinates.
    125       axis: The dimensions for which to compute the feature coordinates. If
    126         `None` (the default), compute the feature coordinates for all
    127         dimensions.
    128 
    129     Returns:
    130       y: Coordinates of the features.
    131 
    132     Raises:
    133       ValueError: If the number of dimensions of the input center coordinates
    134         does not match the number of elements in `axis`.
    135     """
    136     # Use all dimensions.
    137     if axis is None:
    138       axis = range(self.size.size)
    139     # Ensure axis is a list because tuples have different indexing behavior.
    140     axis = list(axis)
    141     x = np.asarray(x)
    142     if x.shape[-1] != len(axis):
    143       raise ValueError("Dimensionality of the input center coordinates `x` "
    144                        "(%d) does not match dimensionality of `axis` (%d)" %
    145                        (x.shape[-1], len(axis)))
    146     return (x + self.padding[axis] +
    147             (1 - self.size[axis]) / 2) / self.stride[axis]
    148 
    149   def __iter__(self):
    150     return iter(np.concatenate([self.size, self.stride, self.padding]))
    151 
    152 
    153 def compute_receptive_field_from_graph_def(graph_def,
    154                                            input_node,
    155                                            output_node,
    156                                            stop_propagation=None,
    157                                            input_resolution=None):
    158   """Computes receptive field (RF) parameters from a Graph or GraphDef object.
    159 
    160   The algorithm stops the calculation of the receptive field whenever it
    161   encounters an operation in the list `stop_propagation`. Stopping the
    162   calculation early can be useful to calculate the receptive field of a
    163   subgraph such as a single branch of the
    164   [inception network](https://arxiv.org/abs/1512.00567).
    165 
    166   Args:
    167     graph_def: Graph or GraphDef object.
    168     input_node: Name of the input node or Tensor object from graph.
    169     output_node: Name of the output node or Tensor object from graph.
    170     stop_propagation: List of operations or scope names for which to stop the
    171       propagation of the receptive field.
    172     input_resolution: 2D list. If the input resolution to the model is fixed and
    173       known, this may be set. This is helpful for cases where the RF parameters
    174       vary depending on the input resolution (this happens since SAME padding in
    175       tensorflow depends on input resolution in general). If this is None, it is
    176       assumed that the input resolution is unknown, so some RF parameters may be
    177       unknown (depending on the model architecture).
    178 
    179   Returns:
    180     rf_size_x: Receptive field size of network in the horizontal direction, with
    181       respect to specified input and output.
    182     rf_size_y: Receptive field size of network in the vertical direction, with
    183       respect to specified input and output.
    184     effective_stride_x: Effective stride of network in the horizontal direction,
    185       with respect to specified input and output.
    186     effective_stride_y: Effective stride of network in the vertical direction,
    187       with respect to specified input and output.
    188     effective_padding_x: Effective padding of network in the horizontal
    189       direction, with respect to specified input and output.
    190     effective_padding_y: Effective padding of network in the vertical
    191       direction, with respect to specified input and output.
    192 
    193   Raises:
    194     ValueError: If network is not aligned or if either input or output nodes
    195       cannot be found. For network criterion alignment, see
    196       photos/vision/features/delf/g3doc/rf_computation.md
    197   """
    198   # Convert a graph to graph_def if necessary.
    199   if isinstance(graph_def, framework_ops.Graph):
    200     graph_def = graph_def.as_graph_def()
    201 
    202   # Convert tensors to names.
    203   if isinstance(input_node, framework_ops.Tensor):
    204     input_node = input_node.op.name
    205   if isinstance(output_node, framework_ops.Tensor):
    206     output_node = output_node.op.name
    207 
    208   stop_propagation = stop_propagation or []
    209 
    210   # Computes order of computation for a given graph.
    211   node_info, name_to_node = graph_compute_order.get_compute_order(
    212       graph_def=graph_def,
    213       input_node_name=input_node,
    214       input_node_size=input_resolution)
    215 
    216   # Sort in reverse topological order.
    217   ordered_node_info = sorted(node_info.items(), key=lambda x: -x[1].order)
    218 
    219   # Dictionaries to keep track of receptive field, effective stride and
    220   # effective padding of different nodes.
    221   rf_sizes_x = {}
    222   rf_sizes_y = {}
    223   effective_strides_x = {}
    224   effective_strides_y = {}
    225   effective_paddings_x = {}
    226   effective_paddings_y = {}
    227 
    228   # Initialize dicts for output_node.
    229   rf_sizes_x[output_node] = 1
    230   rf_sizes_y[output_node] = 1
    231   effective_strides_x[output_node] = 1
    232   effective_strides_y[output_node] = 1
    233   effective_paddings_x[output_node] = 0
    234   effective_paddings_y[output_node] = 0
    235 
    236   # Flag to denote if we found output node yet. If we have not, we skip nodes
    237   # until the output node is found.
    238   found_output_node = False
    239 
    240   # Flag to denote if padding is undefined. This happens when SAME padding mode
    241   # is used in conjunction with stride and kernel sizes which make it such that
    242   # the padding to be applied would depend on the input size. In this case,
    243   # alignment checks are skipped, and the effective padding is None.
    244   undefined_padding = False
    245 
    246   for _, (o, node, _, _) in ordered_node_info:
    247     if node:
    248       logging.vlog(3, "%10d %-100s %-20s" % (o, node.name[:90], node.op))
    249     else:
    250       continue
    251 
    252     # When we find input node, we can stop.
    253     if node.name == input_node:
    254       break
    255 
    256     # Loop until we find the output node. All nodes before finding the output
    257     # one are irrelevant, so they can be skipped.
    258     if not found_output_node:
    259       if node.name == output_node:
    260         found_output_node = True
    261 
    262     if found_output_node:
    263       if node.name not in rf_sizes_x:
    264         assert node.name not in rf_sizes_y, ("Node %s is in rf_sizes_y, but "
    265                                              "not in rf_sizes_x" % node.name)
    266         # In this case, node is not relevant since it's not part of the
    267         # computation we're interested in.
    268         logging.vlog(3, "Irrelevant node %s, skipping it...", node.name)
    269         continue
    270 
    271       # Get params for this layer.
    272       (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y,
    273        _, _) = parse_layer_parameters.get_layer_params(
    274            node, name_to_node, node_info[node.name].input_size)
    275       logging.vlog(
    276           3, "kernel_size_x = %s, kernel_size_y = %s, "
    277           "stride_x = %s, stride_y = %s, "
    278           "padding_x = %s, padding_y = %s, input size = %s" %
    279           (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
    280            padding_y, node_info[node.name].input_size))
    281       if padding_x is None or padding_y is None:
    282         undefined_padding = True
    283 
    284       # Get parameters at input of this layer which may or may not be propagated
    285       # to the input layers.
    286       rf_size_input_x = _get_rf_size_node_input(stride_x, kernel_size_x,
    287                                                 rf_sizes_x[node.name])
    288       rf_size_input_y = _get_rf_size_node_input(stride_y, kernel_size_y,
    289                                                 rf_sizes_y[node.name])
    290       effective_stride_input_x = _get_effective_stride_node_input(
    291           stride_x, effective_strides_x[node.name])
    292       effective_stride_input_y = _get_effective_stride_node_input(
    293           stride_y, effective_strides_y[node.name])
    294       if not undefined_padding:
    295         effective_padding_input_x = _get_effective_padding_node_input(
    296             stride_x, padding_x, effective_paddings_x[node.name])
    297         effective_padding_input_y = _get_effective_padding_node_input(
    298             stride_y, padding_y, effective_paddings_y[node.name])
    299       else:
    300         effective_padding_input_x = None
    301         effective_padding_input_y = None
    302       logging.vlog(
    303           4, "rf_size_input_x = %s, rf_size_input_y = %s, "
    304           "effective_stride_input_x = %s, effective_stride_input_y = %s, "
    305           "effective_padding_input_x = %s, effective_padding_input_y = %s" %
    306           (rf_size_input_x, rf_size_input_y, effective_stride_input_x,
    307            effective_stride_input_y, effective_padding_input_x,
    308            effective_padding_input_y))
    309 
    310       # Loop over this node's inputs and potentially propagate information down.
    311       for inp_name in node.input:
    312         # Stop the propagation of the receptive field.
    313         if any(inp_name.startswith(stop) for stop in stop_propagation):
    314           logging.vlog(3, "Skipping explicitly ignored node %s.", inp_name)
    315           continue
    316 
    317         logging.vlog(4, "inp_name = %s", inp_name)
    318         if inp_name.startswith("^"):
    319           # The character "^" denotes a control dependency, so this input node
    320           # can be safely ignored.
    321           continue
    322 
    323         inp_node = name_to_node[inp_name]
    324         logging.vlog(4, "inp_node = \n%s", inp_node)
    325         if inp_name in rf_sizes_x:
    326           assert inp_name in rf_sizes_y, ("Node %s is in rf_sizes_x, but "
    327                                           "not in rf_sizes_y" % inp_name)
    328           logging.vlog(
    329               4, "rf_sizes_x[inp_name] = %s,"
    330               " rf_sizes_y[inp_name] = %s, "
    331               "effective_strides_x[inp_name] = %s,"
    332               " effective_strides_y[inp_name] = %s, "
    333               "effective_paddings_x[inp_name] = %s,"
    334               " effective_paddings_y[inp_name] = %s" %
    335               (rf_sizes_x[inp_name], rf_sizes_y[inp_name],
    336                effective_strides_x[inp_name], effective_strides_y[inp_name],
    337                effective_paddings_x[inp_name], effective_paddings_y[inp_name]))
    338           # This node was already discovered through a previous path, so we need
    339           # to make sure that graph is aligned. This alignment check is skipped
    340           # if the padding is not defined, since in this case alignment cannot
    341           # be checked.
    342           if not undefined_padding:
    343             if effective_strides_x[inp_name] != effective_stride_input_x:
    344               raise ValueError(
    345                   "Graph is not aligned since effective stride from different "
    346                   "paths is different in horizontal direction")
    347             if effective_strides_y[inp_name] != effective_stride_input_y:
    348               raise ValueError(
    349                   "Graph is not aligned since effective stride from different "
    350                   "paths is different in vertical direction")
    351             if (rf_sizes_x[inp_name] -
    352                 1) / 2 - effective_paddings_x[inp_name] != (
    353                     rf_size_input_x - 1) / 2 - effective_padding_input_x:
    354               raise ValueError(
    355                   "Graph is not aligned since center shift from different "
    356                   "paths is different in horizontal direction")
    357             if (rf_sizes_y[inp_name] -
    358                 1) / 2 - effective_paddings_y[inp_name] != (
    359                     rf_size_input_y - 1) / 2 - effective_padding_input_y:
    360               raise ValueError(
    361                   "Graph is not aligned since center shift from different "
    362                   "paths is different in vertical direction")
    363           # Keep track of path with largest RF, for both directions.
    364           if rf_sizes_x[inp_name] < rf_size_input_x:
    365             rf_sizes_x[inp_name] = rf_size_input_x
    366             effective_strides_x[inp_name] = effective_stride_input_x
    367             effective_paddings_x[inp_name] = effective_padding_input_x
    368           if rf_sizes_y[inp_name] < rf_size_input_y:
    369             rf_sizes_y[inp_name] = rf_size_input_y
    370             effective_strides_y[inp_name] = effective_stride_input_y
    371             effective_paddings_y[inp_name] = effective_padding_input_y
    372         else:
    373           assert inp_name not in rf_sizes_y, ("Node %s is in rf_sizes_y, but "
    374                                               "not in rf_sizes_x" % inp_name)
    375           # In this case, it is the first time we encounter this node. So we
    376           # propagate the RF parameters.
    377           rf_sizes_x[inp_name] = rf_size_input_x
    378           rf_sizes_y[inp_name] = rf_size_input_y
    379           effective_strides_x[inp_name] = effective_stride_input_x
    380           effective_strides_y[inp_name] = effective_stride_input_y
    381           effective_paddings_x[inp_name] = effective_padding_input_x
    382           effective_paddings_y[inp_name] = effective_padding_input_y
    383 
    384   if not found_output_node:
    385     raise ValueError("Output node was not found")
    386   if input_node not in rf_sizes_x:
    387     raise ValueError("Input node was not found")
    388   return ReceptiveField(
    389       (rf_sizes_x[input_node], rf_sizes_y[input_node]),
    390       (effective_strides_x[input_node], effective_strides_y[input_node]),
    391       (effective_paddings_x[input_node], effective_paddings_y[input_node]))
    392