# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to compute receptive field of a fully-convolutional network."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from tensorflow.contrib.receptive_field.python.util import graph_compute_order
from tensorflow.contrib.receptive_field.python.util import parse_layer_parameters
from tensorflow.python.framework import ops as framework_ops
from tensorflow.python.platform import tf_logging as logging


def _get_rf_size_node_input(stride, kernel_size, rf_size_output):
  """Computes RF size at the input of a given layer.

  Args:
    stride: Stride of given layer (integer).
    kernel_size: Kernel size of given layer (integer).
    rf_size_output: RF size at output of given layer (integer).

  Returns:
    rf_size_input: RF size at input of given layer (integer).
  """
  return stride * rf_size_output + kernel_size - stride


def _get_effective_stride_node_input(stride, effective_stride_output):
  """Computes effective stride at the input of a given layer.

  Args:
    stride: Stride of given layer (integer).
    effective_stride_output: Effective stride at output of given layer
      (integer).

  Returns:
    effective_stride_input: Effective stride at input of given layer
      (integer).
  """
  return stride * effective_stride_output


def _get_effective_padding_node_input(stride, padding,
                                      effective_padding_output):
  """Computes effective padding at the input of a given layer.

  Args:
    stride: Stride of given layer (integer).
    padding: Padding of given layer (integer).
    effective_padding_output: Effective padding at output of given layer
      (integer).

  Returns:
    effective_padding_input: Effective padding at input of given layer
      (integer).
  """
  return stride * effective_padding_output + padding


def _strip_output_port(inp_name):
  """Returns the node name of a data input, dropping any ":<port>" suffix.

  Args:
    inp_name: Entry of `node.input` that is not a control dependency, i.e.
      either "node" or "node:src_output".

  Returns:
    The part of `inp_name` before the first ":".
  """
  return inp_name.split(":")[0]


class ReceptiveField(object):
  """Receptive field of a convolutional neural network.

  The three attributes are stored as numpy arrays (via `np.asarray`). The
  coordinate conversion methods index them with a list of axes, so they are
  expected to hold one entry per spatial dimension.

  `compute_receptive_field_from_graph_def` may store `None` padding entries
  when the padding could not be determined; the arithmetic in the coordinate
  conversion methods cannot be evaluated on such entries.

  Args:
    size: Receptive field size.
    stride: Effective stride.
    padding: Effective padding.
  """

  def __init__(self, size, stride, padding):
    self.size = np.asarray(size)
    self.stride = np.asarray(stride)
    self.padding = np.asarray(padding)

  def compute_input_center_coordinates(self, y, axis=None):
    """Computes the center of the receptive field that generated a feature.

    Args:
      y: An array of feature coordinates with shape `(..., d)`, where `d` is the
        number of dimensions of the coordinates.
      axis: The dimensions for which to compute the input center coordinates. If
        `None` (the default), compute the input center coordinates for all
        dimensions.

    Returns:
      x: Center of the receptive field that generated the features, at the input
        of the network.

    Raises:
      ValueError: If the number of dimensions of the feature coordinates does
        not match the number of elements in `axis`.
    """
    # Use all dimensions.
    if axis is None:
      axis = range(self.size.size)
    # Ensure axis is a list because tuples have different indexing behavior.
    axis = list(axis)
    y = np.asarray(y)
    if y.shape[-1] != len(axis):
      raise ValueError("Dimensionality of the feature coordinates `y` (%d) "
                       "does not match dimensionality of `axis` (%d)" %
                       (y.shape[-1], len(axis)))
    # True division (see the `__future__` import) keeps half-pixel centers.
    return -self.padding[axis] + y * self.stride[axis] + (
        self.size[axis] - 1) / 2

  def compute_feature_coordinates(self, x, axis=None):
    """Computes the position of a feature given the center of a receptive field.

    This is the algebraic inverse of `compute_input_center_coordinates`.

    Args:
      x: An array of input center coordinates with shape `(..., d)`, where `d`
        is the number of dimensions of the coordinates.
      axis: The dimensions for which to compute the feature coordinates. If
        `None` (the default), compute the feature coordinates for all
        dimensions.

    Returns:
      y: Coordinates of the features.

    Raises:
      ValueError: If the number of dimensions of the input center coordinates
        does not match the number of elements in `axis`.
    """
    # Use all dimensions.
    if axis is None:
      axis = range(self.size.size)
    # Ensure axis is a list because tuples have different indexing behavior.
    axis = list(axis)
    x = np.asarray(x)
    if x.shape[-1] != len(axis):
      raise ValueError("Dimensionality of the input center coordinates `x` "
                       "(%d) does not match dimensionality of `axis` (%d)" %
                       (x.shape[-1], len(axis)))
    return (x + self.padding[axis] +
            (1 - self.size[axis]) / 2) / self.stride[axis]

  def __iter__(self):
    """Iterates over all sizes, then all strides, then all paddings.

    This allows unpacking, e.g. for a two-dimensional receptive field built as
    `ReceptiveField((sx, sy), (tx, ty), (px, py))` the iteration order is
    `sx, sy, tx, ty, px, py`.
    """
    return iter(np.concatenate([self.size, self.stride, self.padding]))


def compute_receptive_field_from_graph_def(graph_def,
                                           input_node,
                                           output_node,
                                           stop_propagation=None,
                                           input_resolution=None):
  """Computes receptive field (RF) parameters from a Graph or GraphDef object.

  The algorithm stops the calculation of the receptive field whenever it
  encounters an operation in the list `stop_propagation`. Stopping the
  calculation early can be useful to calculate the receptive field of a
  subgraph such as a single branch of the
  [inception network](https://arxiv.org/abs/1512.00567).

  Args:
    graph_def: Graph or GraphDef object.
    input_node: Name of the input node or Tensor object from graph.
    output_node: Name of the output node or Tensor object from graph.
    stop_propagation: List of operations or scope names for which to stop the
      propagation of the receptive field. An input is skipped when its name
      starts with any entry of this list.
    input_resolution: 2D list. If the input resolution to the model is fixed and
      known, this may be set. This is helpful for cases where the RF parameters
      vary depending on the input resolution (this happens since SAME padding in
      tensorflow depends on input resolution in general). If this is None, it is
      assumed that the input resolution is unknown, so some RF parameters may be
      unknown (depending on the model architecture).

  Returns:
    A `ReceptiveField` object, computed with respect to the specified input and
    output, whose attributes hold (horizontal, vertical) pairs:
      size: Receptive field size of the network.
      stride: Effective stride of the network.
      padding: Effective padding of the network. Both entries are None if the
        padding of some traversed layer was undefined.
    Iterating over (or unpacking) the result yields, in order: rf_size_x,
    rf_size_y, effective_stride_x, effective_stride_y, effective_padding_x,
    effective_padding_y.

  Raises:
    ValueError: If network is not aligned or if either input or output nodes
      cannot be found. For network criterion alignment, see
      photos/vision/features/delf/g3doc/rf_computation.md
  """
  # Convert a graph to graph_def if necessary.
  if isinstance(graph_def, framework_ops.Graph):
    graph_def = graph_def.as_graph_def()

  # Convert tensors to names.
  if isinstance(input_node, framework_ops.Tensor):
    input_node = input_node.op.name
  if isinstance(output_node, framework_ops.Tensor):
    output_node = output_node.op.name

  stop_propagation = stop_propagation or []

  # Computes order of computation for a given graph.
  node_info, name_to_node = graph_compute_order.get_compute_order(
      graph_def=graph_def,
      input_node_name=input_node,
      input_node_size=input_resolution)

  # Sort in reverse topological order.
  ordered_node_info = sorted(node_info.items(), key=lambda x: -x[1].order)

  # Dictionaries to keep track of receptive field, effective stride and
  # effective padding of different nodes. All of them are keyed by node name.
  rf_sizes_x = {}
  rf_sizes_y = {}
  effective_strides_x = {}
  effective_strides_y = {}
  effective_paddings_x = {}
  effective_paddings_y = {}

  # Initialize dicts for output_node.
  rf_sizes_x[output_node] = 1
  rf_sizes_y[output_node] = 1
  effective_strides_x[output_node] = 1
  effective_strides_y[output_node] = 1
  effective_paddings_x[output_node] = 0
  effective_paddings_y[output_node] = 0

  # Flag to denote if we found output node yet. If we have not, we skip nodes
  # until the output node is found.
  found_output_node = False

  # Flag to denote if padding is undefined. This happens when SAME padding mode
  # is used in conjunction with stride and kernel sizes which make it such that
  # the padding to be applied would depend on the input size. In this case,
  # alignment checks are skipped, and the effective padding is None. Once set,
  # the flag stays set for the remainder of the traversal.
  undefined_padding = False

  for _, (o, node, _, _) in ordered_node_info:
    if node:
      logging.vlog(3, "%10d %-100s %-20s", o, node.name[:90], node.op)
    else:
      continue

    # When we find input node, we can stop.
    if node.name == input_node:
      break

    # Loop until we find the output node. All nodes before finding the output
    # one are irrelevant, so they can be skipped.
    if not found_output_node:
      if node.name == output_node:
        found_output_node = True

    if found_output_node:
      if node.name not in rf_sizes_x:
        assert node.name not in rf_sizes_y, ("Node %s is in rf_sizes_y, but "
                                             "not in rf_sizes_x" % node.name)
        # In this case, node is not relevant since it's not part of the
        # computation we're interested in.
        logging.vlog(3, "Irrelevant node %s, skipping it...", node.name)
        continue

      # Get params for this layer.
      (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y,
       _, _) = parse_layer_parameters.get_layer_params(
           node, name_to_node, node_info[node.name].input_size)
      logging.vlog(
          3, "kernel_size_x = %s, kernel_size_y = %s, "
          "stride_x = %s, stride_y = %s, "
          "padding_x = %s, padding_y = %s, input size = %s",
          kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
          padding_y, node_info[node.name].input_size)
      if padding_x is None or padding_y is None:
        undefined_padding = True

      # Get parameters at input of this layer which may or may not be propagated
      # to the input layers.
      rf_size_input_x = _get_rf_size_node_input(stride_x, kernel_size_x,
                                                rf_sizes_x[node.name])
      rf_size_input_y = _get_rf_size_node_input(stride_y, kernel_size_y,
                                                rf_sizes_y[node.name])
      effective_stride_input_x = _get_effective_stride_node_input(
          stride_x, effective_strides_x[node.name])
      effective_stride_input_y = _get_effective_stride_node_input(
          stride_y, effective_strides_y[node.name])
      if not undefined_padding:
        effective_padding_input_x = _get_effective_padding_node_input(
            stride_x, padding_x, effective_paddings_x[node.name])
        effective_padding_input_y = _get_effective_padding_node_input(
            stride_y, padding_y, effective_paddings_y[node.name])
      else:
        effective_padding_input_x = None
        effective_padding_input_y = None
      logging.vlog(
          4, "rf_size_input_x = %s, rf_size_input_y = %s, "
          "effective_stride_input_x = %s, effective_stride_input_y = %s, "
          "effective_padding_input_x = %s, effective_padding_input_y = %s",
          rf_size_input_x, rf_size_input_y, effective_stride_input_x,
          effective_stride_input_y, effective_padding_input_x,
          effective_padding_input_y)

      # Loop over this node's inputs and potentially propagate information down.
      for inp_name in node.input:
        # Stop the propagation of the receptive field.
        if any(inp_name.startswith(stop) for stop in stop_propagation):
          logging.vlog(3, "Skipping explicitly ignored node %s.", inp_name)
          continue

        logging.vlog(4, "inp_name = %s", inp_name)
        if inp_name.startswith("^"):
          # The character "^" denotes a control dependency, so this input node
          # can be safely ignored.
          continue

        # A data input may be written as "node:src_output". The bookkeeping
        # dictionaries and `name_to_node` are keyed by the bare node name, so
        # drop the output port before using the name as a key.
        inp_name = _strip_output_port(inp_name)

        inp_node = name_to_node[inp_name]
        logging.vlog(4, "inp_node = \n%s", inp_node)
        if inp_name in rf_sizes_x:
          assert inp_name in rf_sizes_y, ("Node %s is in rf_sizes_x, but "
                                          "not in rf_sizes_y" % inp_name)
          logging.vlog(
              4, "rf_sizes_x[inp_name] = %s,"
              " rf_sizes_y[inp_name] = %s, "
              "effective_strides_x[inp_name] = %s,"
              " effective_strides_y[inp_name] = %s, "
              "effective_paddings_x[inp_name] = %s,"
              " effective_paddings_y[inp_name] = %s",
              rf_sizes_x[inp_name], rf_sizes_y[inp_name],
              effective_strides_x[inp_name], effective_strides_y[inp_name],
              effective_paddings_x[inp_name], effective_paddings_y[inp_name])
          # This node was already discovered through a previous path, so we need
          # to make sure that graph is aligned. This alignment check is skipped
          # if the padding is not defined, since in this case alignment cannot
          # be checked.
          if not undefined_padding:
            if effective_strides_x[inp_name] != effective_stride_input_x:
              raise ValueError(
                  "Graph is not aligned since effective stride from different "
                  "paths is different in horizontal direction")
            if effective_strides_y[inp_name] != effective_stride_input_y:
              raise ValueError(
                  "Graph is not aligned since effective stride from different "
                  "paths is different in vertical direction")
            if (rf_sizes_x[inp_name] -
                1) / 2 - effective_paddings_x[inp_name] != (
                    rf_size_input_x - 1) / 2 - effective_padding_input_x:
              raise ValueError(
                  "Graph is not aligned since center shift from different "
                  "paths is different in horizontal direction")
            if (rf_sizes_y[inp_name] -
                1) / 2 - effective_paddings_y[inp_name] != (
                    rf_size_input_y - 1) / 2 - effective_padding_input_y:
              raise ValueError(
                  "Graph is not aligned since center shift from different "
                  "paths is different in vertical direction")
          # Keep track of path with largest RF, for both directions.
          if rf_sizes_x[inp_name] < rf_size_input_x:
            rf_sizes_x[inp_name] = rf_size_input_x
            effective_strides_x[inp_name] = effective_stride_input_x
            effective_paddings_x[inp_name] = effective_padding_input_x
          if rf_sizes_y[inp_name] < rf_size_input_y:
            rf_sizes_y[inp_name] = rf_size_input_y
            effective_strides_y[inp_name] = effective_stride_input_y
            effective_paddings_y[inp_name] = effective_padding_input_y
        else:
          assert inp_name not in rf_sizes_y, ("Node %s is in rf_sizes_y, but "
                                              "not in rf_sizes_x" % inp_name)
          # In this case, it is the first time we encounter this node. So we
          # propagate the RF parameters.
          rf_sizes_x[inp_name] = rf_size_input_x
          rf_sizes_y[inp_name] = rf_size_input_y
          effective_strides_x[inp_name] = effective_stride_input_x
          effective_strides_y[inp_name] = effective_stride_input_y
          effective_paddings_x[inp_name] = effective_padding_input_x
          effective_paddings_y[inp_name] = effective_padding_input_y

  if not found_output_node:
    raise ValueError("Output node was not found")
  if input_node not in rf_sizes_x:
    raise ValueError("Input node was not found")
  return ReceptiveField(
      (rf_sizes_x[input_node], rf_sizes_y[input_node]),
      (effective_strides_x[input_node], effective_strides_y[input_node]),
      (effective_paddings_x[input_node], effective_paddings_y[input_node]))