1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """Wrappers for primitive Neural Net (NN) Operations.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import collections 22 import numbers 23 24 import numpy as np 25 26 from tensorflow.python.compat import compat 27 from tensorflow.python.eager import context 28 from tensorflow.python.framework import dtypes 29 from tensorflow.python.framework import errors_impl 30 from tensorflow.python.framework import graph_util 31 from tensorflow.python.framework import ops 32 from tensorflow.python.framework import random_seed 33 from tensorflow.python.framework import tensor_shape 34 from tensorflow.python.framework import tensor_util 35 from tensorflow.python.ops import array_ops 36 from tensorflow.python.ops import check_ops 37 from tensorflow.python.ops import gen_nn_ops 38 from tensorflow.python.ops import math_ops 39 from tensorflow.python.ops import random_ops 40 # go/tf-wildcard-import 41 # pylint: disable=wildcard-import 42 from tensorflow.python.ops.gen_nn_ops import * 43 # pylint: enable=wildcard-import 44 from tensorflow.python.util import deprecation 45 from tensorflow.python.util.deprecation import deprecated_args 46 from tensorflow.python.util.deprecation import deprecated_argument_lookup 47 48 from tensorflow.python.util.tf_export import tf_export 49 50 # Aliases for some automatically-generated names. 51 local_response_normalization = gen_nn_ops.lrn 52 53 # pylint: disable=protected-access 54 55 56 def _get_sequence(value, n, channel_index, name): 57 """Formats a value input for gen_nn_ops.""" 58 if value is None: 59 value = [1] 60 elif not isinstance(value, collections.Sized): 61 value = [value] 62 63 current_n = len(value) 64 if current_n == n + 2: 65 return value 66 elif current_n == 1: 67 value = list((value[0],) * n) 68 elif current_n == n: 69 value = list(value) 70 else: 71 raise ValueError("{} should be of length 1, {} or {} but was {}".format( 72 name, n, n + 2, current_n)) 73 74 if channel_index == 1: 75 return [1, 1] + value 76 else: 77 return [1] + value + [1] 78 79 80 def _non_atrous_convolution( 81 input, # pylint: disable=redefined-builtin 82 filter, # pylint: disable=redefined-builtin 83 padding, 84 data_format=None, # pylint: disable=redefined-builtin 85 strides=None, 86 name=None): 87 """Computes sums of N-D convolutions (actually cross correlation). 88 89 It is required that 1 <= N <= 3. 90 91 This is used to implement the more generic `convolution` function, which 92 extends the interface of this function with a `dilation_rate` parameter. 93 94 Args: 95 96 input: Rank N+2 tensor of type T of shape 97 `[batch_size] + input_spatial_shape + [in_channels]` if `data_format` 98 does not start with `"NC"`, or 99 `[batch_size, in_channels] + input_spatial_shape` if `data_format` starts 100 with `"NC"`. 101 filter: Rank N+2 tensor of type T of shape 102 `filter_spatial_shape + [in_channels, out_channels]`. Rank of either 103 `input` or `filter` must be known. 104 padding: Padding method to use, must be either "VALID" or "SAME". 105 data_format: A string or None. Specifies whether the channel dimension of 106 the `input` and output is the last dimension (default, or if `data_format` 107 does not start with "NC"), or the second dimension (if `data_format` 108 starts with "NC"). For N=1, the valid values are "NWC" (default) and 109 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 110 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 111 strides: Sequence of N positive integers, defaults to `[1] * N`. 112 name: Name prefix to use. 113 114 Returns: 115 Rank N+2 tensor of type T of shape 116 `[batch_size] + output_spatial_shape + [out_channels]`, where 117 if padding == "SAME": 118 output_spatial_shape = input_spatial_shape 119 if padding == "VALID": 120 output_spatial_shape = input_spatial_shape - filter_spatial_shape + 1. 121 122 Raises: 123 ValueError: if ranks are incompatible. 124 125 """ 126 with ops.name_scope(name, "non_atrous_convolution", [input, filter]) as scope: 127 input = ops.convert_to_tensor(input, name="input") # pylint: disable=redefined-builtin 128 input_shape = input.get_shape() 129 filter = ops.convert_to_tensor(filter, name="filter") # pylint: disable=redefined-builtin 130 filter_shape = filter.get_shape() 131 op = _NonAtrousConvolution( 132 input_shape, 133 filter_shape=filter_shape, 134 padding=padding, 135 data_format=data_format, 136 strides=strides, 137 name=scope) 138 return op(input, filter) 139 140 141 class _NonAtrousConvolution(object): 142 """Helper class for _non_atrous_convolution. 143 144 Note that this class assumes that shapes of input and filter passed to 145 __call__ are compatible with input_shape and filter_shape passed to the 146 constructor. 147 148 Arguments: 149 input_shape: static input shape, i.e. input.get_shape(). 150 filter_shape: static filter shape, i.e. filter.get_shape(). 151 padding: see _non_atrous_convolution. 152 data_format: see _non_atrous_convolution. 153 strides: see _non_atrous_convolution. 154 name: see _non_atrous_convolution. 155 """ 156 157 def __init__( 158 self, 159 input_shape, 160 filter_shape, # pylint: disable=redefined-builtin 161 padding, 162 data_format=None, 163 strides=None, 164 name=None): 165 filter_shape = filter_shape.with_rank(input_shape.ndims) 166 self.padding = padding 167 self.name = name 168 input_shape = input_shape.with_rank(filter_shape.ndims) 169 if input_shape.ndims is None: 170 raise ValueError("Rank of convolution must be known") 171 if input_shape.ndims < 3 or input_shape.ndims > 5: 172 raise ValueError( 173 "`input` and `filter` must have rank at least 3 and at most 5") 174 conv_dims = input_shape.ndims - 2 175 if strides is None: 176 strides = [1] * conv_dims 177 elif len(strides) != conv_dims: 178 raise ValueError("len(strides)=%d, but should be %d" % (len(strides), 179 conv_dims)) 180 if conv_dims == 1: 181 # conv1d uses the 2-d data format names 182 if data_format is None: 183 data_format = "NWC" 184 elif data_format not in {"NCW", "NWC", "NCHW", "NHWC"}: 185 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 186 self.strides = strides[0] 187 self.data_format = data_format 188 self.conv_op = self._conv1d 189 elif conv_dims == 2: 190 if data_format is None or data_format == "NHWC": 191 data_format = "NHWC" 192 strides = [1] + list(strides) + [1] 193 elif data_format == "NCHW": 194 strides = [1, 1] + list(strides) 195 else: 196 raise ValueError("data_format must be \"NHWC\" or \"NCHW\".") 197 self.strides = strides 198 self.data_format = data_format 199 self.conv_op = conv2d 200 elif conv_dims == 3: 201 if data_format is None or data_format == "NDHWC": 202 strides = [1] + list(strides) + [1] 203 elif data_format == "NCDHW": 204 strides = [1, 1] + list(strides) 205 else: 206 raise ValueError("data_format must be \"NDHWC\" or \"NCDHW\". Have: %s" 207 % data_format) 208 self.strides = strides 209 self.data_format = data_format 210 self.conv_op = gen_nn_ops.conv3d 211 212 # Note that we need this adapter since argument names for conv1d don't match 213 # those for gen_nn_ops.conv2d and gen_nn_ops.conv3d. 214 # pylint: disable=redefined-builtin 215 def _conv1d(self, input, filter, strides, padding, data_format, name): 216 return conv1d( 217 value=input, 218 filters=filter, 219 stride=strides, 220 padding=padding, 221 data_format=data_format, 222 name=name) 223 224 # pylint: enable=redefined-builtin 225 226 def __call__(self, inp, filter): # pylint: disable=redefined-builtin 227 return self.conv_op( 228 input=inp, 229 filter=filter, 230 strides=self.strides, 231 padding=self.padding, 232 data_format=self.data_format, 233 name=self.name) 234 235 236 @tf_export("nn.dilation2d", v1=[]) 237 def dilation2d_v2( 238 input, # pylint: disable=redefined-builtin 239 filters, # pylint: disable=redefined-builtin 240 strides, 241 padding, 242 data_format, 243 dilations, 244 name=None): 245 """Computes the grayscale dilation of 4-D `input` and 3-D `filters` tensors. 246 247 The `input` tensor has shape `[batch, in_height, in_width, depth]` and the 248 `filters` tensor has shape `[filter_height, filter_width, depth]`, i.e., each 249 input channel is processed independently of the others with its own 250 structuring function. The `output` tensor has shape 251 `[batch, out_height, out_width, depth]`. The spatial dimensions of the output 252 tensor depend on the `padding` algorithm. We currently only support the 253 default "NHWC" `data_format`. 254 255 In detail, the grayscale morphological 2-D dilation is the max-sum correlation 256 (for consistency with `conv2d`, we use unmirrored filters): 257 258 output[b, y, x, c] = 259 max_{dy, dx} input[b, 260 strides[1] * y + rates[1] * dy, 261 strides[2] * x + rates[2] * dx, 262 c] + 263 filters[dy, dx, c] 264 265 Max-pooling is a special case when the filter has size equal to the pooling 266 kernel size and contains all zeros. 267 268 Note on duality: The dilation of `input` by the `filters` is equal to the 269 negation of the erosion of `-input` by the reflected `filters`. 270 271 Args: 272 input: A `Tensor`. Must be one of the following types: `float32`, `float64`, 273 `int32`, `uint8`, `int16`, `int8`, `int64`, `bfloat16`, `uint16`, `half`, 274 `uint32`, `uint64`. 275 4-D with shape `[batch, in_height, in_width, depth]`. 276 filters: A `Tensor`. Must have the same type as `input`. 277 3-D with shape `[filter_height, filter_width, depth]`. 278 strides: A list of `ints` that has length `>= 4`. 279 The stride of the sliding window for each dimension of the input 280 tensor. Must be: `[1, stride_height, stride_width, 1]`. 281 padding: A `string` from: `"SAME", "VALID"`. 282 The type of padding algorithm to use. 283 data_format: A `string`, only `"NCHW"` is currently supported. 284 dilations: A list of `ints` that has length `>= 4`. 285 The input stride for atrous morphological dilation. Must be: 286 `[1, rate_height, rate_width, 1]`. 287 name: A name for the operation (optional). 288 289 Returns: 290 A `Tensor`. Has the same type as `input`. 291 """ 292 if data_format != "NCHW": 293 raise ValueError("Data formats other than NCHW are not yet supported") 294 295 return gen_nn_ops.dilation2d(input=input, 296 filter=filters, 297 strides=strides, 298 rates=dilations, 299 padding=padding, 300 name=name) 301 302 303 @tf_export(v1=["nn.dilation2d"]) 304 def dilation2d_v1( # pylint: disable=missing-docstring 305 input, # pylint: disable=redefined-builtin 306 filter=None, # pylint: disable=redefined-builtin 307 strides=None, 308 rates=None, 309 padding=None, 310 name=None, 311 filters=None, 312 dilations=None): 313 filter = deprecated_argument_lookup("filters", filters, "filter", filter) 314 rates = deprecated_argument_lookup("dilations", dilations, "rates", rates) 315 return gen_nn_ops.dilation2d(input, filter, strides, rates, padding, name) 316 317 318 dilation2d_v1.__doc__ = gen_nn_ops.dilation2d.__doc__ 319 320 321 @tf_export("nn.with_space_to_batch") 322 def with_space_to_batch( 323 input, # pylint: disable=redefined-builtin 324 dilation_rate, 325 padding, 326 op, 327 filter_shape=None, 328 spatial_dims=None, 329 data_format=None): 330 """Performs `op` on the space-to-batch representation of `input`. 331 332 This has the effect of transforming sliding window operations into the 333 corresponding "atrous" operation in which the input is sampled at the 334 specified `dilation_rate`. 335 336 In the special case that `dilation_rate` is uniformly 1, this simply returns: 337 338 op(input, num_spatial_dims, padding) 339 340 Otherwise, it returns: 341 342 batch_to_space_nd( 343 op(space_to_batch_nd(input, adjusted_dilation_rate, adjusted_paddings), 344 num_spatial_dims, 345 "VALID") 346 adjusted_dilation_rate, 347 adjusted_crops), 348 349 where: 350 351 adjusted_dilation_rate is an int64 tensor of shape [max(spatial_dims)], 352 adjusted_{paddings,crops} are int64 tensors of shape [max(spatial_dims), 2] 353 354 defined as follows: 355 356 We first define two int64 tensors `paddings` and `crops` of shape 357 `[num_spatial_dims, 2]` based on the value of `padding` and the spatial 358 dimensions of the `input`: 359 360 If `padding = "VALID"`, then: 361 362 paddings, crops = required_space_to_batch_paddings( 363 input_shape[spatial_dims], 364 dilation_rate) 365 366 If `padding = "SAME"`, then: 367 368 dilated_filter_shape = 369 filter_shape + (filter_shape - 1) * (dilation_rate - 1) 370 371 paddings, crops = required_space_to_batch_paddings( 372 input_shape[spatial_dims], 373 dilation_rate, 374 [(dilated_filter_shape - 1) // 2, 375 dilated_filter_shape - 1 - (dilated_filter_shape - 1) // 2]) 376 377 Because `space_to_batch_nd` and `batch_to_space_nd` assume that the spatial 378 dimensions are contiguous starting at the second dimension, but the specified 379 `spatial_dims` may not be, we must adjust `dilation_rate`, `paddings` and 380 `crops` in order to be usable with these operations. For a given dimension, 381 if the block size is 1, and both the starting and ending padding and crop 382 amounts are 0, then space_to_batch_nd effectively leaves that dimension alone, 383 which is what is needed for dimensions not part of `spatial_dims`. 384 Furthermore, `space_to_batch_nd` and `batch_to_space_nd` handle this case 385 efficiently for any number of leading and trailing dimensions. 386 387 For 0 <= i < len(spatial_dims), we assign: 388 389 adjusted_dilation_rate[spatial_dims[i] - 1] = dilation_rate[i] 390 adjusted_paddings[spatial_dims[i] - 1, :] = paddings[i, :] 391 adjusted_crops[spatial_dims[i] - 1, :] = crops[i, :] 392 393 All unassigned values of `adjusted_dilation_rate` default to 1, while all 394 unassigned values of `adjusted_paddings` and `adjusted_crops` default to 0. 395 396 Note in the case that `dilation_rate` is not uniformly 1, specifying "VALID" 397 padding is equivalent to specifying `padding = "SAME"` with a filter_shape of 398 `[1]*N`. 399 400 Advanced usage. Note the following optimization: A sequence of 401 `with_space_to_batch` operations with identical (not uniformly 1) 402 `dilation_rate` parameters and "VALID" padding 403 404 net = with_space_to_batch(net, dilation_rate, "VALID", op_1) 405 ... 406 net = with_space_to_batch(net, dilation_rate, "VALID", op_k) 407 408 can be combined into a single `with_space_to_batch` operation as follows: 409 410 def combined_op(converted_input, num_spatial_dims, _): 411 result = op_1(converted_input, num_spatial_dims, "VALID") 412 ... 413 result = op_k(result, num_spatial_dims, "VALID") 414 415 net = with_space_to_batch(net, dilation_rate, "VALID", combined_op) 416 417 This eliminates the overhead of `k-1` calls to `space_to_batch_nd` and 418 `batch_to_space_nd`. 419 420 Similarly, a sequence of `with_space_to_batch` operations with identical (not 421 uniformly 1) `dilation_rate` parameters, "SAME" padding, and odd filter 422 dimensions 423 424 net = with_space_to_batch(net, dilation_rate, "SAME", op_1, filter_shape_1) 425 ... 426 net = with_space_to_batch(net, dilation_rate, "SAME", op_k, filter_shape_k) 427 428 can be combined into a single `with_space_to_batch` operation as follows: 429 430 def combined_op(converted_input, num_spatial_dims, _): 431 result = op_1(converted_input, num_spatial_dims, "SAME") 432 ... 433 result = op_k(result, num_spatial_dims, "SAME") 434 435 net = with_space_to_batch(net, dilation_rate, "VALID", combined_op) 436 437 Args: 438 input: Tensor of rank > max(spatial_dims). 439 dilation_rate: int32 Tensor of *known* shape [num_spatial_dims]. 440 padding: str constant equal to "VALID" or "SAME" 441 op: Function that maps (input, num_spatial_dims, padding) -> output 442 filter_shape: If padding = "SAME", specifies the shape of the convolution 443 kernel/pooling window as an integer Tensor of shape [>=num_spatial_dims]. 444 If padding = "VALID", filter_shape is ignored and need not be specified. 445 spatial_dims: Monotonically increasing sequence of `num_spatial_dims` 446 integers (which are >= 1) specifying the spatial dimensions of `input` 447 and output. Defaults to: `range(1, num_spatial_dims+1)`. 448 data_format: A string or None. Specifies whether the channel dimension of 449 the `input` and output is the last dimension (default, or if `data_format` 450 does not start with "NC"), or the second dimension (if `data_format` 451 starts with "NC"). For N=1, the valid values are "NWC" (default) and 452 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 453 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 454 455 Returns: 456 The output Tensor as described above, dimensions will vary based on the op 457 provided. 458 459 Raises: 460 ValueError: if `padding` is invalid or the arguments are incompatible. 461 ValueError: if `spatial_dims` are invalid. 462 463 """ 464 input = ops.convert_to_tensor(input, name="input") # pylint: disable=redefined-builtin 465 input_shape = input.get_shape() 466 467 def build_op(num_spatial_dims, padding): 468 return lambda inp, _: op(inp, num_spatial_dims, padding) 469 470 new_op = _WithSpaceToBatch( 471 input_shape, 472 dilation_rate, 473 padding, 474 build_op, 475 filter_shape=filter_shape, 476 spatial_dims=spatial_dims, 477 data_format=data_format) 478 return new_op(input, None) 479 480 481 class _WithSpaceToBatch(object): 482 """Helper class for with_space_to_batch. 483 484 Note that this class assumes that shapes of input and filter passed to 485 __call__ are compatible with input_shape and filter_shape passed to the 486 constructor. 487 488 Arguments 489 input_shape: static shape of input. i.e. input.get_shape(). 490 dilation_rate: see with_space_to_batch 491 padding: see with_space_to_batch 492 build_op: Function that maps (num_spatial_dims, paddings) -> (function that 493 maps (input, filter) -> output). 494 filter_shape: see with_space_to_batch 495 spatial_dims: see with_space_to_batch 496 data_format: see with_space_to_batch 497 """ 498 499 def __init__(self, 500 input_shape, 501 dilation_rate, 502 padding, 503 build_op, 504 filter_shape=None, 505 spatial_dims=None, 506 data_format=None): 507 """Helper class for _with_space_to_batch.""" 508 dilation_rate = ops.convert_to_tensor( 509 dilation_rate, dtypes.int32, name="dilation_rate") 510 try: 511 rate_shape = dilation_rate.get_shape().with_rank(1) 512 except ValueError: 513 raise ValueError("rate must be rank 1") 514 515 if not dilation_rate.get_shape().is_fully_defined(): 516 raise ValueError("rate must have known shape") 517 518 num_spatial_dims = rate_shape.dims[0].value 519 520 if data_format is not None and data_format.startswith("NC"): 521 starting_spatial_dim = 2 522 else: 523 starting_spatial_dim = 1 524 525 if spatial_dims is None: 526 spatial_dims = range(starting_spatial_dim, 527 num_spatial_dims + starting_spatial_dim) 528 orig_spatial_dims = list(spatial_dims) 529 spatial_dims = sorted(set(int(x) for x in orig_spatial_dims)) 530 if spatial_dims != orig_spatial_dims or any(x < 1 for x in spatial_dims): 531 raise ValueError( 532 "spatial_dims must be a montonically increasing sequence of positive " 533 "integers") 534 535 if data_format is not None and data_format.startswith("NC"): 536 expected_input_rank = spatial_dims[-1] 537 else: 538 expected_input_rank = spatial_dims[-1] + 1 539 540 try: 541 input_shape.with_rank_at_least(expected_input_rank) 542 except ValueError: 543 raise ValueError( 544 "input tensor must have rank %d at least" % (expected_input_rank)) 545 546 const_rate = tensor_util.constant_value(dilation_rate) 547 rate_or_const_rate = dilation_rate 548 if const_rate is not None: 549 rate_or_const_rate = const_rate 550 if np.any(const_rate < 1): 551 raise ValueError("dilation_rate must be positive") 552 if np.all(const_rate == 1): 553 self.call = build_op(num_spatial_dims, padding) 554 return 555 556 # We have two padding contributions. The first is used for converting "SAME" 557 # to "VALID". The second is required so that the height and width of the 558 # zero-padded value tensor are multiples of rate. 559 560 # Padding required to reduce to "VALID" convolution 561 if padding == "SAME": 562 if filter_shape is None: 563 raise ValueError("filter_shape must be specified for SAME padding") 564 filter_shape = ops.convert_to_tensor(filter_shape, name="filter_shape") 565 const_filter_shape = tensor_util.constant_value(filter_shape) 566 if const_filter_shape is not None: 567 filter_shape = const_filter_shape 568 self.base_paddings = _with_space_to_batch_base_paddings( 569 const_filter_shape, num_spatial_dims, rate_or_const_rate) 570 else: 571 self.num_spatial_dims = num_spatial_dims 572 self.rate_or_const_rate = rate_or_const_rate 573 self.base_paddings = None 574 elif padding == "VALID": 575 self.base_paddings = np.zeros([num_spatial_dims, 2], np.int32) 576 else: 577 raise ValueError("Invalid padding method %r" % padding) 578 579 self.input_shape = input_shape 580 self.spatial_dims = spatial_dims 581 self.dilation_rate = dilation_rate 582 self.data_format = data_format 583 self.op = build_op(num_spatial_dims, "VALID") 584 self.call = self._with_space_to_batch_call 585 586 def _with_space_to_batch_call(self, inp, filter): # pylint: disable=redefined-builtin 587 """Call functionality for with_space_to_batch.""" 588 # Handle input whose shape is unknown during graph creation. 589 input_spatial_shape = None 590 input_shape = self.input_shape 591 spatial_dims = self.spatial_dims 592 if input_shape.ndims is not None: 593 input_shape_list = input_shape.as_list() 594 input_spatial_shape = [input_shape_list[i] for i in spatial_dims] 595 if input_spatial_shape is None or None in input_spatial_shape: 596 input_shape_tensor = array_ops.shape(inp) 597 input_spatial_shape = array_ops.stack( 598 [input_shape_tensor[i] for i in spatial_dims]) 599 600 base_paddings = self.base_paddings 601 if base_paddings is None: 602 # base_paddings could not be computed at build time since static filter 603 # shape was not fully defined. 604 filter_shape = array_ops.shape(filter) 605 base_paddings = _with_space_to_batch_base_paddings( 606 filter_shape, self.num_spatial_dims, self.rate_or_const_rate) 607 paddings, crops = array_ops.required_space_to_batch_paddings( 608 input_shape=input_spatial_shape, 609 base_paddings=base_paddings, 610 block_shape=self.dilation_rate) 611 612 dilation_rate = _with_space_to_batch_adjust(self.dilation_rate, 1, 613 spatial_dims) 614 paddings = _with_space_to_batch_adjust(paddings, 0, spatial_dims) 615 crops = _with_space_to_batch_adjust(crops, 0, spatial_dims) 616 input_converted = array_ops.space_to_batch_nd( 617 input=inp, block_shape=dilation_rate, paddings=paddings) 618 619 result = self.op(input_converted, filter) 620 621 result_converted = array_ops.batch_to_space_nd( 622 input=result, block_shape=dilation_rate, crops=crops) 623 624 # Recover channel information for output shape if channels are not last. 625 if self.data_format is not None and self.data_format.startswith("NC"): 626 if not result_converted.shape.dims[1].value and filter is not None: 627 output_shape = result_converted.shape.as_list() 628 output_shape[1] = filter.shape[-1] 629 result_converted.set_shape(output_shape) 630 631 return result_converted 632 633 def __call__(self, inp, filter): # pylint: disable=redefined-builtin 634 return self.call(inp, filter) 635 636 637 def _with_space_to_batch_base_paddings(filter_shape, num_spatial_dims, 638 rate_or_const_rate): 639 """Helper function to compute base_paddings.""" 640 # Spatial dimensions of the filters and the upsampled filters in which we 641 # introduce (rate - 1) zeros between consecutive filter values. 642 filter_spatial_shape = filter_shape[:num_spatial_dims] 643 dilated_filter_spatial_shape = ( 644 filter_spatial_shape + (filter_spatial_shape - 1) * 645 (rate_or_const_rate - 1)) 646 pad_extra_shape = dilated_filter_spatial_shape - 1 647 648 # When full_padding_shape is odd, we pad more at end, following the same 649 # convention as conv2d. 650 pad_extra_start = pad_extra_shape // 2 651 pad_extra_end = pad_extra_shape - pad_extra_start 652 base_paddings = array_ops.stack( 653 [[pad_extra_start[i], pad_extra_end[i]] for i in range(num_spatial_dims)]) 654 return base_paddings 655 656 657 def _with_space_to_batch_adjust(orig, fill_value, spatial_dims): 658 """Returns an `adjusted` version of `orig` based on `spatial_dims`. 659 660 Tensor of the same type as `orig` and with shape 661 `[max(spatial_dims), ...]` where: 662 663 adjusted[spatial_dims[i] - 1, ...] = orig[i, ...] 664 665 for 0 <= i < len(spatial_dims), and 666 667 adjusted[j, ...] = fill_value 668 669 for j != spatial_dims[i] - 1 for some i. 670 671 If `orig` is a constant value, then the result will be a constant value. 672 673 Args: 674 orig: Tensor of rank > max(spatial_dims). 675 fill_value: Numpy scalar (of same data type as `orig) specifying the fill 676 value for non-spatial dimensions. 677 spatial_dims: See with_space_to_batch. 678 679 Returns: 680 `adjusted` tensor. 681 """ 682 fill_dims = orig.get_shape().as_list()[1:] 683 dtype = orig.dtype.as_numpy_dtype 684 parts = [] 685 const_orig = tensor_util.constant_value(orig) 686 const_or_orig = const_orig if const_orig is not None else orig 687 prev_spatial_dim = 0 688 i = 0 689 while i < len(spatial_dims): 690 start_i = i 691 start_spatial_dim = spatial_dims[i] 692 if start_spatial_dim > 1: 693 # Fill in any gap from the previous spatial dimension (or dimension 1 if 694 # this is the first spatial dimension) with `fill_value`. 695 parts.append( 696 np.full( 697 [start_spatial_dim - 1 - prev_spatial_dim] + fill_dims, 698 fill_value, 699 dtype=dtype)) 700 # Find the largest value of i such that: 701 # [spatial_dims[start_i], ..., spatial_dims[i]] 702 # == [start_spatial_dim, ..., start_spatial_dim + i - start_i], 703 # i.e. the end of a contiguous group of spatial dimensions. 704 while (i + 1 < len(spatial_dims) and 705 spatial_dims[i + 1] == spatial_dims[i] + 1): 706 i += 1 707 parts.append(const_or_orig[start_i:i + 1]) 708 prev_spatial_dim = spatial_dims[i] 709 i += 1 710 if const_orig is not None: 711 return np.concatenate(parts) 712 else: 713 return array_ops.concat(parts, 0) 714 715 716 def _get_strides_and_dilation_rate(num_spatial_dims, strides, dilation_rate): 717 """Helper function for verifying strides and dilation_rate arguments. 718 719 This is used by `convolution` and `pool`. 720 721 Args: 722 num_spatial_dims: int 723 strides: Optional. List of N ints >= 1. Defaults to [1]*N. If any value 724 of strides is > 1, then all values of dilation_rate must be 1. 725 dilation_rate: Optional. List of N ints >= 1. Defaults to [1]*N. If any 726 value of dilation_rate is > 1, then all values of strides must be 1. 727 728 Returns: 729 Normalized (strides, dilation_rate) as int32 numpy arrays of shape 730 [num_spatial_dims]. 731 732 Raises: 733 ValueError: if the parameters are invalid. 734 """ 735 if dilation_rate is None: 736 dilation_rate = [1] * num_spatial_dims 737 elif len(dilation_rate) != num_spatial_dims: 738 raise ValueError("len(dilation_rate)=%d but should be %d" % 739 (len(dilation_rate), num_spatial_dims)) 740 dilation_rate = np.array(dilation_rate, dtype=np.int32) 741 if np.any(dilation_rate < 1): 742 raise ValueError("all values of dilation_rate must be positive") 743 744 if strides is None: 745 strides = [1] * num_spatial_dims 746 elif len(strides) != num_spatial_dims: 747 raise ValueError("len(strides)=%d but should be %d" % (len(strides), 748 num_spatial_dims)) 749 strides = np.array(strides, dtype=np.int32) 750 if np.any(strides < 1): 751 raise ValueError("all values of strides must be positive") 752 753 if np.any(strides > 1) and np.any(dilation_rate > 1): 754 raise ValueError( 755 "strides > 1 not supported in conjunction with dilation_rate > 1") 756 return strides, dilation_rate 757 758 759 @tf_export(v1=["nn.convolution"]) 760 def convolution( 761 input, # pylint: disable=redefined-builtin 762 filter, # pylint: disable=redefined-builtin 763 padding, 764 strides=None, 765 dilation_rate=None, 766 name=None, 767 data_format=None, 768 filters=None, 769 dilations=None): 770 """Computes sums of N-D convolutions (actually cross-correlation). 771 772 This also supports either output striding via the optional `strides` parameter 773 or atrous convolution (also known as convolution with holes or dilated 774 convolution, based on the French word "trous" meaning holes in English) via 775 the optional `dilation_rate` parameter. Currently, however, output striding 776 is not supported for atrous convolutions. 777 778 Specifically, in the case that `data_format` does not start with "NC", given 779 a rank (N+2) `input` Tensor of shape 780 781 [num_batches, 782 input_spatial_shape[0], 783 ..., 784 input_spatial_shape[N-1], 785 num_input_channels], 786 787 a rank (N+2) `filter` Tensor of shape 788 789 [spatial_filter_shape[0], 790 ..., 791 spatial_filter_shape[N-1], 792 num_input_channels, 793 num_output_channels], 794 795 an optional `dilation_rate` tensor of shape [N] (defaulting to [1]*N) 796 specifying the filter upsampling/input downsampling rate, and an optional list 797 of N `strides` (defaulting [1]*N), this computes for each N-D spatial output 798 position (x[0], ..., x[N-1]): 799 800 ``` 801 output[b, x[0], ..., x[N-1], k] = 802 sum_{z[0], ..., z[N-1], q} 803 filter[z[0], ..., z[N-1], q, k] * 804 padded_input[b, 805 x[0]*strides[0] + dilation_rate[0]*z[0], 806 ..., 807 x[N-1]*strides[N-1] + dilation_rate[N-1]*z[N-1], 808 q] 809 ``` 810 where b is the index into the batch, k is the output channel number, q is the 811 input channel number, and z is the N-D spatial offset within the filter. Here, 812 `padded_input` is obtained by zero padding the input using an effective 813 spatial filter shape of `(spatial_filter_shape-1) * dilation_rate + 1` and 814 output striding `strides` as described in the 815 [comment here](https://tensorflow.org/api_guides/python/nn#Convolution). 816 817 In the case that `data_format` does start with `"NC"`, the `input` and output 818 (but not the `filter`) are simply transposed as follows: 819 820 convolution(input, data_format, **kwargs) = 821 tf.transpose(convolution(tf.transpose(input, [0] + range(2,N+2) + [1]), 822 **kwargs), 823 [0, N+1] + range(1, N+1)) 824 825 It is required that 1 <= N <= 3. 826 827 Args: 828 input: An (N+2)-D `Tensor` of type `T`, of shape 829 `[batch_size] + input_spatial_shape + [in_channels]` if data_format does 830 not start with "NC" (default), or 831 `[batch_size, in_channels] + input_spatial_shape` if data_format starts 832 with "NC". 833 filter: An (N+2)-D `Tensor` with the same type as `input` and shape 834 `spatial_filter_shape + [in_channels, out_channels]`. 835 padding: A string, either `"VALID"` or `"SAME"`. The padding algorithm. 836 strides: Optional. Sequence of N ints >= 1. Specifies the output stride. 837 Defaults to [1]*N. If any value of strides is > 1, then all values of 838 dilation_rate must be 1. 839 dilation_rate: Optional. Sequence of N ints >= 1. Specifies the filter 840 upsampling/input downsampling rate. In the literature, the same parameter 841 is sometimes called `input stride` or `dilation`. The effective filter 842 size used for the convolution will be `spatial_filter_shape + 843 (spatial_filter_shape - 1) * (rate - 1)`, obtained by inserting 844 (dilation_rate[i]-1) zeros between consecutive elements of the original 845 filter in each spatial dimension i. If any value of dilation_rate is > 1, 846 then all values of strides must be 1. 847 name: Optional name for the returned tensor. 848 data_format: A string or None. Specifies whether the channel dimension of 849 the `input` and output is the last dimension (default, or if `data_format` 850 does not start with "NC"), or the second dimension (if `data_format` 851 starts with "NC"). For N=1, the valid values are "NWC" (default) and 852 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 853 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 854 filters: Alias of filter. 855 dilations: Alias of dilation_rate. 856 857 Returns: 858 A `Tensor` with the same type as `input` of shape 859 860 `[batch_size] + output_spatial_shape + [out_channels]` 861 862 if data_format is None or does not start with "NC", or 863 864 `[batch_size, out_channels] + output_spatial_shape` 865 866 if data_format starts with "NC", 867 where `output_spatial_shape` depends on the value of `padding`. 868 869 If padding == "SAME": 870 output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i]) 871 872 If padding == "VALID": 873 output_spatial_shape[i] = 874 ceil((input_spatial_shape[i] - 875 (spatial_filter_shape[i]-1) * dilation_rate[i]) 876 / strides[i]). 877 878 Raises: 879 ValueError: If input/output depth does not match `filter` shape, if padding 880 is other than `"VALID"` or `"SAME"`, or if data_format is invalid. 881 882 """ 883 filter = deprecated_argument_lookup("filters", filters, "filter", filter) 884 dilation_rate = deprecated_argument_lookup( 885 "dilations", dilations, "dilation_rate", dilation_rate) 886 return convolution_internal( 887 input, 888 filter, 889 strides=strides, 890 padding=padding, 891 data_format=data_format, 892 dilations=dilation_rate, 893 name=name) 894 895 896 @tf_export("nn.convolution", v1=[]) 897 def convolution_v2( 898 input, # pylint: disable=redefined-builtin 899 filters, 900 strides=None, 901 padding="VALID", 902 data_format=None, 903 dilations=None, 904 name=None): 905 return convolution_internal( 906 input, # pylint: disable=redefined-builtin 907 filters, 908 strides=strides, 909 padding=padding, 910 data_format=data_format, 911 dilations=dilations, 912 name=name) 913 914 915 convolution_v2.__doc__ = deprecation.rewrite_argument_docstring( 916 deprecation.rewrite_argument_docstring( 917 convolution.__doc__, "dilation_rate", "dilations"), 918 "filter", "filters") 919 920 921 def convolution_internal( 922 input, # pylint: disable=redefined-builtin 923 filters, 924 strides=None, 925 padding="VALID", 926 data_format=None, 927 dilations=None, 928 name=None): 929 """Internal function which performs rank agnostic convolution.""" 930 with ops.name_scope(name, "convolution", [input, filters]) as name: 931 if isinstance(input.shape, tensor_shape.TensorShape) and \ 932 input.shape.rank is not None: 933 n = len(input.shape) - 2 934 elif not isinstance(input.shape, tensor_shape.TensorShape) and \ 935 input.shape is not None: 936 n = len(input.shape) - 2 937 elif isinstance(filters.shape, tensor_shape.TensorShape) and \ 938 filters.shape.rank is not None: 939 n = len(filters.shape) - 2 940 elif not isinstance(filters.shape, tensor_shape.TensorShape) and \ 941 filters.shape is not None: 942 n = len(filters.shape) - 2 943 else: 944 raise ValueError("rank of input or filter must be known") 945 946 if not 1 <= n <= 3: 947 raise ValueError( 948 "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2)) 949 950 if data_format is None: 951 channel_index = n + 1 952 else: 953 channel_index = 1 if data_format.startswith("NC") else n + 1 954 955 strides = _get_sequence(strides, n, channel_index, "strides") 956 dilations = _get_sequence(dilations, n, channel_index, "dilations") 957 958 conv_ops = {1: conv1d, 2: gen_nn_ops.conv2d, 3: gen_nn_ops.conv3d} 959 960 if all(i == 1 for i in dilations): 961 # fast path if no dilation as gradient only supported on GPU for dilations 962 op = conv_ops[n] 963 return op( 964 input, 965 filters, 966 strides, 967 padding=padding, 968 data_format=data_format, 969 dilations=dilations, 970 name=name) 971 else: 972 if channel_index == 1: 973 strides = strides[2:] 974 dilations = dilations[2:] 975 else: 976 strides = strides[1:-1] 977 dilations = dilations[1:-1] 978 979 op = Convolution( 980 tensor_shape.as_shape(input.shape), 981 tensor_shape.as_shape(filters.shape), 982 padding, 983 strides=strides, 984 dilation_rate=dilations, 985 name=name, 986 data_format=data_format) 987 return op(input, filters) 988 989 990 class Convolution(object): 991 """Helper class for convolution. 992 993 Note that this class assumes that shapes of input and filter passed to 994 __call__ are compatible with input_shape and filter_shape passed to the 995 constructor. 996 997 Arguments 998 input_shape: static shape of input. i.e. input.get_shape(). 999 filter_shape: static shape of the filter. i.e. filter.get_shape(). 1000 padding: see convolution. 1001 strides: see convolution. 1002 dilation_rate: see convolution. 1003 name: see convolution. 1004 data_format: see convolution. 1005 """ 1006 1007 def __init__(self, 1008 input_shape, 1009 filter_shape, 1010 padding, 1011 strides=None, 1012 dilation_rate=None, 1013 name=None, 1014 data_format=None): 1015 """Helper function for convolution.""" 1016 num_total_dims = filter_shape.ndims 1017 if num_total_dims is None: 1018 num_total_dims = input_shape.ndims 1019 if num_total_dims is None: 1020 raise ValueError("rank of input or filter must be known") 1021 1022 num_spatial_dims = num_total_dims - 2 1023 1024 try: 1025 input_shape.with_rank(num_spatial_dims + 2) 1026 except ValueError: 1027 raise ValueError( 1028 "input tensor must have rank %d" % (num_spatial_dims + 2)) 1029 1030 try: 1031 filter_shape.with_rank(num_spatial_dims + 2) 1032 except ValueError: 1033 raise ValueError( 1034 "filter tensor must have rank %d" % (num_spatial_dims + 2)) 1035 1036 if data_format is None or not data_format.startswith("NC"): 1037 input_channels_dim = tensor_shape.dimension_at_index( 1038 input_shape, num_spatial_dims + 1) 1039 spatial_dims = range(1, num_spatial_dims + 1) 1040 else: 1041 input_channels_dim = tensor_shape.dimension_at_index(input_shape, 1) 1042 spatial_dims = range(2, num_spatial_dims + 2) 1043 1044 if not input_channels_dim.is_compatible_with( 1045 filter_shape[num_spatial_dims]): 1046 raise ValueError( 1047 "number of input channels does not match corresponding dimension of " 1048 "filter, {} != {}".format(input_channels_dim, 1049 filter_shape[num_spatial_dims])) 1050 1051 strides, dilation_rate = _get_strides_and_dilation_rate( 1052 num_spatial_dims, strides, dilation_rate) 1053 1054 self.input_shape = input_shape 1055 self.filter_shape = filter_shape 1056 self.data_format = data_format 1057 self.strides = strides 1058 self.name = name 1059 self.conv_op = _WithSpaceToBatch( 1060 input_shape, 1061 dilation_rate=dilation_rate, 1062 padding=padding, 1063 build_op=self._build_op, 1064 filter_shape=filter_shape, 1065 spatial_dims=spatial_dims, 1066 data_format=data_format) 1067 1068 def _build_op(self, _, padding): 1069 return _NonAtrousConvolution( 1070 self.input_shape, 1071 filter_shape=self.filter_shape, 1072 padding=padding, 1073 data_format=self.data_format, 1074 strides=self.strides, 1075 name=self.name) 1076 1077 def __call__(self, inp, filter): # pylint: disable=redefined-builtin 1078 return self.conv_op(inp, filter) 1079 1080 1081 @tf_export(v1=["nn.pool"]) 1082 def pool( 1083 input, # pylint: disable=redefined-builtin 1084 window_shape, 1085 pooling_type, 1086 padding, 1087 dilation_rate=None, 1088 strides=None, 1089 name=None, 1090 data_format=None, 1091 dilations=None): 1092 """Performs an N-D pooling operation. 1093 1094 In the case that `data_format` does not start with "NC", computes for 1095 0 <= b < batch_size, 1096 0 <= x[i] < output_spatial_shape[i], 1097 0 <= c < num_channels: 1098 1099 ``` 1100 output[b, x[0], ..., x[N-1], c] = 1101 REDUCE_{z[0], ..., z[N-1]} 1102 input[b, 1103 x[0] * strides[0] - pad_before[0] + dilation_rate[0]*z[0], 1104 ... 1105 x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1], 1106 c], 1107 ``` 1108 1109 where the reduction function REDUCE depends on the value of `pooling_type`, 1110 and pad_before is defined based on the value of `padding` as described in 1111 the "returns" section of `tf.nn.convolution` for details. 1112 The reduction never includes out-of-bounds positions. 1113 1114 In the case that `data_format` starts with `"NC"`, the `input` and output are 1115 simply transposed as follows: 1116 1117 ``` 1118 pool(input, data_format, **kwargs) = 1119 tf.transpose(pool(tf.transpose(input, [0] + range(2,N+2) + [1]), 1120 **kwargs), 1121 [0, N+1] + range(1, N+1)) 1122 ``` 1123 1124 Args: 1125 input: Tensor of rank N+2, of shape 1126 `[batch_size] + input_spatial_shape + [num_channels]` if data_format does 1127 not start with "NC" (default), or 1128 `[batch_size, num_channels] + input_spatial_shape` if data_format starts 1129 with "NC". Pooling happens over the spatial dimensions only. 1130 window_shape: Sequence of N ints >= 1. 1131 pooling_type: Specifies pooling operation, must be "AVG" or "MAX". 1132 padding: The padding algorithm, must be "SAME" or "VALID". 1133 See the "returns" section of `tf.nn.convolution` for details. 1134 dilation_rate: Optional. Dilation rate. List of N ints >= 1. 1135 Defaults to [1]*N. If any value of dilation_rate is > 1, then all values 1136 of strides must be 1. 1137 strides: Optional. Sequence of N ints >= 1. Defaults to [1]*N. 1138 If any value of strides is > 1, then all values of dilation_rate must be 1139 1. 1140 name: Optional. Name of the op. 1141 data_format: A string or None. Specifies whether the channel dimension of 1142 the `input` and output is the last dimension (default, or if `data_format` 1143 does not start with "NC"), or the second dimension (if `data_format` 1144 starts with "NC"). For N=1, the valid values are "NWC" (default) and 1145 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 1146 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 1147 dilations: Alias for dilation_rate 1148 1149 Returns: 1150 Tensor of rank N+2, of shape 1151 [batch_size] + output_spatial_shape + [num_channels] 1152 1153 if data_format is None or does not start with "NC", or 1154 1155 [batch_size, num_channels] + output_spatial_shape 1156 1157 if data_format starts with "NC", 1158 where `output_spatial_shape` depends on the value of padding: 1159 1160 If padding = "SAME": 1161 output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i]) 1162 1163 If padding = "VALID": 1164 output_spatial_shape[i] = 1165 ceil((input_spatial_shape[i] - (window_shape[i] - 1) * dilation_rate[i]) 1166 / strides[i]). 1167 1168 Raises: 1169 ValueError: if arguments are invalid. 1170 1171 """ 1172 dilation_rate = deprecated_argument_lookup( 1173 "dilations", dilations, "dilation_rate", dilation_rate) 1174 # pylint: enable=line-too-long 1175 with ops.name_scope(name, "%s_pool" % (pooling_type.lower()), 1176 [input]) as scope: 1177 input = ops.convert_to_tensor(input, name="input") # pylint: disable=redefined-builtin 1178 1179 num_spatial_dims = len(window_shape) 1180 if num_spatial_dims < 1 or num_spatial_dims > 3: 1181 raise ValueError("It is required that 1 <= num_spatial_dims <= 3.") 1182 1183 input.get_shape().with_rank(num_spatial_dims + 2) 1184 1185 strides, dilation_rate = _get_strides_and_dilation_rate( 1186 num_spatial_dims, strides, dilation_rate) 1187 1188 if padding == "SAME" and np.any(dilation_rate > 1): 1189 raise ValueError( 1190 "pooling with SAME padding is not implemented for dilation_rate > 1") 1191 1192 if np.any(strides > window_shape): 1193 raise ValueError( 1194 "strides > window_shape not supported due to inconsistency between " 1195 "CPU and GPU implementations") 1196 1197 pooling_ops = { 1198 ("MAX", 1): max_pool, 1199 ("MAX", 2): max_pool, 1200 ("MAX", 3): max_pool3d, # pylint: disable=undefined-variable 1201 ("AVG", 1): avg_pool, 1202 ("AVG", 2): avg_pool, 1203 ("AVG", 3): avg_pool3d, # pylint: disable=undefined-variable 1204 } 1205 op_key = (pooling_type, num_spatial_dims) 1206 if op_key not in pooling_ops: 1207 raise ValueError("%d-D %s pooling is not supported." % (op_key[1], 1208 op_key[0])) 1209 1210 if data_format is None or not data_format.startswith("NC"): 1211 adjusted_window_shape = [1] + list(window_shape) + [1] 1212 adjusted_strides = [1] + list(strides) + [1] 1213 spatial_dims = range(1, num_spatial_dims + 1) 1214 else: 1215 adjusted_window_shape = [1, 1] + list(window_shape) 1216 adjusted_strides = [1, 1] + list(strides) 1217 spatial_dims = range(2, num_spatial_dims + 2) 1218 1219 if num_spatial_dims == 1: 1220 if data_format is None or data_format == "NWC": 1221 data_format_kwargs = dict(data_format="NHWC") 1222 elif data_format == "NCW": 1223 data_format_kwargs = dict(data_format="NCHW") 1224 else: 1225 raise ValueError("data_format must be either \"NWC\" or \"NCW\".") 1226 adjusted_window_shape = [1] + adjusted_window_shape 1227 adjusted_strides = [1] + adjusted_strides 1228 else: 1229 data_format_kwargs = dict(data_format=data_format) 1230 1231 def op(converted_input, _, converted_padding): # pylint: disable=missing-docstring 1232 if num_spatial_dims == 1: 1233 converted_input = array_ops.expand_dims(converted_input, 1234 spatial_dims[0]) 1235 result = pooling_ops[op_key]( 1236 converted_input, 1237 adjusted_window_shape, 1238 adjusted_strides, 1239 converted_padding, 1240 name=scope, 1241 **data_format_kwargs) 1242 if num_spatial_dims == 1: 1243 result = array_ops.squeeze(result, [spatial_dims[0]]) 1244 return result 1245 1246 return with_space_to_batch( 1247 input=input, 1248 dilation_rate=dilation_rate, 1249 padding=padding, 1250 op=op, 1251 spatial_dims=spatial_dims, 1252 filter_shape=window_shape) 1253 1254 1255 @tf_export("nn.pool", v1=[]) 1256 def pool_v2( 1257 input, # pylint: disable=redefined-builtin 1258 window_shape, 1259 pooling_type, 1260 strides=None, 1261 padding="VALID", 1262 data_format=None, 1263 dilations=None, 1264 name=None): 1265 # pylint: disable=line-too-long 1266 """Performs an N-D pooling operation. 1267 1268 In the case that `data_format` does not start with "NC", computes for 1269 0 <= b < batch_size, 1270 0 <= x[i] < output_spatial_shape[i], 1271 0 <= c < num_channels: 1272 1273 ``` 1274 output[b, x[0], ..., x[N-1], c] = 1275 REDUCE_{z[0], ..., z[N-1]} 1276 input[b, 1277 x[0] * strides[0] - pad_before[0] + dilation_rate[0]*z[0], 1278 ... 1279 x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1], 1280 c], 1281 ``` 1282 1283 where the reduction function REDUCE depends on the value of `pooling_type`, 1284 and pad_before is defined based on the value of `padding` as described in 1285 the "returns" section of `tf.nn.convolution` for details. 1286 The reduction never includes out-of-bounds positions. 1287 1288 In the case that `data_format` starts with `"NC"`, the `input` and output are 1289 simply transposed as follows: 1290 1291 ``` 1292 pool(input, data_format, **kwargs) = 1293 tf.transpose(pool(tf.transpose(input, [0] + range(2,N+2) + [1]), 1294 **kwargs), 1295 [0, N+1] + range(1, N+1)) 1296 ``` 1297 1298 Args: 1299 input: Tensor of rank N+2, of shape `[batch_size] + input_spatial_shape + 1300 [num_channels]` if data_format does not start with "NC" (default), or 1301 `[batch_size, num_channels] + input_spatial_shape` if data_format starts 1302 with "NC". Pooling happens over the spatial dimensions only. 1303 window_shape: Sequence of N ints >= 1. 1304 pooling_type: Specifies pooling operation, must be "AVG" or "MAX". 1305 strides: Optional. Sequence of N ints >= 1. Defaults to [1]*N. If any value of 1306 strides is > 1, then all values of dilation_rate must be 1. 1307 padding: The padding algorithm, must be "SAME" or "VALID". Defaults to "SAME". 1308 See the "returns" section of `tf.nn.convolution` for details. 1309 data_format: A string or None. Specifies whether the channel dimension of 1310 the `input` and output is the last dimension (default, or if `data_format` 1311 does not start with "NC"), or the second dimension (if `data_format` 1312 starts with "NC"). For N=1, the valid values are "NWC" (default) and 1313 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". For 1314 N=3, the valid values are "NDHWC" (default) and "NCDHW". 1315 dilations: Optional. Dilation rate. List of N ints >= 1. Defaults to 1316 [1]*N. If any value of dilation_rate is > 1, then all values of strides 1317 must be 1. 1318 name: Optional. Name of the op. 1319 1320 Returns: 1321 Tensor of rank N+2, of shape 1322 [batch_size] + output_spatial_shape + [num_channels] 1323 1324 if data_format is None or does not start with "NC", or 1325 1326 [batch_size, num_channels] + output_spatial_shape 1327 1328 if data_format starts with "NC", 1329 where `output_spatial_shape` depends on the value of padding: 1330 1331 If padding = "SAME": 1332 output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i]) 1333 1334 If padding = "VALID": 1335 output_spatial_shape[i] = 1336 ceil((input_spatial_shape[i] - (window_shape[i] - 1) * dilation_rate[i]) 1337 / strides[i]). 1338 1339 Raises: 1340 ValueError: if arguments are invalid. 1341 1342 """ 1343 return pool( 1344 input=input, 1345 window_shape=window_shape, 1346 pooling_type=pooling_type, 1347 padding=padding, 1348 dilation_rate=dilations, 1349 strides=strides, 1350 name=name, 1351 data_format=data_format) 1352 1353 1354 @tf_export("nn.atrous_conv2d") 1355 def atrous_conv2d(value, filters, rate, padding, name=None): 1356 """Atrous convolution (a.k.a. convolution with holes or dilated convolution). 1357 1358 This function is a simpler wrapper around the more general 1359 `tf.nn.convolution`, and exists only for backwards compatibility. You can 1360 use `tf.nn.convolution` to perform 1-D, 2-D, or 3-D atrous convolution. 1361 1362 1363 Computes a 2-D atrous convolution, also known as convolution with holes or 1364 dilated convolution, given 4-D `value` and `filters` tensors. If the `rate` 1365 parameter is equal to one, it performs regular 2-D convolution. If the `rate` 1366 parameter is greater than one, it performs convolution with holes, sampling 1367 the input values every `rate` pixels in the `height` and `width` dimensions. 1368 This is equivalent to convolving the input with a set of upsampled filters, 1369 produced by inserting `rate - 1` zeros between two consecutive values of the 1370 filters along the `height` and `width` dimensions, hence the name atrous 1371 convolution or convolution with holes (the French word trous means holes in 1372 English). 1373 1374 More specifically: 1375 1376 ``` 1377 output[batch, height, width, out_channel] = 1378 sum_{dheight, dwidth, in_channel} ( 1379 filters[dheight, dwidth, in_channel, out_channel] * 1380 value[batch, height + rate*dheight, width + rate*dwidth, in_channel] 1381 ) 1382 ``` 1383 1384 Atrous convolution allows us to explicitly control how densely to compute 1385 feature responses in fully convolutional networks. Used in conjunction with 1386 bilinear interpolation, it offers an alternative to `conv2d_transpose` in 1387 dense prediction tasks such as semantic image segmentation, optical flow 1388 computation, or depth estimation. It also allows us to effectively enlarge 1389 the field of view of filters without increasing the number of parameters or 1390 the amount of computation. 1391 1392 For a description of atrous convolution and how it can be used for dense 1393 feature extraction, please see: [Semantic Image Segmentation with Deep 1394 Convolutional Nets and Fully Connected CRFs](http://arxiv.org/abs/1412.7062). 1395 The same operation is investigated further in [Multi-Scale Context Aggregation 1396 by Dilated Convolutions](http://arxiv.org/abs/1511.07122). Previous works 1397 that effectively use atrous convolution in different ways are, among others, 1398 [OverFeat: Integrated Recognition, Localization and Detection using 1399 Convolutional Networks](http://arxiv.org/abs/1312.6229) and [Fast Image 1400 Scanning with Deep Max-Pooling Convolutional Neural 1401 Networks](http://arxiv.org/abs/1302.1700). 1402 Atrous convolution is also closely related to the so-called noble identities 1403 in multi-rate signal processing. 1404 1405 There are many different ways to implement atrous convolution (see the refs 1406 above). The implementation here reduces 1407 1408 ```python 1409 atrous_conv2d(value, filters, rate, padding=padding) 1410 ``` 1411 1412 to the following three operations: 1413 1414 ```python 1415 paddings = ... 1416 net = space_to_batch(value, paddings, block_size=rate) 1417 net = conv2d(net, filters, strides=[1, 1, 1, 1], padding="VALID") 1418 crops = ... 1419 net = batch_to_space(net, crops, block_size=rate) 1420 ``` 1421 1422 Advanced usage. Note the following optimization: A sequence of `atrous_conv2d` 1423 operations with identical `rate` parameters, 'SAME' `padding`, and filters 1424 with odd heights/ widths: 1425 1426 ```python 1427 net = atrous_conv2d(net, filters1, rate, padding="SAME") 1428 net = atrous_conv2d(net, filters2, rate, padding="SAME") 1429 ... 1430 net = atrous_conv2d(net, filtersK, rate, padding="SAME") 1431 ``` 1432 1433 can be equivalently performed cheaper in terms of computation and memory as: 1434 1435 ```python 1436 pad = ... # padding so that the input dims are multiples of rate 1437 net = space_to_batch(net, paddings=pad, block_size=rate) 1438 net = conv2d(net, filters1, strides=[1, 1, 1, 1], padding="SAME") 1439 net = conv2d(net, filters2, strides=[1, 1, 1, 1], padding="SAME") 1440 ... 1441 net = conv2d(net, filtersK, strides=[1, 1, 1, 1], padding="SAME") 1442 net = batch_to_space(net, crops=pad, block_size=rate) 1443 ``` 1444 1445 because a pair of consecutive `space_to_batch` and `batch_to_space` ops with 1446 the same `block_size` cancel out when their respective `paddings` and `crops` 1447 inputs are identical. 1448 1449 Args: 1450 value: A 4-D `Tensor` of type `float`. It needs to be in the default "NHWC" 1451 format. Its shape is `[batch, in_height, in_width, in_channels]`. 1452 filters: A 4-D `Tensor` with the same type as `value` and shape 1453 `[filter_height, filter_width, in_channels, out_channels]`. `filters`' 1454 `in_channels` dimension must match that of `value`. Atrous convolution is 1455 equivalent to standard convolution with upsampled filters with effective 1456 height `filter_height + (filter_height - 1) * (rate - 1)` and effective 1457 width `filter_width + (filter_width - 1) * (rate - 1)`, produced by 1458 inserting `rate - 1` zeros along consecutive elements across the 1459 `filters`' spatial dimensions. 1460 rate: A positive int32. The stride with which we sample input values across 1461 the `height` and `width` dimensions. Equivalently, the rate by which we 1462 upsample the filter values by inserting zeros across the `height` and 1463 `width` dimensions. In the literature, the same parameter is sometimes 1464 called `input stride` or `dilation`. 1465 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 1466 name: Optional name for the returned tensor. 1467 1468 Returns: 1469 A `Tensor` with the same type as `value`. 1470 Output shape with `'VALID'` padding is: 1471 1472 [batch, height - 2 * (filter_width - 1), 1473 width - 2 * (filter_height - 1), out_channels]. 1474 1475 Output shape with `'SAME'` padding is: 1476 1477 [batch, height, width, out_channels]. 1478 1479 Raises: 1480 ValueError: If input/output depth does not match `filters`' shape, or if 1481 padding is other than `'VALID'` or `'SAME'`. 1482 """ 1483 return convolution( 1484 input=value, 1485 filter=filters, 1486 padding=padding, 1487 dilation_rate=np.broadcast_to(rate, (2,)), 1488 name=name) 1489 1490 1491 def _convert_padding(padding): 1492 """Converts Python padding to C++ padding for ops which take EXPLICIT padding. 1493 1494 Args: 1495 padding: the `padding` argument for a Python op which supports EXPLICIT 1496 padding. 1497 1498 Returns: 1499 (padding, explicit_paddings) pair, which should be passed as attributes to a 1500 C++ op. 1501 1502 Raises: 1503 ValueError: If padding is invalid. 1504 """ 1505 explicit_paddings = [] 1506 if padding == "EXPLICIT": 1507 # Give a better error message if EXPLICIT is passed. 1508 raise ValueError('"EXPLICIT" is not a valid value for the padding ' 1509 "parameter. To use explicit padding, the padding " 1510 "parameter must be a list.") 1511 if isinstance(padding, (list, tuple)): 1512 for i, dim_paddings in enumerate(padding): 1513 if not isinstance(dim_paddings, (list, tuple)): 1514 raise ValueError("When padding is a list, each element of padding must " 1515 "be a list/tuple of size 2. Element with index %d of " 1516 "padding is not a list/tuple" % i) 1517 if len(dim_paddings) != 2: 1518 raise ValueError("When padding is a list, each element of padding must " 1519 "be a list/tuple of size 2. Element with index %d of " 1520 "padding has size %d" % (i, len(dim_paddings))) 1521 explicit_paddings.extend(dim_paddings) 1522 if len(padding) != 4: 1523 raise ValueError("When padding is a list, it must be of size 4. Got " 1524 "padding of size: %d" % len(padding)) 1525 padding = "EXPLICIT" 1526 return padding, explicit_paddings 1527 1528 1529 @tf_export(v1=["nn.conv1d"]) 1530 @deprecation.deprecated_arg_values( 1531 None, 1532 "`NCHW` for data_format is deprecated, use `NCW` instead", 1533 warn_once=True, 1534 data_format="NCHW") 1535 @deprecation.deprecated_arg_values( 1536 None, 1537 "`NHWC` for data_format is deprecated, use `NWC` instead", 1538 warn_once=True, 1539 data_format="NHWC") 1540 def conv1d( 1541 value=None, 1542 filters=None, 1543 stride=None, 1544 padding=None, 1545 use_cudnn_on_gpu=None, 1546 data_format=None, 1547 name=None, 1548 input=None, # pylint: disable=redefined-builtin 1549 dilations=None): 1550 r"""Computes a 1-D convolution given 3-D input and filter tensors. 1551 1552 Given an input tensor of shape 1553 [batch, in_width, in_channels] 1554 if data_format is "NWC", or 1555 [batch, in_channels, in_width] 1556 if data_format is "NCW", 1557 and a filter / kernel tensor of shape 1558 [filter_width, in_channels, out_channels], this op reshapes 1559 the arguments to pass them to conv2d to perform the equivalent 1560 convolution operation. 1561 1562 Internally, this op reshapes the input tensors and invokes `tf.nn.conv2d`. 1563 For example, if `data_format` does not start with "NC", a tensor of shape 1564 [batch, in_width, in_channels] 1565 is reshaped to 1566 [batch, 1, in_width, in_channels], 1567 and the filter is reshaped to 1568 [1, filter_width, in_channels, out_channels]. 1569 The result is then reshaped back to 1570 [batch, out_width, out_channels] 1571 \(where out_width is a function of the stride and padding as in conv2d\) and 1572 returned to the caller. 1573 1574 Args: 1575 value: A 3D `Tensor`. Must be of type `float16`, `float32`, or `float64`. 1576 filters: A 3D `Tensor`. Must have the same type as `value`. 1577 stride: An int or list of `ints` that has length `1` or `3`. The number of 1578 entries by which the filter is moved right at each step. 1579 padding: 'SAME' or 'VALID' 1580 use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. 1581 data_format: An optional `string` from `"NWC", "NCW"`. Defaults to `"NWC"`, 1582 the data is stored in the order of [batch, in_width, in_channels]. The 1583 `"NCW"` format stores data as [batch, in_channels, in_width]. 1584 name: A name for the operation (optional). 1585 input: Alias for value. 1586 dilations: An int or list of `ints` that has length `1` or `3` which 1587 defaults to 1. The dilation factor for each dimension of input. If set to 1588 k > 1, there will be k-1 skipped cells between each filter element on that 1589 dimension. Dilations in the batch and depth dimensions must be 1. 1590 1591 Returns: 1592 A `Tensor`. Has the same type as input. 1593 1594 Raises: 1595 ValueError: if `data_format` is invalid. 1596 """ 1597 value = deprecation.deprecated_argument_lookup("input", input, "value", value) 1598 with ops.name_scope(name, "conv1d", [value, filters]) as name: 1599 # Reshape the input tensor to [batch, 1, in_width, in_channels] 1600 if data_format is None or data_format == "NHWC" or data_format == "NWC": 1601 data_format = "NHWC" 1602 spatial_start_dim = 1 1603 channel_index = 2 1604 elif data_format == "NCHW" or data_format == "NCW": 1605 data_format = "NCHW" 1606 spatial_start_dim = 2 1607 channel_index = 1 1608 else: 1609 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 1610 strides = [1] + _get_sequence(stride, 1, channel_index, "stride") 1611 dilations = [1] + _get_sequence(dilations, 1, channel_index, "dilations") 1612 1613 value = array_ops.expand_dims(value, spatial_start_dim) 1614 filters = array_ops.expand_dims(filters, 0) 1615 result = gen_nn_ops.conv2d( 1616 value, 1617 filters, 1618 strides, 1619 padding, 1620 use_cudnn_on_gpu=use_cudnn_on_gpu, 1621 data_format=data_format, 1622 dilations=dilations, 1623 name=name) 1624 return array_ops.squeeze(result, [spatial_start_dim]) 1625 1626 1627 @tf_export("nn.conv1d", v1=[]) 1628 def conv1d_v2( 1629 input, # pylint: disable=redefined-builtin 1630 filters, 1631 stride, 1632 padding, 1633 data_format="NWC", 1634 dilations=None, 1635 name=None): 1636 r"""Computes a 1-D convolution given 3-D input and filter tensors. 1637 1638 Given an input tensor of shape 1639 [batch, in_width, in_channels] 1640 if data_format is "NWC", or 1641 [batch, in_channels, in_width] 1642 if data_format is "NCW", 1643 and a filter / kernel tensor of shape 1644 [filter_width, in_channels, out_channels], this op reshapes 1645 the arguments to pass them to conv2d to perform the equivalent 1646 convolution operation. 1647 1648 Internally, this op reshapes the input tensors and invokes `tf.nn.conv2d`. 1649 For example, if `data_format` does not start with "NC", a tensor of shape 1650 [batch, in_width, in_channels] 1651 is reshaped to 1652 [batch, 1, in_width, in_channels], 1653 and the filter is reshaped to 1654 [1, filter_width, in_channels, out_channels]. 1655 The result is then reshaped back to 1656 [batch, out_width, out_channels] 1657 \(where out_width is a function of the stride and padding as in conv2d\) and 1658 returned to the caller. 1659 1660 Args: 1661 input: A 3D `Tensor`. Must be of type `float16`, `float32`, or `float64`. 1662 filters: A 3D `Tensor`. Must have the same type as `input`. 1663 stride: An int or list of `ints` that has length `1` or `3`. The number of 1664 entries by which the filter is moved right at each step. 1665 padding: 'SAME' or 'VALID' 1666 data_format: An optional `string` from `"NWC", "NCW"`. Defaults to `"NWC"`, 1667 the data is stored in the order of [batch, in_width, in_channels]. The 1668 `"NCW"` format stores data as [batch, in_channels, in_width]. 1669 dilations: An int or list of `ints` that has length `1` or `3` which 1670 defaults to 1. The dilation factor for each dimension of input. If set to 1671 k > 1, there will be k-1 skipped cells between each filter element on that 1672 dimension. Dilations in the batch and depth dimensions must be 1. 1673 name: A name for the operation (optional). 1674 1675 Returns: 1676 A `Tensor`. Has the same type as input. 1677 1678 Raises: 1679 ValueError: if `data_format` is invalid. 1680 """ 1681 return conv1d( 1682 input, # pylint: disable=redefined-builtin 1683 filters, 1684 stride, 1685 padding, 1686 use_cudnn_on_gpu=True, 1687 data_format=data_format, 1688 name=name, 1689 dilations=dilations) 1690 1691 1692 @tf_export("nn.conv1d_transpose") 1693 def conv1d_transpose( 1694 input, # pylint: disable=redefined-builtin 1695 filters, 1696 output_shape, 1697 strides, 1698 padding="SAME", 1699 data_format="NWC", 1700 dilations=None, 1701 name=None): 1702 """The transpose of `conv1d`. 1703 1704 This operation is sometimes called "deconvolution" after [Deconvolutional 1705 Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf), 1706 but is really the transpose (gradient) of `conv1d` rather than an actual 1707 deconvolution. 1708 1709 Args: 1710 input: A 3-D `Tensor` of type `float` and shape 1711 `[batch, in_width, in_channels]` for `NWC` data format or 1712 `[batch, in_channels, in_width]` for `NCW` data format. 1713 filters: A 3-D `Tensor` with the same type as `value` and shape 1714 `[filter_width, output_channels, in_channels]`. `filter`'s 1715 `in_channels` dimension must match that of `value`. 1716 output_shape: A 1-D `Tensor`, containing three elements, representing the 1717 output shape of the deconvolution op. 1718 strides: An int or list of `ints` that has length `1` or `3`. The number of 1719 entries by which the filter is moved right at each step. 1720 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 1721 See the "returns" section of `tf.nn.convolution` for details. 1722 data_format: A string. `'NWC'` and `'NCW'` are supported. 1723 dilations: An int or list of `ints` that has length `1` or `3` which 1724 defaults to 1. The dilation factor for each dimension of input. If set to 1725 k > 1, there will be k-1 skipped cells between each filter element on that 1726 dimension. Dilations in the batch and depth dimensions must be 1. 1727 name: Optional name for the returned tensor. 1728 1729 Returns: 1730 A `Tensor` with the same type as `value`. 1731 1732 Raises: 1733 ValueError: If input/output depth does not match `filter`'s shape, if 1734 `output_shape` is not at 3-element vector, if `padding` is other than 1735 `'VALID'` or `'SAME'`, or if `data_format` is invalid. 1736 """ 1737 with ops.name_scope(name, "conv1d_transpose", 1738 [input, filters, output_shape]) as name: 1739 # The format could be either NWC or NCW, map to NHWC or NCHW 1740 if data_format is None or data_format == "NWC": 1741 data_format = "NHWC" 1742 spatial_start_dim = 1 1743 channel_index = 2 1744 elif data_format == "NCW": 1745 data_format = "NCHW" 1746 spatial_start_dim = 2 1747 channel_index = 1 1748 else: 1749 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 1750 1751 # Reshape the input tensor to [batch, 1, in_width, in_channels] 1752 strides = [1] + _get_sequence(strides, 1, channel_index, "stride") 1753 dilations = [1] + _get_sequence(dilations, 1, channel_index, "dilations") 1754 1755 input = array_ops.expand_dims(input, spatial_start_dim) 1756 filters = array_ops.expand_dims(filters, 0) 1757 output_shape = list(output_shape) 1758 output_shape = output_shape[: spatial_start_dim] + [1] + \ 1759 output_shape[spatial_start_dim:] 1760 1761 result = gen_nn_ops.conv2d_backprop_input( 1762 input_sizes=output_shape, 1763 filter=filters, 1764 out_backprop=input, 1765 strides=strides, 1766 padding=padding, 1767 data_format=data_format, 1768 dilations=dilations, 1769 name=name) 1770 return array_ops.squeeze(result, spatial_start_dim) 1771 1772 1773 @tf_export("nn.conv2d", v1=[]) 1774 def conv2d_v2(input, # pylint: disable=redefined-builtin 1775 filters, 1776 strides, 1777 padding, 1778 data_format="NHWC", 1779 dilations=None, 1780 name=None): 1781 # pylint: disable=line-too-long 1782 r"""Computes a 2-D convolution given 4-D `input` and `filters` tensors. 1783 1784 Given an input tensor of shape `[batch, in_height, in_width, in_channels]` 1785 and a filter / kernel tensor of shape 1786 `[filter_height, filter_width, in_channels, out_channels]`, this op 1787 performs the following: 1788 1789 1. Flattens the filter to a 2-D matrix with shape 1790 `[filter_height * filter_width * in_channels, output_channels]`. 1791 2. Extracts image patches from the input tensor to form a *virtual* 1792 tensor of shape `[batch, out_height, out_width, 1793 filter_height * filter_width * in_channels]`. 1794 3. For each patch, right-multiplies the filter matrix and the image patch 1795 vector. 1796 1797 In detail, with the default NHWC format, 1798 1799 output[b, i, j, k] = 1800 sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] * 1801 filter[di, dj, q, k] 1802 1803 Must have `strides[0] = strides[3] = 1`. For the most common case of the same 1804 horizontal and vertices strides, `strides = [1, stride, stride, 1]`. 1805 1806 Args: 1807 input: A `Tensor`. Must be one of the following types: 1808 `half`, `bfloat16`, `float32`, `float64`. 1809 A 4-D tensor. The dimension order is interpreted according to the value 1810 of `data_format`, see below for details. 1811 filters: A `Tensor`. Must have the same type as `input`. 1812 A 4-D tensor of shape 1813 `[filter_height, filter_width, in_channels, out_channels]` 1814 strides: An int or list of `ints` that has length `1`, `2` or `4`. The 1815 stride of the sliding window for each dimension of `input`. If a single 1816 value is given it is replicated in the `H` and `W` dimension. By default 1817 the `N` and `C` dimensions are set to 1. The dimension order is determined 1818 by the value of `data_format`, see below for details. 1819 padding: Either the `string `"SAME"` or `"VALID"` indicating the type of 1820 padding algorithm to use, or a list indicating the explicit paddings at 1821 the start and end of each dimension. When explicit padding is used and 1822 data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, 1823 pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used 1824 and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], 1825 [pad_top, pad_bottom], [pad_left, pad_right]]`. 1826 data_format: An optional `string` from: `"NHWC", "NCHW"`. 1827 Defaults to `"NHWC"`. 1828 Specify the data format of the input and output data. With the 1829 default format "NHWC", the data is stored in the order of: 1830 [batch, height, width, channels]. 1831 Alternatively, the format could be "NCHW", the data storage order of: 1832 [batch, channels, height, width]. 1833 dilations: An int or list of `ints` that has length `1`, `2` or `4`, 1834 defaults to 1. The dilation factor for each dimension of`input`. If a 1835 single value is given it is replicated in the `H` and `W` dimension. By 1836 default the `N` and `C` dimensions are set to 1. If set to k > 1, there 1837 will be k-1 skipped cells between each filter element on that dimension. 1838 The dimension order is determined by the value of `data_format`, see above 1839 for details. Dilations in the batch and depth dimensions if a 4-d tensor 1840 must be 1. 1841 name: A name for the operation (optional). 1842 1843 Returns: 1844 A `Tensor`. Has the same type as `input`. 1845 """ 1846 # pylint: enable=line-too-long 1847 return conv2d(input, # pylint: disable=redefined-builtin 1848 filters, 1849 strides, 1850 padding, 1851 use_cudnn_on_gpu=True, 1852 data_format=data_format, 1853 dilations=dilations, 1854 name=name) 1855 1856 1857 @tf_export(v1=["nn.conv2d"]) 1858 def conv2d( # pylint: disable=redefined-builtin,dangerous-default-value 1859 input, 1860 filter=None, 1861 strides=None, 1862 padding=None, 1863 use_cudnn_on_gpu=True, 1864 data_format="NHWC", 1865 dilations=[1, 1, 1, 1], 1866 name=None, 1867 filters=None): 1868 r"""Computes a 2-D convolution given 4-D `input` and `filter` tensors. 1869 1870 Given an input tensor of shape `[batch, in_height, in_width, in_channels]` 1871 and a filter / kernel tensor of shape 1872 `[filter_height, filter_width, in_channels, out_channels]`, this op 1873 performs the following: 1874 1875 1. Flattens the filter to a 2-D matrix with shape 1876 `[filter_height * filter_width * in_channels, output_channels]`. 1877 2. Extracts image patches from the input tensor to form a *virtual* 1878 tensor of shape `[batch, out_height, out_width, 1879 filter_height * filter_width * in_channels]`. 1880 3. For each patch, right-multiplies the filter matrix and the image patch 1881 vector. 1882 1883 In detail, with the default NHWC format, 1884 1885 output[b, i, j, k] = 1886 sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] 1887 * filter[di, dj, q, k] 1888 1889 Must have `strides[0] = strides[3] = 1`. For the most common case of the same 1890 horizontal and vertices strides, `strides = [1, stride, stride, 1]`. 1891 1892 Args: 1893 input: A `Tensor`. Must be one of the following types: 1894 `half`, `bfloat16`, `float32`, `float64`. 1895 A 4-D tensor. The dimension order is interpreted according to the value 1896 of `data_format`, see below for details. 1897 filter: A `Tensor`. Must have the same type as `input`. 1898 A 4-D tensor of shape 1899 `[filter_height, filter_width, in_channels, out_channels]` 1900 strides: An int or list of `ints` that has length `1`, `2` or `4`. The 1901 stride of the sliding window for each dimension of `input`. If a single 1902 value is given it is replicated in the `H` and `W` dimension. By default 1903 the `N` and `C` dimensions are set to 1. The dimension order is determined 1904 by the value of `data_format`, see below for details. 1905 padding: Either the `string `"SAME"` or `"VALID"` indicating the type of 1906 padding algorithm to use, or a list indicating the explicit paddings at 1907 the start and end of each dimension. When explicit padding is used and 1908 data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, 1909 pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used 1910 and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], 1911 [pad_top, pad_bottom], [pad_left, pad_right]]`. 1912 use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. 1913 data_format: An optional `string` from: `"NHWC", "NCHW"`. 1914 Defaults to `"NHWC"`. 1915 Specify the data format of the input and output data. With the 1916 default format "NHWC", the data is stored in the order of: 1917 [batch, height, width, channels]. 1918 Alternatively, the format could be "NCHW", the data storage order of: 1919 [batch, channels, height, width]. 1920 dilations: An int or list of `ints` that has length `1`, `2` or `4`, 1921 defaults to 1. The dilation factor for each dimension of`input`. If a 1922 single value is given it is replicated in the `H` and `W` dimension. By 1923 default the `N` and `C` dimensions are set to 1. If set to k > 1, there 1924 will be k-1 skipped cells between each filter element on that dimension. 1925 The dimension order is determined by the value of `data_format`, see above 1926 for details. Dilations in the batch and depth dimensions if a 4-d tensor 1927 must be 1. 1928 name: A name for the operation (optional). 1929 filters: Alias for filter. 1930 1931 Returns: 1932 A `Tensor`. Has the same type as `input`. 1933 """ 1934 filter = deprecation.deprecated_argument_lookup( 1935 "filters", filters, "filter", filter) 1936 padding, explicit_paddings = _convert_padding(padding) 1937 if data_format is None: 1938 data_format = "NHWC" 1939 channel_index = 1 if data_format.startswith("NC") else 3 1940 1941 strides = _get_sequence(strides, 2, channel_index, "strides") 1942 dilations = _get_sequence(dilations, 2, channel_index, "dilations") 1943 return gen_nn_ops.conv2d(input, # pylint: disable=redefined-builtin 1944 filter, 1945 strides, 1946 padding, 1947 use_cudnn_on_gpu=use_cudnn_on_gpu, 1948 explicit_paddings=explicit_paddings, 1949 data_format=data_format, 1950 dilations=dilations, 1951 name=name) 1952 1953 1954 @tf_export(v1=["nn.conv2d_backprop_filter"]) 1955 def conv2d_backprop_filter( # pylint: disable=redefined-builtin,dangerous-default-value 1956 input, 1957 filter_sizes, 1958 out_backprop, 1959 strides, 1960 padding, 1961 use_cudnn_on_gpu=True, 1962 data_format="NHWC", 1963 dilations=[1, 1, 1, 1], 1964 name=None): 1965 r"""Computes the gradients of convolution with respect to the filter. 1966 1967 Args: 1968 input: A `Tensor`. Must be one of the following types: 1969 `half`, `bfloat16`, `float32`, `float64`. 1970 4-D with shape `[batch, in_height, in_width, in_channels]`. 1971 filter_sizes: A `Tensor` of type `int32`. 1972 An integer vector representing the tensor shape of `filter`, 1973 where `filter` is a 4-D 1974 `[filter_height, filter_width, in_channels, out_channels]` tensor. 1975 out_backprop: A `Tensor`. Must have the same type as `input`. 1976 4-D with shape `[batch, out_height, out_width, out_channels]`. 1977 Gradients w.r.t. the output of the convolution. 1978 strides: A list of `ints`. 1979 The stride of the sliding window for each dimension of the input 1980 of the convolution. Must be in the same order as the dimension specified 1981 with format. 1982 padding: Either the `string `"SAME"` or `"VALID"` indicating the type of 1983 padding algorithm to use, or a list indicating the explicit paddings at 1984 the start and end of each dimension. When explicit padding is used and 1985 data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, 1986 pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used 1987 and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], 1988 [pad_top, pad_bottom], [pad_left, pad_right]]`. 1989 use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. 1990 data_format: An optional `string` from: `"NHWC", "NCHW"`. 1991 Defaults to `"NHWC"`. 1992 Specify the data format of the input and output data. With the 1993 default format "NHWC", the data is stored in the order of: 1994 [batch, in_height, in_width, in_channels]. 1995 Alternatively, the format could be "NCHW", the data storage order of: 1996 [batch, in_channels, in_height, in_width]. 1997 dilations: An optional list of `ints`. Defaults to `[1, 1, 1, 1]`. 1998 1-D tensor of length 4. The dilation factor for each dimension of 1999 `input`. If set to k > 1, there will be k-1 skipped cells between each 2000 filter element on that dimension. The dimension order is determined by 2001 the value of `data_format`, see above for details. Dilations in the batch 2002 and depth dimensions must be 1. 2003 name: A name for the operation (optional). 2004 2005 Returns: 2006 A `Tensor`. Has the same type as `input`. 2007 """ 2008 padding, explicit_paddings = _convert_padding(padding) 2009 return gen_nn_ops.conv2d_backprop_filter( 2010 input, filter_sizes, out_backprop, strides, padding, use_cudnn_on_gpu, 2011 explicit_paddings, data_format, dilations, name) 2012 2013 2014 @tf_export(v1=["nn.conv2d_backprop_input"]) 2015 def conv2d_backprop_input( # pylint: disable=redefined-builtin,dangerous-default-value 2016 input_sizes, 2017 filter=None, 2018 out_backprop=None, 2019 strides=None, 2020 padding=None, 2021 use_cudnn_on_gpu=True, 2022 data_format="NHWC", 2023 dilations=[1, 1, 1, 1], 2024 name=None, 2025 filters=None): 2026 r"""Computes the gradients of convolution with respect to the input. 2027 2028 Args: 2029 input_sizes: A `Tensor` of type `int32`. 2030 An integer vector representing the shape of `input`, 2031 where `input` is a 4-D `[batch, height, width, channels]` tensor. 2032 filter: A `Tensor`. Must be one of the following types: 2033 `half`, `bfloat16`, `float32`, `float64`. 2034 4-D with shape 2035 `[filter_height, filter_width, in_channels, out_channels]`. 2036 out_backprop: A `Tensor`. Must have the same type as `filter`. 2037 4-D with shape `[batch, out_height, out_width, out_channels]`. 2038 Gradients w.r.t. the output of the convolution. 2039 strides: A list of `ints`. 2040 The stride of the sliding window for each dimension of the input 2041 of the convolution. Must be in the same order as the dimension specified 2042 with format. 2043 padding: Either the `string `"SAME"` or `"VALID"` indicating the type of 2044 padding algorithm to use, or a list indicating the explicit paddings at 2045 the start and end of each dimension. When explicit padding is used and 2046 data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, 2047 pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used 2048 and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], 2049 [pad_top, pad_bottom], [pad_left, pad_right]]`. 2050 use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. 2051 data_format: An optional `string` from: `"NHWC", "NCHW"`. 2052 Defaults to `"NHWC"`. 2053 Specify the data format of the input and output data. With the 2054 default format "NHWC", the data is stored in the order of: 2055 [batch, in_height, in_width, in_channels]. 2056 Alternatively, the format could be "NCHW", the data storage order of: 2057 [batch, in_channels, in_height, in_width]. 2058 dilations: An optional list of `ints`. Defaults to `[1, 1, 1, 1]`. 2059 1-D tensor of length 4. The dilation factor for each dimension of 2060 `input`. If set to k > 1, there will be k-1 skipped cells between each 2061 filter element on that dimension. The dimension order is determined by 2062 the value of `data_format`, see above for details. Dilations in the batch 2063 and depth dimensions must be 1. 2064 name: A name for the operation (optional). 2065 filters: Alias for filter. 2066 2067 Returns: 2068 A `Tensor`. Has the same type as `filter`. 2069 """ 2070 filter = deprecation.deprecated_argument_lookup( 2071 "filters", filters, "filter", filter) 2072 padding, explicit_paddings = _convert_padding(padding) 2073 return gen_nn_ops.conv2d_backprop_input( 2074 input_sizes, filter, out_backprop, strides, padding, use_cudnn_on_gpu, 2075 explicit_paddings, data_format, dilations, name) 2076 2077 2078 @tf_export(v1=["nn.conv2d_transpose"]) 2079 def conv2d_transpose( 2080 value=None, 2081 filter=None, # pylint: disable=redefined-builtin 2082 output_shape=None, 2083 strides=None, 2084 padding="SAME", 2085 data_format="NHWC", 2086 name=None, 2087 input=None, # pylint: disable=redefined-builtin 2088 filters=None, 2089 dilations=None): 2090 """The transpose of `conv2d`. 2091 2092 This operation is sometimes called "deconvolution" after [Deconvolutional 2093 Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf), 2094 but is really the transpose (gradient) of `conv2d` rather than an actual 2095 deconvolution. 2096 2097 Args: 2098 value: A 4-D `Tensor` of type `float` and shape 2099 `[batch, height, width, in_channels]` for `NHWC` data format or 2100 `[batch, in_channels, height, width]` for `NCHW` data format. 2101 filter: A 4-D `Tensor` with the same type as `value` and shape 2102 `[height, width, output_channels, in_channels]`. `filter`'s 2103 `in_channels` dimension must match that of `value`. 2104 output_shape: A 1-D `Tensor` representing the output shape of the 2105 deconvolution op. 2106 strides: An int or list of `ints` that has length `1`, `2` or `4`. The 2107 stride of the sliding window for each dimension of `input`. If a single 2108 value is given it is replicated in the `H` and `W` dimension. By default 2109 the `N` and `C` dimensions are set to 0. The dimension order is determined 2110 by the value of `data_format`, see below for details. 2111 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 2112 See the "returns" section of `tf.nn.convolution` for details. 2113 data_format: A string. 'NHWC' and 'NCHW' are supported. 2114 name: Optional name for the returned tensor. 2115 input: Alias for value. 2116 filters: Alias for filter. 2117 dilations: An int or list of `ints` that has length `1`, `2` or `4`, 2118 defaults to 1. The dilation factor for each dimension of`input`. If a 2119 single value is given it is replicated in the `H` and `W` dimension. By 2120 default the `N` and `C` dimensions are set to 1. If set to k > 1, there 2121 will be k-1 skipped cells between each filter element on that dimension. 2122 The dimension order is determined by the value of `data_format`, see above 2123 for details. Dilations in the batch and depth dimensions if a 4-d tensor 2124 must be 1. 2125 2126 Returns: 2127 A `Tensor` with the same type as `value`. 2128 2129 Raises: 2130 ValueError: If input/output depth does not match `filter`'s shape, or if 2131 padding is other than `'VALID'` or `'SAME'`. 2132 """ 2133 value = deprecated_argument_lookup("input", input, "value", value) 2134 filter = deprecated_argument_lookup("filters", filters, "filter", filter) 2135 with ops.name_scope(name, "conv2d_transpose", 2136 [value, filter, output_shape]) as name: 2137 return conv2d_transpose_v2( 2138 value, 2139 filter, 2140 output_shape, 2141 strides, 2142 padding=padding, 2143 data_format=data_format, 2144 dilations=dilations, 2145 name=name) 2146 2147 2148 @tf_export("nn.conv2d_transpose", v1=[]) 2149 def conv2d_transpose_v2( 2150 input, # pylint: disable=redefined-builtin 2151 filters, # pylint: disable=redefined-builtin 2152 output_shape, 2153 strides, 2154 padding="SAME", 2155 data_format="NHWC", 2156 dilations=None, 2157 name=None): 2158 """The transpose of `conv2d`. 2159 2160 This operation is sometimes called "deconvolution" after [Deconvolutional 2161 Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is 2162 actually the transpose (gradient) of `conv2d` rather than an actual 2163 deconvolution. 2164 2165 Args: 2166 input: A 4-D `Tensor` of type `float` and shape `[batch, height, width, 2167 in_channels]` for `NHWC` data format or `[batch, in_channels, height, 2168 width]` for `NCHW` data format. 2169 filters: A 4-D `Tensor` with the same type as `value` and shape `[height, 2170 width, output_channels, in_channels]`. `filter`'s `in_channels` dimension 2171 must match that of `value`. 2172 output_shape: A 1-D `Tensor` representing the output shape of the 2173 deconvolution op. 2174 strides: An int or list of `ints` that has length `1`, `2` or `4`. The 2175 stride of the sliding window for each dimension of `input`. If a single 2176 value is given it is replicated in the `H` and `W` dimension. By default 2177 the `N` and `C` dimensions are set to 0. The dimension order is determined 2178 by the value of `data_format`, see below for details. 2179 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 2180 the "returns" section of `tf.nn.convolution` for details. 2181 data_format: A string. 'NHWC' and 'NCHW' are supported. 2182 dilations: An int or list of `ints` that has length `1`, `2` or `4`, 2183 defaults to 1. The dilation factor for each dimension of`input`. If a 2184 single value is given it is replicated in the `H` and `W` dimension. By 2185 default the `N` and `C` dimensions are set to 1. If set to k > 1, there 2186 will be k-1 skipped cells between each filter element on that dimension. 2187 The dimension order is determined by the value of `data_format`, see above 2188 for details. Dilations in the batch and depth dimensions if a 4-d tensor 2189 must be 1. 2190 name: Optional name for the returned tensor. 2191 2192 Returns: 2193 A `Tensor` with the same type as `value`. 2194 2195 Raises: 2196 ValueError: If input/output depth does not match `filter`'s shape, or if 2197 padding is other than `'VALID'` or `'SAME'`. 2198 """ 2199 with ops.name_scope(name, "conv2d_transpose", 2200 [input, filter, output_shape]) as name: 2201 if data_format is None: 2202 data_format = "NHWC" 2203 channel_index = 1 if data_format.startswith("NC") else 3 2204 2205 strides = _get_sequence(strides, 2, channel_index, "strides") 2206 dilations = _get_sequence(dilations, 2, channel_index, "dilations") 2207 2208 return gen_nn_ops.conv2d_backprop_input( 2209 input_sizes=output_shape, 2210 filter=filters, 2211 out_backprop=input, 2212 strides=strides, 2213 padding=padding, 2214 data_format=data_format, 2215 dilations=dilations, 2216 name=name) 2217 2218 2219 @tf_export("nn.atrous_conv2d_transpose") 2220 def atrous_conv2d_transpose(value, 2221 filters, 2222 output_shape, 2223 rate, 2224 padding, 2225 name=None): 2226 """The transpose of `atrous_conv2d`. 2227 2228 This operation is sometimes called "deconvolution" after [Deconvolutional 2229 Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf), 2230 but is really the transpose (gradient) of `atrous_conv2d` rather than an 2231 actual deconvolution. 2232 2233 Args: 2234 value: A 4-D `Tensor` of type `float`. It needs to be in the default `NHWC` 2235 format. Its shape is `[batch, in_height, in_width, in_channels]`. 2236 filters: A 4-D `Tensor` with the same type as `value` and shape 2237 `[filter_height, filter_width, out_channels, in_channels]`. `filters`' 2238 `in_channels` dimension must match that of `value`. Atrous convolution is 2239 equivalent to standard convolution with upsampled filters with effective 2240 height `filter_height + (filter_height - 1) * (rate - 1)` and effective 2241 width `filter_width + (filter_width - 1) * (rate - 1)`, produced by 2242 inserting `rate - 1` zeros along consecutive elements across the 2243 `filters`' spatial dimensions. 2244 output_shape: A 1-D `Tensor` of shape representing the output shape of the 2245 deconvolution op. 2246 rate: A positive int32. The stride with which we sample input values across 2247 the `height` and `width` dimensions. Equivalently, the rate by which we 2248 upsample the filter values by inserting zeros across the `height` and 2249 `width` dimensions. In the literature, the same parameter is sometimes 2250 called `input stride` or `dilation`. 2251 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 2252 name: Optional name for the returned tensor. 2253 2254 Returns: 2255 A `Tensor` with the same type as `value`. 2256 2257 Raises: 2258 ValueError: If input/output depth does not match `filters`' shape, or if 2259 padding is other than `'VALID'` or `'SAME'`, or if the `rate` is less 2260 than one, or if the output_shape is not a tensor with 4 elements. 2261 """ 2262 with ops.name_scope(name, "atrous_conv2d_transpose", 2263 [value, filters, output_shape]) as name: 2264 value = ops.convert_to_tensor(value, name="value") 2265 filters = ops.convert_to_tensor(filters, name="filters") 2266 if not value.get_shape().dims[3].is_compatible_with(filters.get_shape()[3]): 2267 raise ValueError( 2268 "value's input channels does not match filters' input channels, " 2269 "{} != {}".format(value.get_shape()[3], 2270 filters.get_shape()[3])) 2271 if rate < 1: 2272 raise ValueError("rate {} cannot be less than one".format(rate)) 2273 2274 if rate == 1: 2275 return conv2d_transpose( 2276 value, 2277 filters, 2278 output_shape, 2279 strides=[1, 1, 1, 1], 2280 padding=padding, 2281 data_format="NHWC") 2282 2283 output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") 2284 if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(4)): 2285 raise ValueError("output_shape must have shape (4,), got {}".format( 2286 output_shape_.get_shape())) 2287 2288 if isinstance(output_shape, tuple): 2289 output_shape = list(output_shape) 2290 2291 if isinstance(output_shape, (list, np.ndarray)): 2292 # output_shape's shape should be == [4] if reached this point. 2293 if not filters.get_shape().dims[2].is_compatible_with(output_shape[3]): 2294 raise ValueError( 2295 "output_shape does not match filter's output channels, " 2296 "{} != {}".format(output_shape[3], 2297 filters.get_shape()[2])) 2298 2299 # We have two padding contributions. The first is used for converting "SAME" 2300 # to "VALID". The second is required so that the height and width of the 2301 # zero-padded value tensor are multiples of rate. 2302 2303 # Padding required to reduce to "VALID" convolution 2304 if padding == "SAME": 2305 # Handle filters whose shape is unknown during graph creation. 2306 if filters.get_shape().is_fully_defined(): 2307 filter_shape = filters.get_shape().as_list() 2308 else: 2309 filter_shape = array_ops.shape(filters) 2310 filter_height, filter_width = filter_shape[0], filter_shape[1] 2311 2312 # Spatial dimensions of the filters and the upsampled filters in which we 2313 # introduce (rate - 1) zeros between consecutive filter values. 2314 filter_height_up = filter_height + (filter_height - 1) * (rate - 1) 2315 filter_width_up = filter_width + (filter_width - 1) * (rate - 1) 2316 2317 pad_height = filter_height_up - 1 2318 pad_width = filter_width_up - 1 2319 2320 # When pad_height (pad_width) is odd, we pad more to bottom (right), 2321 # following the same convention as conv2d(). 2322 pad_top = pad_height // 2 2323 pad_bottom = pad_height - pad_top 2324 pad_left = pad_width // 2 2325 pad_right = pad_width - pad_left 2326 elif padding == "VALID": 2327 pad_top = 0 2328 pad_bottom = 0 2329 pad_left = 0 2330 pad_right = 0 2331 else: 2332 raise ValueError("padding must be either VALID or SAME:" 2333 " {}".format(padding)) 2334 2335 in_height = output_shape[1] + pad_top + pad_bottom 2336 in_width = output_shape[2] + pad_left + pad_right 2337 2338 # More padding so that rate divides the height and width of the input. 2339 pad_bottom_extra = (rate - in_height % rate) % rate 2340 pad_right_extra = (rate - in_width % rate) % rate 2341 2342 # The paddings argument to space_to_batch is just the extra padding 2343 # component. 2344 space_to_batch_pad = [[0, pad_bottom_extra], [0, pad_right_extra]] 2345 2346 value = array_ops.space_to_batch( 2347 input=value, paddings=space_to_batch_pad, block_size=rate) 2348 2349 input_sizes = [ 2350 rate * rate * output_shape[0], (in_height + pad_bottom_extra) // rate, 2351 (in_width + pad_right_extra) // rate, output_shape[3] 2352 ] 2353 2354 value = gen_nn_ops.conv2d_backprop_input( 2355 input_sizes=input_sizes, 2356 filter=filters, 2357 out_backprop=value, 2358 strides=[1, 1, 1, 1], 2359 padding="VALID", 2360 data_format="NHWC") 2361 2362 # The crops argument to batch_to_space includes both padding components. 2363 batch_to_space_crop = [[pad_top, pad_bottom + pad_bottom_extra], 2364 [pad_left, pad_right + pad_right_extra]] 2365 2366 return array_ops.batch_to_space( 2367 input=value, crops=batch_to_space_crop, block_size=rate) 2368 2369 2370 @tf_export("nn.conv3d", v1=[]) 2371 def conv3d_v2(input, # pylint: disable=redefined-builtin,missing-docstring 2372 filters, 2373 strides, 2374 padding, 2375 data_format="NDHWC", 2376 dilations=None, 2377 name=None): 2378 if dilations is None: 2379 dilations = [1, 1, 1, 1, 1] 2380 return gen_nn_ops.conv3d(input, 2381 filters, 2382 strides, 2383 padding, 2384 data_format=data_format, 2385 dilations=dilations, 2386 name=name) 2387 2388 2389 @tf_export(v1=["nn.conv3d"]) 2390 def conv3d_v1( # pylint: disable=missing-docstring,dangerous-default-value 2391 input, # pylint: disable=redefined-builtin 2392 filter=None, # pylint: disable=redefined-builtin 2393 strides=None, 2394 padding=None, 2395 data_format="NDHWC", 2396 dilations=[1, 1, 1, 1, 1], 2397 name=None, 2398 filters=None): 2399 filter = deprecated_argument_lookup("filters", filters, "filter", filter) 2400 return gen_nn_ops.conv3d( 2401 input, filter, strides, padding, data_format, dilations, name) 2402 2403 2404 conv3d_v2.__doc__ = deprecation.rewrite_argument_docstring( 2405 gen_nn_ops.conv3d.__doc__, "filter", "filters") 2406 conv3d_v1.__doc__ = gen_nn_ops.conv3d.__doc__ 2407 2408 2409 @tf_export(v1=["nn.conv3d_transpose"]) 2410 def conv3d_transpose( 2411 value, 2412 filter=None, # pylint: disable=redefined-builtin 2413 output_shape=None, 2414 strides=None, 2415 padding="SAME", 2416 data_format="NDHWC", 2417 name=None, 2418 input=None, # pylint: disable=redefined-builtin 2419 filters=None, 2420 dilations=None): 2421 """The transpose of `conv3d`. 2422 2423 This operation is sometimes called "deconvolution" after [Deconvolutional 2424 Networks](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf), 2425 but is really the transpose (gradient) of `conv3d` rather than an actual 2426 deconvolution. 2427 2428 Args: 2429 value: A 5-D `Tensor` of type `float` and shape 2430 `[batch, depth, height, width, in_channels]`. 2431 filter: A 5-D `Tensor` with the same type as `value` and shape 2432 `[depth, height, width, output_channels, in_channels]`. `filter`'s 2433 `in_channels` dimension must match that of `value`. 2434 output_shape: A 1-D `Tensor` representing the output shape of the 2435 deconvolution op. 2436 strides: A list of ints. The stride of the sliding window for each 2437 dimension of the input tensor. 2438 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 2439 See the "returns" section of `tf.nn.convolution` for details. 2440 data_format: A string, either `'NDHWC'` or `'NCDHW`' specifying the layout 2441 of the input and output tensors. Defaults to `'NDHWC'`. 2442 name: Optional name for the returned tensor. 2443 input: Alias of value. 2444 filters: Alias of filter. 2445 dilations: An int or list of `ints` that has length `1`, `3` or `5`, 2446 defaults to 1. The dilation factor for each dimension of`input`. If a 2447 single value is given it is replicated in the `D`, `H` and `W` dimension. 2448 By default the `N` and `C` dimensions are set to 1. If set to k > 1, there 2449 will be k-1 skipped cells between each filter element on that dimension. 2450 The dimension order is determined by the value of `data_format`, see above 2451 for details. Dilations in the batch and depth dimensions if a 5-d tensor 2452 must be 1. 2453 2454 Returns: 2455 A `Tensor` with the same type as `value`. 2456 2457 Raises: 2458 ValueError: If input/output depth does not match `filter`'s shape, or if 2459 padding is other than `'VALID'` or `'SAME'`. 2460 """ 2461 filter = deprecated_argument_lookup("filters", filters, "filter", filter) 2462 value = deprecated_argument_lookup("input", input, "value", value) 2463 return conv3d_transpose_v2( 2464 value, 2465 filter, 2466 output_shape, 2467 strides, 2468 padding=padding, 2469 data_format=data_format, 2470 dilations=dilations, 2471 name=name) 2472 2473 2474 @tf_export("nn.conv3d_transpose", v1=[]) 2475 def conv3d_transpose_v2(input, # pylint: disable=redefined-builtin 2476 filters, 2477 output_shape, 2478 strides, 2479 padding="SAME", 2480 data_format="NDHWC", 2481 dilations=None, 2482 name=None): 2483 """The transpose of `conv3d`. 2484 2485 This operation is sometimes called "deconvolution" after [Deconvolutional 2486 Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is 2487 actually the transpose (gradient) of `conv2d` rather than an actual 2488 deconvolution. 2489 2490 Args: 2491 input: A 5-D `Tensor` of type `float` and shape `[batch, height, width, 2492 in_channels]` for `NHWC` data format or `[batch, in_channels, height, 2493 width]` for `NCHW` data format. 2494 filters: A 5-D `Tensor` with the same type as `value` and shape `[height, 2495 width, output_channels, in_channels]`. `filter`'s `in_channels` dimension 2496 must match that of `value`. 2497 output_shape: A 1-D `Tensor` representing the output shape of the 2498 deconvolution op. 2499 strides: An int or list of `ints` that has length `1`, `3` or `5`. The 2500 stride of the sliding window for each dimension of `input`. If a single 2501 value is given it is replicated in the `D`, `H` and `W` dimension. By 2502 default the `N` and `C` dimensions are set to 0. The dimension order is 2503 determined by the value of `data_format`, see below for details. 2504 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 2505 the "returns" section of `tf.nn.convolution` for details. 2506 data_format: A string. 'NDHWC' and 'NCDHW' are supported. 2507 dilations: An int or list of `ints` that has length `1`, `3` or `5`, 2508 defaults to 1. The dilation factor for each dimension of`input`. If a 2509 single value is given it is replicated in the `D`, `H` and `W` dimension. 2510 By default the `N` and `C` dimensions are set to 1. If set to k > 1, there 2511 will be k-1 skipped cells between each filter element on that dimension. 2512 The dimension order is determined by the value of `data_format`, see above 2513 for details. Dilations in the batch and depth dimensions if a 5-d tensor 2514 must be 1. 2515 name: Optional name for the returned tensor. 2516 2517 Returns: 2518 A `Tensor` with the same type as `value`. 2519 """ 2520 with ops.name_scope(name, "conv3d_transpose", 2521 [input, filter, output_shape]) as name: 2522 if data_format is None: 2523 data_format = "NDHWC" 2524 channel_index = 1 if data_format.startswith("NC") else 4 2525 2526 strides = _get_sequence(strides, 3, channel_index, "strides") 2527 dilations = _get_sequence(dilations, 3, channel_index, "dilations") 2528 2529 return gen_nn_ops.conv3d_backprop_input_v2( 2530 input_sizes=output_shape, 2531 filter=filters, 2532 out_backprop=input, 2533 strides=strides, 2534 padding=padding, 2535 data_format=data_format, 2536 dilations=dilations, 2537 name=name) 2538 2539 2540 CONV_TRANSPOSE_OPS = ( 2541 conv1d_transpose, 2542 conv2d_transpose_v2, 2543 conv3d_transpose_v2, 2544 ) 2545 2546 2547 @tf_export("nn.conv_transpose") 2548 def conv_transpose(input, # pylint: disable=redefined-builtin 2549 filters, 2550 output_shape, 2551 strides, 2552 padding="SAME", 2553 data_format=None, 2554 dilations=None, 2555 name=None): 2556 """The transpose of `convolution`. 2557 2558 This operation is sometimes called "deconvolution" after [Deconvolutional 2559 Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is 2560 actually the transpose (gradient) of `convolution` rather than an actual 2561 deconvolution. 2562 2563 Args: 2564 input: An N+2 dimensional `Tensor` of shape 2565 `[batch_size] + input_spatial_shape + [in_channels]` if data_format does 2566 not start with "NC" (default), or 2567 `[batch_size, in_channels] + input_spatial_shape` if data_format starts 2568 with "NC". It must be one of the following types: 2569 `half`, `bfloat16`, `float32`, `float64`. 2570 filters: An N+2 dimensional `Tensor` with the same type as `input` and 2571 shape `spatial_filter_shape + [in_channels, out_channels]`. 2572 output_shape: A 1-D `Tensor` representing the output shape of the 2573 deconvolution op. 2574 strides: An int or list of `ints` that has length `1`, `N` or `N+2`. The 2575 stride of the sliding window for each dimension of `input`. If a single 2576 value is given it is replicated in the spatial dimensions. By default 2577 the `N` and `C` dimensions are set to 0. The dimension order is determined 2578 by the value of `data_format`, see below for details. 2579 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 2580 the "returns" section of `tf.nn.convolution` for details. 2581 data_format: A string or None. Specifies whether the channel dimension of 2582 the `input` and output is the last dimension (default, or if `data_format` 2583 does not start with "NC"), or the second dimension (if `data_format` 2584 starts with "NC"). For N=1, the valid values are "NWC" (default) and 2585 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 2586 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 2587 dilations: An int or list of `ints` that has length `1`, `N` or `N+2`, 2588 defaults to 1. The dilation factor for each dimension of`input`. If a 2589 single value is given it is replicated in the spatial dimensions. By 2590 default the `N` and `C` dimensions are set to 1. If set to k > 1, there 2591 will be k-1 skipped cells between each filter element on that dimension. 2592 The dimension order is determined by the value of `data_format`, see above 2593 for details. 2594 name: A name for the operation (optional). If not specified "conv_transpose" 2595 is used. 2596 2597 Returns: 2598 A `Tensor` with the same type as `value`. 2599 """ 2600 with ops.name_scope(name, "conv_transpose", 2601 [input, filter, output_shape]) as name: 2602 if output_shape is not None: 2603 n = len(output_shape) - 2 2604 else: 2605 raise ValueError("output_shape cannot be None") 2606 2607 if not 1 <= n <= 3: 2608 raise ValueError( 2609 "output_shape must be of length 3, 4 or 5 but was {}.".format(n + 2)) 2610 2611 op = CONV_TRANSPOSE_OPS[n-1] 2612 return op( 2613 input, 2614 filters, 2615 output_shape, 2616 strides, 2617 padding=padding, 2618 data_format=data_format, 2619 dilations=dilations, 2620 name=name) 2621 2622 2623 @tf_export("nn.bias_add") 2624 def bias_add(value, bias, data_format=None, name=None): 2625 """Adds `bias` to `value`. 2626 2627 This is (mostly) a special case of `tf.add` where `bias` is restricted to 1-D. 2628 Broadcasting is supported, so `value` may have any number of dimensions. 2629 Unlike `tf.add`, the type of `bias` is allowed to differ from `value` in the 2630 case where both types are quantized. 2631 2632 Args: 2633 value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`, 2634 `int16`, `int8`, `complex64`, or `complex128`. 2635 bias: A 1-D `Tensor` with size matching the last dimension of `value`. 2636 Must be the same type as `value` unless `value` is a quantized type, 2637 in which case a different quantized type may be used. 2638 data_format: A string. 'N...C' and 'NC...' are supported. 2639 name: A name for the operation (optional). 2640 2641 Returns: 2642 A `Tensor` with the same type as `value`. 2643 """ 2644 with ops.name_scope(name, "BiasAdd", [value, bias]) as name: 2645 if data_format is not None: 2646 if data_format.startswith("NC"): 2647 data_format = "NCHW" 2648 elif data_format.startswith("N") and data_format.endswith("C"): 2649 data_format = "NHWC" 2650 else: 2651 raise ValueError("data_format must be of the form `N...C` or `NC...`") 2652 2653 if not context.executing_eagerly(): 2654 value = ops.convert_to_tensor(value, name="input") 2655 bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") 2656 return gen_nn_ops.bias_add(value, bias, data_format=data_format, name=name) 2657 2658 2659 def bias_add_v1(value, bias, name=None): 2660 """Adds `bias` to `value`. 2661 2662 This is a deprecated version of bias_add and will soon to be removed. 2663 2664 This is (mostly) a special case of `tf.add` where `bias` is restricted to 1-D. 2665 Broadcasting is supported, so `value` may have any number of dimensions. 2666 Unlike `tf.add`, the type of `bias` is allowed to differ from `value` in the 2667 case where both types are quantized. 2668 2669 Args: 2670 value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`, 2671 `int16`, `int8`, `complex64`, or `complex128`. 2672 bias: A 1-D `Tensor` with size matching the last dimension of `value`. 2673 Must be the same type as `value` unless `value` is a quantized type, 2674 in which case a different quantized type may be used. 2675 name: A name for the operation (optional). 2676 2677 Returns: 2678 A `Tensor` with the same type as `value`. 2679 """ 2680 with ops.name_scope(name, "BiasAddV1", [value, bias]) as name: 2681 value = ops.convert_to_tensor(value, name="input") 2682 bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") 2683 return gen_nn_ops.bias_add_v1(value, bias, name=name) 2684 2685 2686 @tf_export(v1=["nn.crelu"]) 2687 def crelu(features, name=None, axis=-1): 2688 """Computes Concatenated ReLU. 2689 2690 Concatenates a ReLU which selects only the positive part of the activation 2691 with a ReLU which selects only the *negative* part of the activation. 2692 Note that as a result this non-linearity doubles the depth of the activations. 2693 Source: [Understanding and Improving Convolutional Neural Networks via 2694 Concatenated Rectified Linear Units. W. Shang, et 2695 al.](https://arxiv.org/abs/1603.05201) 2696 2697 Args: 2698 features: A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, 2699 `int16`, or `int8`. 2700 name: A name for the operation (optional). 2701 axis: The axis that the output values are concatenated along. Default is -1. 2702 2703 Returns: 2704 A `Tensor` with the same type as `features`. 2705 """ 2706 with ops.name_scope(name, "CRelu", [features]) as name: 2707 features = ops.convert_to_tensor(features, name="features") 2708 c = array_ops.concat([features, -features], axis, name=name) 2709 return gen_nn_ops.relu(c) 2710 2711 2712 @tf_export("nn.crelu", v1=[]) 2713 def crelu_v2(features, axis=-1, name=None): 2714 return crelu(features, name=name, axis=axis) 2715 crelu_v2.__doc__ = crelu.__doc__ 2716 2717 2718 @tf_export("nn.relu6") 2719 def relu6(features, name=None): 2720 """Computes Rectified Linear 6: `min(max(features, 0), 6)`. 2721 2722 Source: [Convolutional Deep Belief Networks on CIFAR-10. A. 2723 Krizhevsky](http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf) 2724 2725 Args: 2726 features: A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, 2727 `int16`, or `int8`. 2728 name: A name for the operation (optional). 2729 2730 Returns: 2731 A `Tensor` with the same type as `features`. 2732 """ 2733 with ops.name_scope(name, "Relu6", [features]) as name: 2734 features = ops.convert_to_tensor(features, name="features") 2735 return gen_nn_ops.relu6(features, name=name) 2736 2737 2738 @tf_export("nn.leaky_relu") 2739 def leaky_relu(features, alpha=0.2, name=None): 2740 """Compute the Leaky ReLU activation function. 2741 2742 "Rectifier Nonlinearities Improve Neural Network Acoustic Models" 2743 AL Maas, AY Hannun, AY Ng - Proc. ICML, 2013 2744 https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf 2745 2746 Args: 2747 features: A `Tensor` representing preactivation values. Must be one of 2748 the following types: `float16`, `float32`, `float64`, `int32`, `int64`. 2749 alpha: Slope of the activation function at x < 0. 2750 name: A name for the operation (optional). 2751 2752 Returns: 2753 The activation value. 2754 """ 2755 with ops.name_scope(name, "LeakyRelu", [features, alpha]) as name: 2756 features = ops.convert_to_tensor(features, name="features") 2757 if features.dtype.is_integer: 2758 features = math_ops.cast(features, dtypes.float32) 2759 if compat.forward_compatible(2018, 11, 1): 2760 if isinstance(alpha, np.ndarray): 2761 alpha = alpha.item() 2762 return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) 2763 alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") 2764 return math_ops.maximum(alpha * features, features, name=name) 2765 2766 2767 def _flatten_outer_dims(logits): 2768 """Flattens logits' outer dimensions and keep its last dimension.""" 2769 rank = array_ops.rank(logits) 2770 last_dim_size = array_ops.slice( 2771 array_ops.shape(logits), [math_ops.subtract(rank, 1)], [1]) 2772 output = array_ops.reshape(logits, array_ops.concat([[-1], last_dim_size], 0)) 2773 2774 # Set output shape if known. 2775 if not context.executing_eagerly(): 2776 shape = logits.get_shape() 2777 if shape is not None and shape.dims is not None: 2778 shape = shape.as_list() 2779 product = 1 2780 product_valid = True 2781 for d in shape[:-1]: 2782 if d is None: 2783 product_valid = False 2784 break 2785 else: 2786 product *= d 2787 if product_valid: 2788 output_shape = [product, shape[-1]] 2789 output.set_shape(output_shape) 2790 2791 return output 2792 2793 2794 def _softmax(logits, compute_op, dim=-1, name=None): 2795 """Helper function for softmax and log_softmax. 2796 2797 It reshapes and transposes the input logits into a 2-D Tensor and then invokes 2798 the tf.nn._softmax or tf.nn._log_softmax function. The output would be 2799 transposed and reshaped back. 2800 2801 Args: 2802 logits: A non-empty `Tensor`. Must be one of the following types: `half`, 2803 `float32`, `float64`. 2804 compute_op: Either gen_nn_ops.softmax or gen_nn_ops.log_softmax 2805 dim: The dimension softmax would be performed on. The default is -1 which 2806 indicates the last dimension. 2807 name: A name for the operation (optional). 2808 2809 Returns: 2810 A `Tensor`. Has the same type as `logits`. Same shape as `logits`. 2811 Raises: 2812 InvalidArgumentError: if `logits` is empty or `dim` is beyond the last 2813 dimension of `logits`. 2814 """ 2815 2816 def _swap_axis(logits, dim_index, last_index, name=None): 2817 """Swaps logits's dim_index and last_index.""" 2818 return array_ops.transpose( 2819 logits, 2820 array_ops.concat([ 2821 math_ops.range(dim_index), [last_index], 2822 math_ops.range(dim_index + 1, last_index), [dim_index] 2823 ], 0), 2824 name=name) 2825 2826 logits = ops.convert_to_tensor(logits) 2827 2828 # We need its original shape for shape inference. 2829 shape = logits.get_shape() 2830 is_last_dim = (dim == -1) or (dim == shape.ndims - 1) 2831 2832 if is_last_dim: 2833 return compute_op(logits, name=name) 2834 2835 dim_val = dim 2836 if isinstance(dim, ops.Tensor): 2837 dim_val = tensor_util.constant_value(dim) 2838 if dim_val is not None and not -shape.ndims <= dim_val < shape.ndims: 2839 raise errors_impl.InvalidArgumentError( 2840 None, None, 2841 "Dimension (%d) must be in the range [%d, %d) where %d is the number of" 2842 " dimensions in the input." % (dim_val, -shape.ndims, shape.ndims, 2843 shape.ndims)) 2844 2845 # If dim is not the last dimension, we have to do a transpose so that we can 2846 # still perform softmax on its last dimension. 2847 2848 # In case dim is negative (and is not last dimension -1), add shape.ndims 2849 ndims = array_ops.rank(logits) 2850 if not isinstance(dim, ops.Tensor): 2851 if dim < 0: 2852 dim += ndims 2853 else: 2854 dim = array_ops.where(math_ops.less(dim, 0), dim + ndims, dim) 2855 2856 # Swap logits' dimension of dim and its last dimension. 2857 input_rank = array_ops.rank(logits) 2858 dim_axis = dim % shape.ndims 2859 logits = _swap_axis(logits, dim_axis, math_ops.subtract(input_rank, 1)) 2860 2861 # Do the actual softmax on its last dimension. 2862 output = compute_op(logits) 2863 2864 output = _swap_axis( 2865 output, dim_axis, math_ops.subtract(input_rank, 1), name=name) 2866 2867 # Make shape inference work since transpose may erase its static shape. 2868 output.set_shape(shape) 2869 2870 return output 2871 2872 2873 @tf_export(v1=["nn.softmax", "math.softmax"]) 2874 @deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") 2875 def softmax(logits, axis=None, name=None, dim=None): 2876 """Computes softmax activations. 2877 2878 This function performs the equivalent of 2879 2880 softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis) 2881 2882 Args: 2883 logits: A non-empty `Tensor`. Must be one of the following types: `half`, 2884 `float32`, `float64`. 2885 axis: The dimension softmax would be performed on. The default is -1 which 2886 indicates the last dimension. 2887 name: A name for the operation (optional). 2888 dim: Deprecated alias for `axis`. 2889 2890 Returns: 2891 A `Tensor`. Has the same type and shape as `logits`. 2892 2893 Raises: 2894 InvalidArgumentError: if `logits` is empty or `axis` is beyond the last 2895 dimension of `logits`. 2896 """ 2897 axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) 2898 if axis is None: 2899 axis = -1 2900 return _softmax(logits, gen_nn_ops.softmax, axis, name) 2901 2902 2903 @tf_export("nn.softmax", "math.softmax", v1=[]) 2904 def softmax_v2(logits, axis=None, name=None): 2905 """Computes softmax activations. 2906 2907 This function performs the equivalent of 2908 2909 softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis) 2910 2911 Args: 2912 logits: A non-empty `Tensor`. Must be one of the following types: `half`, 2913 `float32`, `float64`. 2914 axis: The dimension softmax would be performed on. The default is -1 which 2915 indicates the last dimension. 2916 name: A name for the operation (optional). 2917 2918 Returns: 2919 A `Tensor`. Has the same type and shape as `logits`. 2920 2921 Raises: 2922 InvalidArgumentError: if `logits` is empty or `axis` is beyond the last 2923 dimension of `logits`. 2924 """ 2925 if axis is None: 2926 axis = -1 2927 return _softmax(logits, gen_nn_ops.softmax, axis, name) 2928 2929 2930 @tf_export(v1=["nn.log_softmax", "math.log_softmax"]) 2931 @deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") 2932 def log_softmax(logits, axis=None, name=None, dim=None): 2933 """Computes log softmax activations. 2934 2935 For each batch `i` and class `j` we have 2936 2937 logsoftmax = logits - log(reduce_sum(exp(logits), axis)) 2938 2939 Args: 2940 logits: A non-empty `Tensor`. Must be one of the following types: `half`, 2941 `float32`, `float64`. 2942 axis: The dimension softmax would be performed on. The default is -1 which 2943 indicates the last dimension. 2944 name: A name for the operation (optional). 2945 dim: Deprecated alias for `axis`. 2946 2947 Returns: 2948 A `Tensor`. Has the same type as `logits`. Same shape as `logits`. 2949 2950 Raises: 2951 InvalidArgumentError: if `logits` is empty or `axis` is beyond the last 2952 dimension of `logits`. 2953 """ 2954 axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) 2955 if axis is None: 2956 axis = -1 2957 return _softmax(logits, gen_nn_ops.log_softmax, axis, name) 2958 2959 2960 @tf_export("nn.log_softmax", "math.log_softmax", v1=[]) 2961 def log_softmax_v2(logits, axis=None, name=None): 2962 """Computes log softmax activations. 2963 2964 For each batch `i` and class `j` we have 2965 2966 logsoftmax = logits - log(reduce_sum(exp(logits), axis)) 2967 2968 Args: 2969 logits: A non-empty `Tensor`. Must be one of the following types: `half`, 2970 `float32`, `float64`. 2971 axis: The dimension softmax would be performed on. The default is -1 which 2972 indicates the last dimension. 2973 name: A name for the operation (optional). 2974 2975 Returns: 2976 A `Tensor`. Has the same type as `logits`. Same shape as `logits`. 2977 2978 Raises: 2979 InvalidArgumentError: if `logits` is empty or `axis` is beyond the last 2980 dimension of `logits`. 2981 """ 2982 if axis is None: 2983 axis = -1 2984 return _softmax(logits, gen_nn_ops.log_softmax, axis, name) 2985 2986 2987 def _ensure_xent_args(name, sentinel, labels, logits): 2988 # Make sure that all arguments were passed as named arguments. 2989 if sentinel is not None: 2990 raise ValueError("Only call `%s` with " 2991 "named arguments (labels=..., logits=..., ...)" % name) 2992 if labels is None or logits is None: 2993 raise ValueError("Both labels and logits must be provided.") 2994 2995 2996 @tf_export("nn.softmax_cross_entropy_with_logits", v1=[]) 2997 def softmax_cross_entropy_with_logits_v2(labels, logits, axis=-1, name=None): 2998 """Computes softmax cross entropy between `logits` and `labels`. 2999 3000 Measures the probability error in discrete classification tasks in which the 3001 classes are mutually exclusive (each entry is in exactly one class). For 3002 example, each CIFAR-10 image is labeled with one and only one label: an image 3003 can be a dog or a truck, but not both. 3004 3005 **NOTE:** While the classes are mutually exclusive, their probabilities 3006 need not be. All that is required is that each row of `labels` is 3007 a valid probability distribution. If they are not, the computation of the 3008 gradient will be incorrect. 3009 3010 If using exclusive `labels` (wherein one and only 3011 one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`. 3012 3013 **WARNING:** This op expects unscaled logits, since it performs a `softmax` 3014 on `logits` internally for efficiency. Do not call this op with the 3015 output of `softmax`, as it will produce incorrect results. 3016 3017 A common use case is to have logits and labels of shape 3018 `[batch_size, num_classes]`, but higher dimensions are supported, with 3019 the `axis` argument specifying the class dimension. 3020 3021 `logits` and `labels` must have the same dtype (either `float16`, `float32`, 3022 or `float64`). 3023 3024 Backpropagation will happen into both `logits` and `labels`. To disallow 3025 backpropagation into `labels`, pass label tensors through `tf.stop_gradient` 3026 before feeding it to this function. 3027 3028 **Note that to avoid confusion, it is required to pass only named arguments to 3029 this function.** 3030 3031 Args: 3032 labels: Each vector along the class dimension should hold a valid 3033 probability distribution e.g. for the case in which labels are of shape 3034 `[batch_size, num_classes]`, each row of `labels[i]` must be a valid 3035 probability distribution. 3036 logits: Unscaled log probabilities. 3037 axis: The class dimension. Defaulted to -1 which is the last dimension. 3038 name: A name for the operation (optional). 3039 3040 Returns: 3041 A `Tensor` that contains the softmax cross entropy loss. Its type is the 3042 same as `logits` and its shape is the same as `labels` except that it does 3043 not have the last dimension of `labels`. 3044 """ 3045 return softmax_cross_entropy_with_logits_v2_helper( 3046 labels=labels, logits=logits, axis=axis, name=name) 3047 3048 3049 @tf_export(v1=["nn.softmax_cross_entropy_with_logits_v2"]) 3050 @deprecated_args(None, "dim is deprecated, use axis instead", "dim") 3051 def softmax_cross_entropy_with_logits_v2_helper( 3052 labels, logits, axis=None, name=None, dim=None): 3053 """Computes softmax cross entropy between `logits` and `labels`. 3054 3055 Measures the probability error in discrete classification tasks in which the 3056 classes are mutually exclusive (each entry is in exactly one class). For 3057 example, each CIFAR-10 image is labeled with one and only one label: an image 3058 can be a dog or a truck, but not both. 3059 3060 **NOTE:** While the classes are mutually exclusive, their probabilities 3061 need not be. All that is required is that each row of `labels` is 3062 a valid probability distribution. If they are not, the computation of the 3063 gradient will be incorrect. 3064 3065 If using exclusive `labels` (wherein one and only 3066 one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`. 3067 3068 **WARNING:** This op expects unscaled logits, since it performs a `softmax` 3069 on `logits` internally for efficiency. Do not call this op with the 3070 output of `softmax`, as it will produce incorrect results. 3071 3072 A common use case is to have logits and labels of shape 3073 `[batch_size, num_classes]`, but higher dimensions are supported, with 3074 the `axis` argument specifying the class dimension. 3075 3076 `logits` and `labels` must have the same dtype (either `float16`, `float32`, 3077 or `float64`). 3078 3079 Backpropagation will happen into both `logits` and `labels`. To disallow 3080 backpropagation into `labels`, pass label tensors through `tf.stop_gradient` 3081 before feeding it to this function. 3082 3083 **Note that to avoid confusion, it is required to pass only named arguments to 3084 this function.** 3085 3086 Args: 3087 labels: Each vector along the class dimension should hold a valid 3088 probability distribution e.g. for the case in which labels are of shape 3089 `[batch_size, num_classes]`, each row of `labels[i]` must be a valid 3090 probability distribution. 3091 logits: Unscaled log probabilities. 3092 axis: The class dimension. Defaulted to -1 which is the last dimension. 3093 name: A name for the operation (optional). 3094 dim: Deprecated alias for axis. 3095 3096 Returns: 3097 A `Tensor` that contains the softmax cross entropy loss. Its type is the 3098 same as `logits` and its shape is the same as `labels` except that it does 3099 not have the last dimension of `labels`. 3100 """ 3101 # TODO(pcmurray) Raise an error when the labels do not sum to 1. Note: This 3102 # could break users who call this with bad labels, but disregard the bad 3103 # results. 3104 axis = deprecated_argument_lookup("axis", axis, "dim", dim) 3105 del dim 3106 if axis is None: 3107 axis = -1 3108 3109 with ops.name_scope(name, "softmax_cross_entropy_with_logits", 3110 [logits, labels]) as name: 3111 logits = ops.convert_to_tensor(logits, name="logits") 3112 labels = ops.convert_to_tensor(labels, name="labels") 3113 convert_to_float32 = ( 3114 logits.dtype == dtypes.float16 or logits.dtype == dtypes.bfloat16) 3115 precise_logits = math_ops.cast( 3116 logits, dtypes.float32) if convert_to_float32 else logits 3117 # labels and logits must be of the same type 3118 labels = math_ops.cast(labels, precise_logits.dtype) 3119 input_rank = array_ops.rank(precise_logits) 3120 # For shape inference. 3121 shape = logits.get_shape() 3122 3123 # Move the dim to the end if dim is not the last dimension. 3124 if axis != -1: 3125 3126 def _move_dim_to_end(tensor, dim_index, rank): 3127 return array_ops.transpose( 3128 tensor, 3129 array_ops.concat([ 3130 math_ops.range(dim_index), 3131 math_ops.range(dim_index + 1, rank), [dim_index] 3132 ], 0)) 3133 3134 precise_logits = _move_dim_to_end(precise_logits, axis, input_rank) 3135 labels = _move_dim_to_end(labels, axis, input_rank) 3136 3137 input_shape = array_ops.shape(precise_logits) 3138 3139 # Make precise_logits and labels into matrices. 3140 precise_logits = _flatten_outer_dims(precise_logits) 3141 labels = _flatten_outer_dims(labels) 3142 3143 # Do the actual op computation. 3144 # The second output tensor contains the gradients. We use it in 3145 # _CrossEntropyGrad() in nn_grad but not here. 3146 cost, unused_backprop = gen_nn_ops.softmax_cross_entropy_with_logits( 3147 precise_logits, labels, name=name) 3148 3149 # The output cost shape should be the input minus axis. 3150 output_shape = array_ops.slice(input_shape, [0], 3151 [math_ops.subtract(input_rank, 1)]) 3152 cost = array_ops.reshape(cost, output_shape) 3153 3154 # Make shape inference work since reshape and transpose may erase its static 3155 # shape. 3156 if not context.executing_eagerly( 3157 ) and shape is not None and shape.dims is not None: 3158 shape = shape.as_list() 3159 del shape[axis] 3160 cost.set_shape(shape) 3161 3162 if convert_to_float32: 3163 return math_ops.cast(cost, logits.dtype) 3164 else: 3165 return cost 3166 3167 3168 _XENT_DEPRECATION = """ 3169 Future major versions of TensorFlow will allow gradients to flow 3170 into the labels input on backprop by default. 3171 3172 See `tf.nn.softmax_cross_entropy_with_logits_v2`. 3173 """ 3174 3175 3176 @tf_export(v1=["nn.softmax_cross_entropy_with_logits"]) 3177 @deprecation.deprecated(date=None, instructions=_XENT_DEPRECATION) 3178 def softmax_cross_entropy_with_logits( 3179 _sentinel=None, # pylint: disable=invalid-name 3180 labels=None, 3181 logits=None, 3182 dim=-1, 3183 name=None, 3184 axis=None): 3185 """Computes softmax cross entropy between `logits` and `labels`. 3186 3187 Measures the probability error in discrete classification tasks in which the 3188 classes are mutually exclusive (each entry is in exactly one class). For 3189 example, each CIFAR-10 image is labeled with one and only one label: an image 3190 can be a dog or a truck, but not both. 3191 3192 **NOTE:** While the classes are mutually exclusive, their probabilities 3193 need not be. All that is required is that each row of `labels` is 3194 a valid probability distribution. If they are not, the computation of the 3195 gradient will be incorrect. 3196 3197 If using exclusive `labels` (wherein one and only 3198 one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`. 3199 3200 **WARNING:** This op expects unscaled logits, since it performs a `softmax` 3201 on `logits` internally for efficiency. Do not call this op with the 3202 output of `softmax`, as it will produce incorrect results. 3203 3204 A common use case is to have logits and labels of shape 3205 `[batch_size, num_classes]`, but higher dimensions are supported, with 3206 the `dim` argument specifying the class dimension. 3207 3208 Backpropagation will happen only into `logits`. To calculate a cross entropy 3209 loss that allows backpropagation into both `logits` and `labels`, see 3210 `tf.nn.softmax_cross_entropy_with_logits_v2`. 3211 3212 **Note that to avoid confusion, it is required to pass only named arguments to 3213 this function.** 3214 3215 Args: 3216 _sentinel: Used to prevent positional parameters. Internal, do not use. 3217 labels: Each vector along the class dimension should hold a valid 3218 probability distribution e.g. for the case in which labels are of shape 3219 `[batch_size, num_classes]`, each row of `labels[i]` must be a valid 3220 probability distribution. 3221 logits: Unscaled log probabilities. 3222 dim: The class dimension. Defaulted to -1 which is the last dimension. 3223 name: A name for the operation (optional). 3224 axis: Alias for dim. 3225 3226 Returns: 3227 A `Tensor` that contains the softmax cross entropy loss. Its type is the 3228 same as `logits` and its shape is the same as `labels` except that it does 3229 not have the last dimension of `labels`. 3230 """ 3231 dim = deprecated_argument_lookup("axis", axis, "dim", dim) 3232 _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, labels, 3233 logits) 3234 3235 with ops.name_scope(name, "softmax_cross_entropy_with_logits_sg", 3236 [logits, labels]) as name: 3237 labels = array_ops.stop_gradient(labels, name="labels_stop_gradient") 3238 3239 return softmax_cross_entropy_with_logits_v2( 3240 labels=labels, logits=logits, axis=dim, name=name) 3241 3242 3243 @tf_export("nn.sparse_softmax_cross_entropy_with_logits") 3244 def sparse_softmax_cross_entropy_with_logits( 3245 _sentinel=None, # pylint: disable=invalid-name 3246 labels=None, 3247 logits=None, 3248 name=None): 3249 """Computes sparse softmax cross entropy between `logits` and `labels`. 3250 3251 Measures the probability error in discrete classification tasks in which the 3252 classes are mutually exclusive (each entry is in exactly one class). For 3253 example, each CIFAR-10 image is labeled with one and only one label: an image 3254 can be a dog or a truck, but not both. 3255 3256 **NOTE:** For this operation, the probability of a given label is considered 3257 exclusive. That is, soft classes are not allowed, and the `labels` vector 3258 must provide a single specific index for the true class for each row of 3259 `logits` (each minibatch entry). For soft softmax classification with 3260 a probability distribution for each entry, see 3261 `softmax_cross_entropy_with_logits_v2`. 3262 3263 **WARNING:** This op expects unscaled logits, since it performs a `softmax` 3264 on `logits` internally for efficiency. Do not call this op with the 3265 output of `softmax`, as it will produce incorrect results. 3266 3267 A common use case is to have logits of shape 3268 `[batch_size, num_classes]` and have labels of shape 3269 `[batch_size]`, but higher dimensions are supported, in which 3270 case the `dim`-th dimension is assumed to be of size `num_classes`. 3271 `logits` must have the dtype of `float16`, `float32`, or `float64`, and 3272 `labels` must have the dtype of `int32` or `int64`. 3273 3274 **Note that to avoid confusion, it is required to pass only named arguments to 3275 this function.** 3276 3277 Args: 3278 _sentinel: Used to prevent positional parameters. Internal, do not use. 3279 labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of 3280 `labels` and result) and dtype `int32` or `int64`. Each entry in `labels` 3281 must be an index in `[0, num_classes)`. Other values will raise an 3282 exception when this op is run on CPU, and return `NaN` for corresponding 3283 loss and gradient rows on GPU. 3284 logits: Unscaled log probabilities of shape 3285 `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32`, or 3286 `float64`. 3287 name: A name for the operation (optional). 3288 3289 Returns: 3290 A `Tensor` of the same shape as `labels` and of the same type as `logits` 3291 with the softmax cross entropy loss. 3292 3293 Raises: 3294 ValueError: If logits are scalars (need to have rank >= 1) or if the rank 3295 of the labels is not equal to the rank of the logits minus one. 3296 """ 3297 _ensure_xent_args("sparse_softmax_cross_entropy_with_logits", _sentinel, 3298 labels, logits) 3299 3300 # TODO(pcmurray) Raise an error when the label is not an index in 3301 # [0, num_classes). Note: This could break users who call this with bad 3302 # labels, but disregard the bad results. 3303 3304 # Reshape logits and labels to rank 2. 3305 with ops.name_scope(name, "SparseSoftmaxCrossEntropyWithLogits", 3306 [labels, logits]): 3307 labels = ops.convert_to_tensor(labels) 3308 logits = ops.convert_to_tensor(logits) 3309 precise_logits = math_ops.cast(logits, dtypes.float32) if (dtypes.as_dtype( 3310 logits.dtype) == dtypes.float16) else logits 3311 3312 # Store label shape for result later. 3313 labels_static_shape = labels.get_shape() 3314 labels_shape = array_ops.shape(labels) 3315 static_shapes_fully_defined = ( 3316 labels_static_shape.is_fully_defined() and 3317 logits.get_shape()[:-1].is_fully_defined()) 3318 if logits.get_shape().ndims is not None and logits.get_shape().ndims == 0: 3319 raise ValueError( 3320 "Logits cannot be scalars - received shape %s." % logits.get_shape()) 3321 if logits.get_shape().ndims is not None and ( 3322 labels_static_shape.ndims is not None and 3323 labels_static_shape.ndims != logits.get_shape().ndims - 1): 3324 raise ValueError("Rank mismatch: Rank of labels (received %s) should " 3325 "equal rank of logits minus 1 (received %s)." % 3326 (labels_static_shape.ndims, logits.get_shape().ndims)) 3327 if (static_shapes_fully_defined and 3328 labels_static_shape != logits.get_shape()[:-1]): 3329 raise ValueError("Shape mismatch: The shape of labels (received %s) " 3330 "should equal the shape of logits except for the last " 3331 "dimension (received %s)." % (labels_static_shape, 3332 logits.get_shape())) 3333 # Check if no reshapes are required. 3334 if logits.get_shape().ndims == 2: 3335 cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( 3336 precise_logits, labels, name=name) 3337 if logits.dtype == dtypes.float16: 3338 return math_ops.cast(cost, dtypes.float16) 3339 else: 3340 return cost 3341 3342 # Perform a check of the dynamic shapes if the static shapes are not fully 3343 # defined. 3344 shape_checks = [] 3345 if not static_shapes_fully_defined: 3346 shape_checks.append( 3347 check_ops.assert_equal( 3348 array_ops.shape(labels), 3349 array_ops.shape(logits)[:-1])) 3350 with ops.control_dependencies(shape_checks): 3351 # Reshape logits to 2 dim, labels to 1 dim. 3352 num_classes = array_ops.shape(logits)[array_ops.rank(logits) - 1] 3353 precise_logits = array_ops.reshape(precise_logits, [-1, num_classes]) 3354 labels = array_ops.reshape(labels, [-1]) 3355 # The second output tensor contains the gradients. We use it in 3356 # _CrossEntropyGrad() in nn_grad but not here. 3357 cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( 3358 precise_logits, labels, name=name) 3359 cost = array_ops.reshape(cost, labels_shape) 3360 cost.set_shape(labels_static_shape) 3361 if logits.dtype == dtypes.float16: 3362 return math_ops.cast(cost, dtypes.float16) 3363 else: 3364 return cost 3365 3366 3367 @tf_export("nn.avg_pool", v1=["nn.avg_pool_v2"]) 3368 def avg_pool_v2(input, ksize, strides, padding, data_format=None, name=None): # pylint: disable=redefined-builtin 3369 """Performs the avg pooling on the input. 3370 3371 Each entry in `output` is the mean of the corresponding size `ksize` 3372 window in `value`. 3373 3374 Args: 3375 input: Tensor of rank N+2, of shape `[batch_size] + input_spatial_shape + 3376 [num_channels]` if `data_format` does not start with "NC" (default), or 3377 `[batch_size, num_channels] + input_spatial_shape` if data_format starts 3378 with "NC". Pooling happens over the spatial dimensions only. 3379 ksize: An int or list of `ints` that has length `1`, `N` or `N+2`. The size 3380 of the window for each dimension of the input tensor. 3381 strides: An int or list of `ints` that has length `1`, `N` or `N+2`. The 3382 stride of the sliding window for each dimension of the input tensor. 3383 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 3384 the "returns" section of `tf.nn.convolution` for details. 3385 data_format: A string. Specifies the channel dimension. For N=1 it can be 3386 either "NWC" (default) or "NCW", for N=2 it can be either "NHWC" (default) 3387 or "NCHW" and for N=3 either "NDHWC" (default) or "NCDHW". 3388 name: Optional name for the operation. 3389 3390 Returns: 3391 A `Tensor` of format specified by `data_format`. 3392 The average pooled output tensor. 3393 """ 3394 if input.shape is not None: 3395 n = len(input.shape) - 2 3396 elif data_format is not None: 3397 n = len(data_format) - 2 3398 else: 3399 raise ValueError( 3400 "The input must have a rank or a data format must be given.") 3401 if not 1 <= n <= 3: 3402 raise ValueError( 3403 "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2)) 3404 3405 if data_format is None: 3406 channel_index = n + 1 3407 else: 3408 channel_index = 1 if data_format.startswith("NC") else n + 1 3409 3410 ksize = _get_sequence(ksize, n, channel_index, "ksize") 3411 strides = _get_sequence(strides, n, channel_index, "strides") 3412 3413 avg_pooling_ops = { 3414 1: avg_pool1d, 3415 2: gen_nn_ops.avg_pool, 3416 3: gen_nn_ops.avg_pool3d 3417 } 3418 3419 op = avg_pooling_ops[n] 3420 return op( 3421 input, 3422 ksize=ksize, 3423 strides=strides, 3424 padding=padding, 3425 data_format=data_format, 3426 name=name) 3427 3428 3429 @tf_export(v1=["nn.avg_pool", "nn.avg_pool2d"]) 3430 def avg_pool(value, ksize, strides, padding, data_format="NHWC", 3431 name=None, input=None): # pylint: disable=redefined-builtin 3432 """Performs the average pooling on the input. 3433 3434 Each entry in `output` is the mean of the corresponding size `ksize` 3435 window in `value`. 3436 3437 Args: 3438 value: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type 3439 `float32`, `float64`, `qint8`, `quint8`, or `qint32`. 3440 ksize: An int or list of `ints` that has length `1`, `2` or `4`. The size of 3441 the window for each dimension of the input tensor. 3442 strides: An int or list of `ints` that has length `1`, `2` or `4`. The 3443 stride of the sliding window for each dimension of the input tensor. 3444 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 3445 See the "returns" section of `tf.nn.convolution` for details. 3446 data_format: A string. 'NHWC' and 'NCHW' are supported. 3447 name: Optional name for the operation. 3448 input: Alias for value. 3449 3450 Returns: 3451 A `Tensor` with the same type as `value`. The average pooled output tensor. 3452 """ 3453 with ops.name_scope(name, "AvgPool", [value]) as name: 3454 value = deprecation.deprecated_argument_lookup( 3455 "input", input, "value", value) 3456 3457 if data_format is None: 3458 data_format = "NHWC" 3459 channel_index = 1 if data_format.startswith("NC") else 3 3460 3461 ksize = _get_sequence(ksize, 2, channel_index, "ksize") 3462 strides = _get_sequence(strides, 2, channel_index, "strides") 3463 3464 return gen_nn_ops.avg_pool( 3465 value, 3466 ksize=ksize, 3467 strides=strides, 3468 padding=padding, 3469 data_format=data_format, 3470 name=name) 3471 3472 3473 @tf_export("nn.avg_pool2d", v1=[]) 3474 def avg_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None): # pylint: disable=redefined-builtin 3475 """Performs the average pooling on the input. 3476 3477 Each entry in `output` is the mean of the corresponding size `ksize` 3478 window in `value`. 3479 3480 Args: 3481 input: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type 3482 `float32`, `float64`, `qint8`, `quint8`, or `qint32`. 3483 ksize: An int or list of `ints` that has length `1`, `2` or `4`. The size of 3484 the window for each dimension of the input tensor. 3485 strides: An int or list of `ints` that has length `1`, `2` or `4`. The 3486 stride of the sliding window for each dimension of the input tensor. 3487 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 3488 See the "returns" section of `tf.nn.convolution` for details. 3489 data_format: A string. 'NHWC' and 'NCHW' are supported. 3490 name: Optional name for the operation. 3491 3492 Returns: 3493 A `Tensor` with the same type as `value`. The average pooled output tensor. 3494 """ 3495 with ops.name_scope(name, "AvgPool2D", [input]) as name: 3496 if data_format is None: 3497 data_format = "NHWC" 3498 channel_index = 1 if data_format.startswith("NC") else 3 3499 3500 ksize = _get_sequence(ksize, 2, channel_index, "ksize") 3501 strides = _get_sequence(strides, 2, channel_index, "strides") 3502 3503 return gen_nn_ops.avg_pool( 3504 input, 3505 ksize=ksize, 3506 strides=strides, 3507 padding=padding, 3508 data_format=data_format, 3509 name=name) 3510 3511 3512 @tf_export("nn.avg_pool1d") 3513 def avg_pool1d(input, ksize, strides, padding, data_format="NWC", name=None): # pylint: disable=redefined-builtin 3514 """Performs the average pooling on the input. 3515 3516 Each entry in `output` is the mean of the corresponding size `ksize` 3517 window in `value`. 3518 3519 Note internally this op reshapes and uses the underlying 2d operation. 3520 3521 Args: 3522 input: A 3-D `Tensor` of the format specified by `data_format`. 3523 ksize: An int or list of `ints` that has length `1` or `3`. The size of the 3524 window for each dimension of the input tensor. 3525 strides: An int or list of `ints` that has length `1` or `3`. The stride of 3526 the sliding window for each dimension of the input tensor. 3527 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 3528 the "returns" section of `tf.nn.convolution` for details. 3529 data_format: An optional string from: "NWC", "NCW". Defaults to "NWC". 3530 name: A name for the operation (optional). 3531 3532 Returns: 3533 A `Tensor` of format specified by `data_format`. 3534 The max pooled output tensor. 3535 """ 3536 with ops.name_scope(name, "AvgPool1D", [input]) as name: 3537 if data_format is None: 3538 data_format = "NWC" 3539 channel_index = 1 if data_format.startswith("NC") else 2 3540 ksize = [1] + _get_sequence(ksize, 1, channel_index, "ksize") 3541 strides = [1] + _get_sequence(strides, 1, channel_index, "strides") 3542 3543 data_format = "NHWC" if data_format == "NWC" else "NCHW" 3544 expanding_dim = 1 if data_format == "NWC" else 2 3545 3546 input = array_ops.expand_dims_v2(input, expanding_dim) 3547 result = gen_nn_ops.avg_pool( 3548 input, 3549 ksize=ksize, 3550 strides=strides, 3551 padding=padding, 3552 data_format=data_format, 3553 name=name) 3554 return array_ops.squeeze(result, expanding_dim) 3555 3556 3557 @tf_export("nn.avg_pool3d") 3558 def avg_pool3d(input, ksize, strides, padding, data_format="NDHWC", name=None): # pylint: disable=redefined-builtin 3559 """Performs the average pooling on the input. 3560 3561 Each entry in `output` is the mean of the corresponding size `ksize` 3562 window in `value`. 3563 3564 Args: 3565 input: A 5-D `Tensor` of shape `[batch, height, width, channels]` and type 3566 `float32`, `float64`, `qint8`, `quint8`, or `qint32`. 3567 ksize: An int or list of `ints` that has length `1`, `3` or `5`. The size of 3568 the window for each dimension of the input tensor. 3569 strides: An int or list of `ints` that has length `1`, `3` or `5`. The 3570 stride of the sliding window for each dimension of the input tensor. 3571 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 3572 See the "returns" section of `tf.nn.convolution` for details. 3573 data_format: A string. 'NDHWC' and 'NCDHW' are supported. 3574 name: Optional name for the operation. 3575 3576 Returns: 3577 A `Tensor` with the same type as `value`. The average pooled output tensor. 3578 """ 3579 with ops.name_scope(name, "AvgPool3D", [input]) as name: 3580 if data_format is None: 3581 data_format = "NDHWC" 3582 channel_index = 1 if data_format.startswith("NC") else 3 3583 3584 ksize = _get_sequence(ksize, 3, channel_index, "ksize") 3585 strides = _get_sequence(strides, 3, channel_index, "strides") 3586 3587 return gen_nn_ops.avg_pool3d( 3588 input, 3589 ksize=ksize, 3590 strides=strides, 3591 padding=padding, 3592 data_format=data_format, 3593 name=name) 3594 3595 3596 # pylint: disable=redefined-builtin 3597 @tf_export("nn.max_pool", v1=["nn.max_pool_v2"]) 3598 def max_pool_v2(input, ksize, strides, padding, data_format=None, name=None): 3599 """Performs the max pooling on the input. 3600 3601 Args: 3602 input: Tensor of rank N+2, of shape `[batch_size] + input_spatial_shape + 3603 [num_channels]` if `data_format` does not start with "NC" (default), or 3604 `[batch_size, num_channels] + input_spatial_shape` if data_format starts 3605 with "NC". Pooling happens over the spatial dimensions only. 3606 ksize: An int or list of `ints` that has length `1`, `N` or `N+2`. The size 3607 of the window for each dimension of the input tensor. 3608 strides: An int or list of `ints` that has length `1`, `N` or `N+2`. The 3609 stride of the sliding window for each dimension of the input tensor. 3610 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 3611 the "returns" section of `tf.nn.convolution` for details. 3612 data_format: A string. Specifies the channel dimension. For N=1 it can be 3613 either "NWC" (default) or "NCW", for N=2 it can be either "NHWC" (default) 3614 or "NCHW" and for N=3 either "NDHWC" (default) or "NCDHW". 3615 name: Optional name for the operation. 3616 3617 Returns: 3618 A `Tensor` of format specified by `data_format`. 3619 The max pooled output tensor. 3620 """ 3621 if input.shape is not None: 3622 n = len(input.shape) - 2 3623 elif data_format is not None: 3624 n = len(data_format) - 2 3625 else: 3626 raise ValueError( 3627 "The input must have a rank or a data format must be given.") 3628 if not 1 <= n <= 3: 3629 raise ValueError( 3630 "Input tensor must be of rank 3, 4 or 5 but was {}.".format(n + 2)) 3631 3632 if data_format is None: 3633 channel_index = n + 1 3634 else: 3635 channel_index = 1 if data_format.startswith("NC") else n + 1 3636 3637 ksize = _get_sequence(ksize, n, channel_index, "ksize") 3638 strides = _get_sequence(strides, n, channel_index, "strides") 3639 3640 max_pooling_ops = { 3641 1: max_pool1d, 3642 2: gen_nn_ops.max_pool, 3643 3: gen_nn_ops.max_pool3d 3644 } 3645 3646 op = max_pooling_ops[n] 3647 return op( 3648 input, 3649 ksize=ksize, 3650 strides=strides, 3651 padding=padding, 3652 data_format=data_format, 3653 name=name) 3654 # pylint: enable=redefined-builtin 3655 3656 3657 @tf_export(v1=["nn.max_pool"]) 3658 def max_pool(value, 3659 ksize, 3660 strides, 3661 padding, 3662 data_format="NHWC", 3663 name=None, 3664 input=None): # pylint: disable=redefined-builtin 3665 """Performs the max pooling on the input. 3666 3667 Args: 3668 value: A 4-D `Tensor` of the format specified by `data_format`. 3669 ksize: An int or list of `ints` that has length `1`, `2` or `4`. 3670 The size of the window for each dimension of the input tensor. 3671 strides: An int or list of `ints` that has length `1`, `2` or `4`. 3672 The stride of the sliding window for each dimension of the input tensor. 3673 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 3674 See the "returns" section of `tf.nn.convolution` for details. 3675 data_format: A string. 'NHWC', 'NCHW' and 'NCHW_VECT_C' are supported. 3676 name: Optional name for the operation. 3677 input: Alias for value. 3678 3679 Returns: 3680 A `Tensor` of format specified by `data_format`. 3681 The max pooled output tensor. 3682 """ 3683 value = deprecation.deprecated_argument_lookup("input", input, "value", value) 3684 with ops.name_scope(name, "MaxPool", [value]) as name: 3685 if data_format is None: 3686 data_format = "NHWC" 3687 channel_index = 1 if data_format.startswith("NC") else 3 3688 3689 ksize = _get_sequence(ksize, 2, channel_index, "ksize") 3690 strides = _get_sequence(strides, 2, channel_index, "strides") 3691 3692 return gen_nn_ops.max_pool( 3693 value, 3694 ksize=ksize, 3695 strides=strides, 3696 padding=padding, 3697 data_format=data_format, 3698 name=name) 3699 3700 3701 # pylint: disable=redefined-builtin 3702 @tf_export("nn.max_pool1d") 3703 def max_pool1d(input, ksize, strides, padding, data_format="NWC", name=None): 3704 """Performs the max pooling on the input. 3705 3706 Note internally this op reshapes and uses the underlying 2d operation. 3707 3708 Args: 3709 input: A 3-D `Tensor` of the format specified by `data_format`. 3710 ksize: An int or list of `ints` that has length `1` or `3`. The size of the 3711 window for each dimension of the input tensor. 3712 strides: An int or list of `ints` that has length `1` or `3`. The stride of 3713 the sliding window for each dimension of the input tensor. 3714 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 3715 the "returns" section of `tf.nn.convolution` for details. 3716 data_format: An optional string from: "NWC", "NCW". Defaults to "NWC". 3717 name: A name for the operation (optional). 3718 3719 Returns: 3720 A `Tensor` of format specified by `data_format`. 3721 The max pooled output tensor. 3722 """ 3723 with ops.name_scope(name, "MaxPool1d", [input]) as name: 3724 if data_format is None: 3725 data_format = "NWC" 3726 channel_index = 1 if data_format.startswith("NC") else 2 3727 ksize = [1] + _get_sequence(ksize, 1, channel_index, "ksize") 3728 strides = [1] + _get_sequence(strides, 1, channel_index, "strides") 3729 3730 data_format = "NHWC" if data_format == "NWC" else "NCHW" 3731 expanding_dim = 1 if data_format == "NWC" else 2 3732 3733 input = array_ops.expand_dims_v2(input, expanding_dim) 3734 result = gen_nn_ops.max_pool( 3735 input, 3736 ksize=ksize, 3737 strides=strides, 3738 padding=padding, 3739 data_format=data_format, 3740 name=name) 3741 return array_ops.squeeze(result, expanding_dim) 3742 # pylint: enable=redefined-builtin 3743 3744 3745 # pylint: disable=redefined-builtin 3746 @tf_export("nn.max_pool2d") 3747 def max_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None): 3748 """Performs the max pooling on the input. 3749 3750 Args: 3751 input: A 4-D `Tensor` of the format specified by `data_format`. 3752 ksize: An int or list of `ints` that has length `1`, `2` or `4`. The size of 3753 the window for each dimension of the input tensor. 3754 strides: An int or list of `ints` that has length `1`, `2` or `4`. The 3755 stride of the sliding window for each dimension of the input tensor. 3756 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 3757 the "returns" section of `tf.nn.convolution` for details. 3758 data_format: A string. 'NHWC', 'NCHW' and 'NCHW_VECT_C' are supported. 3759 name: Optional name for the operation. 3760 3761 Returns: 3762 A `Tensor` of format specified by `data_format`. 3763 The max pooled output tensor. 3764 """ 3765 with ops.name_scope(name, "MaxPool2d", [input]) as name: 3766 if data_format is None: 3767 data_format = "NHWC" 3768 channel_index = 1 if data_format.startswith("NC") else 3 3769 3770 ksize = _get_sequence(ksize, 2, channel_index, "ksize") 3771 strides = _get_sequence(strides, 2, channel_index, "strides") 3772 3773 return gen_nn_ops.max_pool( 3774 input, 3775 ksize=ksize, 3776 strides=strides, 3777 padding=padding, 3778 data_format=data_format, 3779 name=name) 3780 # pylint: enable=redefined-builtin 3781 3782 3783 # pylint: disable=redefined-builtin 3784 @tf_export("nn.max_pool3d") 3785 def max_pool3d(input, ksize, strides, padding, data_format="NDHWC", name=None): 3786 """Performs the max pooling on the input. 3787 3788 Args: 3789 input: A 5-D `Tensor` of the format specified by `data_format`. 3790 ksize: An int or list of `ints` that has length `1`, `3` or `5`. The size of 3791 the window for each dimension of the input tensor. 3792 strides: An int or list of `ints` that has length `1`, `3` or `5`. The 3793 stride of the sliding window for each dimension of the input tensor. 3794 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See 3795 the "returns" section of `tf.nn.convolution` for details. 3796 data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". 3797 The data format of the input and output data. With the default format 3798 "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, 3799 in_width, in_channels]. Alternatively, the format could be "NCDHW", the 3800 data storage order is: [batch, in_channels, in_depth, in_height, 3801 in_width]. 3802 name: A name for the operation (optional). 3803 3804 Returns: 3805 A `Tensor` of format specified by `data_format`. 3806 The max pooled output tensor. 3807 """ 3808 with ops.name_scope(name, "MaxPool3D", [input]) as name: 3809 if data_format is None: 3810 data_format = "NDHWC" 3811 channel_index = 1 if data_format.startswith("NC") else 4 3812 3813 ksize = _get_sequence(ksize, 3, channel_index, "ksize") 3814 strides = _get_sequence(strides, 3, channel_index, "strides") 3815 3816 return gen_nn_ops.max_pool3d( 3817 input, 3818 ksize=ksize, 3819 strides=strides, 3820 padding=padding, 3821 data_format=data_format, 3822 name=name) 3823 # pylint: enable=redefined-builtin 3824 3825 3826 @tf_export("nn.max_pool_with_argmax", v1=[]) 3827 def max_pool_with_argmax_v2( 3828 input, # pylint: disable=redefined-builtin 3829 ksize, 3830 strides, 3831 padding, 3832 data_format="NHWC", 3833 output_dtype=dtypes.int64, 3834 include_batch_in_index=False, 3835 name=None): 3836 """Performs max pooling on the input and outputs both max values and indices. 3837 3838 The indices in `argmax` are flattened, so that a maximum value at position 3839 `[b, y, x, c]` becomes flattened index: `(y * width + x) * channels + c` if 3840 `include_batch_in_index` is False; 3841 `((b * height + y) * width + x) * channels + c` 3842 if `include_batch_in_index` is True. 3843 3844 The indices returned are always in `[0, height) x [0, width)` before 3845 flattening, even if padding is involved and the mathematically correct answer 3846 is outside (either negative or too large). This is a bug, but fixing it is 3847 difficult to do in a safe backwards compatible way, especially due to 3848 flattening. 3849 3850 Args: 3851 input: A `Tensor`. Must be one of the following types: `float32`, `float64`, 3852 `int32`, `uint8`, `int16`, `int8`, `int64`, `bfloat16`, `uint16`, `half`, 3853 `uint32`, `uint64`. 3854 4-D with shape `[batch, height, width, channels]`. Input to pool over. 3855 ksize: An int or list of `ints` that has length `1`, `2` or `4`. 3856 The size of the window for each dimension of the input tensor. 3857 strides: An int or list of `ints` that has length `1`, `2` or `4`. 3858 The stride of the sliding window for each dimension of the 3859 input tensor. 3860 padding: A `string` from: `"SAME", "VALID"`. 3861 The type of padding algorithm to use. 3862 data_format: An optional `string`, must be set to `"NHWC"`. Defaults to 3863 `"NHWC"`. 3864 Specify the data format of the input and output data. 3865 output_dtype: An optional `tf.DType` from: `tf.int32, tf.int64`. 3866 Defaults to `tf.int64`. 3867 The dtype of the returned argmax tensor. 3868 include_batch_in_index: An optional `boolean`. Defaults to `False`. 3869 Whether to include batch dimension in flattened index of `argmax`. 3870 name: A name for the operation (optional). 3871 3872 Returns: 3873 A tuple of `Tensor` objects (output, argmax). 3874 3875 output: A `Tensor`. Has the same type as `input`. 3876 argmax: A `Tensor` of type `output_dtype`. 3877 """ 3878 3879 if data_format != "NHWC": 3880 raise ValueError("Data formats other than 'NHWC' are not yet supported") 3881 3882 ksize = _get_sequence(ksize, 2, 3, "ksize") 3883 strides = _get_sequence(strides, 2, 3, "strides") 3884 3885 return gen_nn_ops.max_pool_with_argmax( 3886 input=input, 3887 ksize=ksize, 3888 strides=strides, 3889 padding=padding, 3890 Targmax=output_dtype, 3891 include_batch_in_index=include_batch_in_index, 3892 name=name) 3893 3894 3895 @tf_export(v1=["nn.max_pool_with_argmax"]) 3896 def max_pool_with_argmax_v1( # pylint: disable=missing-docstring,invalid-name 3897 input, # pylint: disable=redefined-builtin 3898 ksize, 3899 strides, 3900 padding, 3901 data_format="NHWC", 3902 Targmax=None, 3903 name=None, 3904 output_dtype=None, 3905 include_batch_in_index=False): 3906 if data_format != "NHWC": 3907 raise ValueError("Data formats other than 'NHWC' are not yet supported") 3908 3909 Targmax = deprecated_argument_lookup( 3910 "output_dtype", output_dtype, "Targmax", Targmax) 3911 if Targmax is None: 3912 Targmax = dtypes.int64 3913 return gen_nn_ops.max_pool_with_argmax( 3914 input=input, 3915 ksize=ksize, 3916 strides=strides, 3917 padding=padding, 3918 Targmax=Targmax, 3919 include_batch_in_index=include_batch_in_index, 3920 name=name) 3921 3922 3923 max_pool_with_argmax_v1.__doc__ = gen_nn_ops.max_pool_with_argmax.__doc__ 3924 3925 3926 @ops.RegisterStatistics("Conv2D", "flops") 3927 def _calc_conv_flops(graph, node): 3928 """Calculates the compute resources needed for Conv2D.""" 3929 input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0]) 3930 input_shape.assert_is_fully_defined() 3931 filter_shape = graph_util.tensor_shape_from_node_def_name( 3932 graph, node.input[1]) 3933 filter_shape.assert_is_fully_defined() 3934 output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name) 3935 output_shape.assert_is_fully_defined() 3936 filter_height = int(filter_shape[0]) 3937 filter_width = int(filter_shape[1]) 3938 filter_in_depth = int(filter_shape[2]) 3939 output_count = np.prod(output_shape.as_list(), dtype=np.int64) 3940 return ops.OpStats( 3941 "flops", 3942 (output_count * filter_in_depth * filter_height * filter_width * 2)) 3943 3944 3945 @ops.RegisterStatistics("DepthwiseConv2dNative", "flops") 3946 def _calc_depthwise_conv_flops(graph, node): 3947 """Calculates the compute resources needed for DepthwiseConv2dNative.""" 3948 input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0]) 3949 input_shape.assert_is_fully_defined() 3950 filter_shape = graph_util.tensor_shape_from_node_def_name( 3951 graph, node.input[1]) 3952 filter_shape.assert_is_fully_defined() 3953 output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name) 3954 output_shape.assert_is_fully_defined() 3955 filter_height = int(filter_shape[0]) 3956 filter_width = int(filter_shape[1]) 3957 output_count = np.prod(output_shape.as_list(), dtype=np.int64) 3958 return ops.OpStats("flops", (output_count * filter_height * filter_width * 2)) 3959 3960 3961 @ops.RegisterStatistics("BiasAdd", "flops") 3962 def _calc_bias_add_flops(graph, node): 3963 """Calculates the computing needed for BiasAdd.""" 3964 input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0]) 3965 input_shape.assert_is_fully_defined() 3966 input_count = np.prod(input_shape.as_list()) 3967 return ops.OpStats("flops", input_count) 3968 3969 3970 @tf_export(v1=["nn.xw_plus_b"]) 3971 def xw_plus_b(x, weights, biases, name=None): # pylint: disable=invalid-name 3972 """Computes matmul(x, weights) + biases. 3973 3974 Args: 3975 x: a 2D tensor. Dimensions typically: batch, in_units 3976 weights: a 2D tensor. Dimensions typically: in_units, out_units 3977 biases: a 1D tensor. Dimensions: out_units 3978 name: A name for the operation (optional). If not specified 3979 "xw_plus_b" is used. 3980 3981 Returns: 3982 A 2-D Tensor computing matmul(x, weights) + biases. 3983 Dimensions typically: batch, out_units. 3984 """ 3985 with ops.name_scope(name, "xw_plus_b", [x, weights, biases]) as name: 3986 x = ops.convert_to_tensor(x, name="x") 3987 weights = ops.convert_to_tensor(weights, name="weights") 3988 biases = ops.convert_to_tensor(biases, name="biases") 3989 mm = math_ops.matmul(x, weights) 3990 return bias_add(mm, biases, name=name) 3991 3992 3993 def xw_plus_b_v1(x, weights, biases, name=None): 3994 """Computes matmul(x, weights) + biases. 3995 3996 This is a deprecated version of that will soon be removed. 3997 3998 Args: 3999 x: a 2D tensor. Dimensions typically: batch, in_units 4000 weights: a 2D tensor. Dimensions typically: in_units, out_units 4001 biases: a 1D tensor. Dimensions: out_units 4002 name: A name for the operation (optional). If not specified 4003 "xw_plus_b_v1" is used. 4004 4005 Returns: 4006 A 2-D Tensor computing matmul(x, weights) + biases. 4007 Dimensions typically: batch, out_units. 4008 """ 4009 with ops.name_scope(name, "xw_plus_b_v1", [x, weights, biases]) as name: 4010 x = ops.convert_to_tensor(x, name="x") 4011 weights = ops.convert_to_tensor(weights, name="weights") 4012 biases = ops.convert_to_tensor(biases, name="biases") 4013 mm = math_ops.matmul(x, weights) 4014 return bias_add_v1(mm, biases, name=name) 4015 4016 4017 def _get_noise_shape(x, noise_shape): 4018 # If noise_shape is none return immediately. 4019 if noise_shape is None: 4020 return array_ops.shape(x) 4021 4022 try: 4023 # Best effort to figure out the intended shape. 4024 # If not possible, let the op to handle it. 4025 # In eager mode exception will show up. 4026 noise_shape_ = tensor_shape.as_shape(noise_shape) 4027 except (TypeError, ValueError): 4028 return noise_shape 4029 4030 if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_.dims): 4031 new_dims = [] 4032 for i, dim in enumerate(x.shape.dims): 4033 if noise_shape_.dims[i].value is None and dim.value is not None: 4034 new_dims.append(dim.value) 4035 else: 4036 new_dims.append(noise_shape_.dims[i].value) 4037 return tensor_shape.TensorShape(new_dims) 4038 4039 return noise_shape 4040 4041 4042 @tf_export(v1=["nn.dropout"]) 4043 @deprecation.deprecated_args(None, "Please use `rate` instead of `keep_prob`. " 4044 "Rate should be set to `rate = 1 - keep_prob`.", 4045 "keep_prob") 4046 def dropout(x, keep_prob=None, noise_shape=None, seed=None, name=None, 4047 rate=None): 4048 """Computes dropout. 4049 4050 For each element of `x`, with probability `rate`, outputs `0`, and otherwise 4051 scales up the input by `1 / (1-rate)`. The scaling is such that the expected 4052 sum is unchanged. 4053 4054 By default, each element is kept or dropped independently. If `noise_shape` 4055 is specified, it must be 4056 [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 4057 to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]` 4058 will make independent decisions. For example, if `shape(x) = [k, l, m, n]` 4059 and `noise_shape = [k, 1, 1, n]`, each batch and channel component will be 4060 kept independently and each row and column will be kept or not kept together. 4061 4062 Args: 4063 x: A floating point tensor. 4064 keep_prob: (deprecated) A deprecated alias for `(1-rate)`. 4065 noise_shape: A 1-D `Tensor` of type `int32`, representing the 4066 shape for randomly generated keep/drop flags. 4067 seed: A Python integer. Used to create random seeds. See 4068 `tf.set_random_seed` for behavior. 4069 name: A name for this operation (optional). 4070 rate: A scalar `Tensor` with the same type as `x`. The probability that each 4071 element of `x` is discarded. 4072 4073 Returns: 4074 A Tensor of the same shape of `x`. 4075 4076 Raises: 4077 ValueError: If `rate` is not in `[0, 1)` or if `x` is not a floating 4078 point tensor. 4079 """ 4080 try: 4081 keep = 1. - keep_prob if keep_prob is not None else None 4082 except TypeError: 4083 raise ValueError("keep_prob must be a floating point number or Tensor " 4084 "(got %r)" % keep_prob) 4085 4086 rate = deprecation.deprecated_argument_lookup( 4087 "rate", rate, 4088 "keep_prob", keep) 4089 4090 if rate is None: 4091 raise ValueError("You must provide a rate to dropout.") 4092 4093 return dropout_v2(x, rate, noise_shape=noise_shape, seed=seed, name=name) 4094 4095 4096 @tf_export("nn.dropout", v1=[]) 4097 def dropout_v2(x, rate, noise_shape=None, seed=None, name=None): 4098 """Computes dropout. 4099 4100 With probability `rate`, drops elements of `x`. Input that are kept are 4101 scaled up by `1 / (1 - rate)`, otherwise outputs `0`. The scaling is so that 4102 the expected sum is unchanged. 4103 4104 By default, each element is kept or dropped independently. If `noise_shape` 4105 is specified, it must be 4106 [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 4107 to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]` 4108 will make independent decisions. For example, if `shape(x) = [k, l, m, n]` 4109 and `noise_shape = [k, 1, 1, n]`, each batch and channel component will be 4110 kept independently and each row and column will be kept or not kept together. 4111 4112 Args: 4113 x: A floating point tensor. 4114 rate: A scalar `Tensor` with the same type as x. The probability 4115 that each element is dropped. For example, setting rate=0.1 would drop 4116 10% of input elements. 4117 noise_shape: A 1-D `Tensor` of type `int32`, representing the 4118 shape for randomly generated keep/drop flags. 4119 seed: A Python integer. Used to create random seeds. See 4120 `tf.set_random_seed` 4121 for behavior. 4122 name: A name for this operation (optional). 4123 4124 Returns: 4125 A Tensor of the same shape of `x`. 4126 4127 Raises: 4128 ValueError: If `keep_prob` is not in `(0, 1]` or if `x` is not a floating 4129 point tensor. 4130 """ 4131 with ops.name_scope(name, "dropout", [x]) as name: 4132 x = ops.convert_to_tensor(x, name="x") 4133 if not x.dtype.is_floating: 4134 raise ValueError("x has to be a floating point tensor since it's going to" 4135 " be scaled. Got a %s tensor instead." % x.dtype) 4136 if isinstance(rate, numbers.Real) and not (rate >= 0 and rate < 1): 4137 raise ValueError("rate must be a scalar tensor or a float in the " 4138 "range [0, 1), got %g" % rate) 4139 4140 # Early return if nothing needs to be dropped. 4141 if isinstance(rate, numbers.Real) and rate == 0: 4142 return x 4143 if context.executing_eagerly(): 4144 if isinstance(rate, ops.EagerTensor): 4145 if rate.numpy() == 0: 4146 return x 4147 else: 4148 rate = ops.convert_to_tensor( 4149 rate, dtype=x.dtype, name="rate") 4150 rate.get_shape().assert_is_compatible_with(tensor_shape.scalar()) 4151 4152 # Do nothing if we know rate == 0 4153 if tensor_util.constant_value(rate) == 0: 4154 return x 4155 4156 noise_shape = _get_noise_shape(x, noise_shape) 4157 # Sample a uniform distribution on [0.0, 1.0) and select values larger than 4158 # rate. 4159 # 4160 # NOTE: Random uniform actually can only generate 2^23 floats on [1.0, 2.0) 4161 # and subtract 1.0. 4162 random_tensor = random_ops.random_uniform( 4163 noise_shape, seed=seed, dtype=x.dtype) 4164 keep_prob = 1 - rate 4165 scale = 1 / keep_prob 4166 # NOTE: if (1.0 + rate) - 1 is equal to rate, then we want to consider that 4167 # float to be selected, hence we use a >= comparison. 4168 keep_mask = random_tensor >= rate 4169 ret = x * scale * math_ops.cast(keep_mask, x.dtype) 4170 if not context.executing_eagerly(): 4171 ret.set_shape(x.get_shape()) 4172 return ret 4173 4174 4175 @tf_export("math.top_k", "nn.top_k") 4176 def top_k(input, k=1, sorted=True, name=None): # pylint: disable=redefined-builtin 4177 """Finds values and indices of the `k` largest entries for the last dimension. 4178 4179 If the input is a vector (rank=1), finds the `k` largest entries in the vector 4180 and outputs their values and indices as vectors. Thus `values[j]` is the 4181 `j`-th largest entry in `input`, and its index is `indices[j]`. 4182 4183 For matrices (resp. higher rank input), computes the top `k` entries in each 4184 row (resp. vector along the last dimension). Thus, 4185 4186 values.shape = indices.shape = input.shape[:-1] + [k] 4187 4188 If two elements are equal, the lower-index element appears first. 4189 4190 Args: 4191 input: 1-D or higher `Tensor` with last dimension at least `k`. 4192 k: 0-D `int32` `Tensor`. Number of top elements to look for along the last 4193 dimension (along each row for matrices). 4194 sorted: If true the resulting `k` elements will be sorted by the values in 4195 descending order. 4196 name: Optional name for the operation. 4197 4198 Returns: 4199 values: The `k` largest elements along each last dimensional slice. 4200 indices: The indices of `values` within the last dimension of `input`. 4201 """ 4202 return gen_nn_ops.top_kv2(input, k=k, sorted=sorted, name=name) 4203 4204 4205 def nth_element(input, n, reverse=False, name=None): # pylint: disable=redefined-builtin 4206 r"""Finds values of the `n`-th order statistic for the last dmension. 4207 4208 If the input is a vector (rank-1), finds the entries which is the nth-smallest 4209 value in the vector and outputs their values as scalar tensor. 4210 4211 For matrices (resp. higher rank input), computes the entries which is the 4212 nth-smallest value in each row (resp. vector along the last dimension). Thus, 4213 4214 values.shape = input.shape[:-1] 4215 4216 Args: 4217 input: 1-D or higher `Tensor` with last dimension at least `n+1`. 4218 n: A `Tensor` of type `int32`. 4219 0-D. Position of sorted vector to select along the last dimension (along 4220 each row for matrices). Valid range of n is `[0, input.shape[:-1])` 4221 reverse: An optional `bool`. Defaults to `False`. 4222 When set to True, find the nth-largest value in the vector and vice 4223 versa. 4224 name: A name for the operation (optional). 4225 4226 Returns: 4227 A `Tensor`. Has the same type as `input`. 4228 The `n`-th order statistic along each last dimensional slice. 4229 """ 4230 return gen_nn_ops.nth_element(input, n, reverse=reverse, name=name) 4231 4232 4233 @tf_export(v1=["nn.fractional_max_pool"]) 4234 @deprecation.deprecated(date=None, instructions="`seed2` and `deterministic` " 4235 "args are deprecated. Use fractional_max_pool_v2.") 4236 def fractional_max_pool(value, 4237 pooling_ratio, 4238 pseudo_random=False, 4239 overlapping=False, 4240 deterministic=False, 4241 seed=0, 4242 seed2=0, 4243 name=None): # pylint: disable=redefined-builtin 4244 r"""Performs fractional max pooling on the input. 4245 4246 This is a deprecated version of `fractional_max_pool`. 4247 4248 Fractional max pooling is slightly different than regular max pooling. In 4249 regular max pooling, you downsize an input set by taking the maximum value of 4250 smaller N x N subsections of the set (often 2x2), and try to reduce the set by 4251 a factor of N, where N is an integer. Fractional max pooling, as you might 4252 expect from the word "fractional", means that the overall reduction ratio N 4253 does not have to be an integer. 4254 4255 The sizes of the pooling regions are generated randomly but are fairly 4256 uniform. For example, let's look at the height dimension, and the constraints 4257 on the list of rows that will be pool boundaries. 4258 4259 First we define the following: 4260 4261 1. input_row_length : the number of rows from the input set 4262 2. output_row_length : which will be smaller than the input 4263 3. alpha = input_row_length / output_row_length : our reduction ratio 4264 4. K = floor(alpha) 4265 5. row_pooling_sequence : this is the result list of pool boundary rows 4266 4267 Then, row_pooling_sequence should satisfy: 4268 4269 1. a[0] = 0 : the first value of the sequence is 0 4270 2. a[end] = input_row_length : the last value of the sequence is the size 4271 3. K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size 4272 4. length(row_pooling_sequence) = output_row_length+1 4273 4274 For more details on fractional max pooling, see this paper: [Benjamin Graham, 4275 Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) 4276 4277 Args: 4278 value: A `Tensor`. 4-D with shape `[batch, height, width, channels]`. 4279 pooling_ratio: A list of `floats` that has length >= 4. Pooling ratio for 4280 each dimension of `value`, currently only supports row and col dimension 4281 and should be >= 1.0. For example, a valid pooling ratio looks like [1.0, 4282 1.44, 1.73, 1.0]. The first and last elements must be 1.0 because we don't 4283 allow pooling on batch and channels dimensions. 1.44 and 1.73 are pooling 4284 ratio on height and width dimensions respectively. 4285 pseudo_random: An optional `bool`. Defaults to `False`. When set to `True`, 4286 generates the pooling sequence in a pseudorandom fashion, otherwise, in a 4287 random fashion. Check paper [Benjamin Graham, Fractional 4288 Max-Pooling](http://arxiv.org/abs/1412.6071) for difference between 4289 pseudorandom and random. 4290 overlapping: An optional `bool`. Defaults to `False`. When set to `True`, 4291 it means when pooling, the values at the boundary of adjacent pooling 4292 cells are used by both cells. For example: 4293 `index 0 1 2 3 4` 4294 `value 20 5 16 3 7` 4295 If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used 4296 twice. The result would be [20, 16] for fractional max pooling. 4297 deterministic: An optional `bool`. Deprecated; use `fractional_max_pool_v2` 4298 instead. 4299 seed: An optional `int`. Defaults to `0`. If set to be non-zero, the 4300 random number generator is seeded by the given seed. Otherwise it is 4301 seeded by a random seed. 4302 seed2: An optional `int`. Deprecated; use `fractional_max_pool_v2` instead. 4303 name: A name for the operation (optional). 4304 4305 Returns: 4306 A tuple of `Tensor` objects (`output`, `row_pooling_sequence`, 4307 `col_pooling_sequence`). 4308 output: Output `Tensor` after fractional max pooling. Has the same type as 4309 `value`. 4310 row_pooling_sequence: A `Tensor` of type `int64`. 4311 col_pooling_sequence: A `Tensor` of type `int64`. 4312 """ 4313 return gen_nn_ops.fractional_max_pool(value, pooling_ratio, pseudo_random, 4314 overlapping, deterministic, seed, seed2, 4315 name) 4316 4317 4318 @tf_export("nn.fractional_max_pool", v1=[]) 4319 def fractional_max_pool_v2(value, 4320 pooling_ratio, 4321 pseudo_random=False, 4322 overlapping=False, 4323 seed=0, 4324 name=None): # pylint: disable=redefined-builtin 4325 r"""Performs fractional max pooling on the input. 4326 4327 Fractional max pooling is slightly different than regular max pooling. In 4328 regular max pooling, you downsize an input set by taking the maximum value of 4329 smaller N x N subsections of the set (often 2x2), and try to reduce the set by 4330 a factor of N, where N is an integer. Fractional max pooling, as you might 4331 expect from the word "fractional", means that the overall reduction ratio N 4332 does not have to be an integer. 4333 4334 The sizes of the pooling regions are generated randomly but are fairly 4335 uniform. For example, let's look at the height dimension, and the constraints 4336 on the list of rows that will be pool boundaries. 4337 4338 First we define the following: 4339 4340 1. input_row_length : the number of rows from the input set 4341 2. output_row_length : which will be smaller than the input 4342 3. alpha = input_row_length / output_row_length : our reduction ratio 4343 4. K = floor(alpha) 4344 5. row_pooling_sequence : this is the result list of pool boundary rows 4345 4346 Then, row_pooling_sequence should satisfy: 4347 4348 1. a[0] = 0 : the first value of the sequence is 0 4349 2. a[end] = input_row_length : the last value of the sequence is the size 4350 3. K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size 4351 4. length(row_pooling_sequence) = output_row_length+1 4352 4353 For more details on fractional max pooling, see this paper: [Benjamin Graham, 4354 Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) 4355 4356 Args: 4357 value: A `Tensor`. 4-D with shape `[batch, height, width, channels]`. 4358 pooling_ratio: An int or list of `ints` that has length `1`, `2` or `4`. 4359 Pooling ratio for each dimension of `value`, currently only supports row 4360 and col dimension and should be >= 1.0. For example, a valid pooling ratio 4361 looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements must be 1.0 4362 because we don't allow pooling on batch and channels dimensions. 1.44 and 4363 1.73 are pooling ratio on height and width dimensions respectively. 4364 pseudo_random: An optional `bool`. Defaults to `False`. When set to `True`, 4365 generates the pooling sequence in a pseudorandom fashion, otherwise, in a 4366 random fashion. Check paper [Benjamin Graham, Fractional 4367 Max-Pooling](http://arxiv.org/abs/1412.6071) for difference between 4368 pseudorandom and random. 4369 overlapping: An optional `bool`. Defaults to `False`. When set to `True`, 4370 it means when pooling, the values at the boundary of adjacent pooling 4371 cells are used by both cells. For example: 4372 `index 0 1 2 3 4` 4373 `value 20 5 16 3 7` 4374 If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used 4375 twice. The result would be [20, 16] for fractional max pooling. 4376 seed: An optional `int`. Defaults to `0`. If set to be non-zero, the 4377 random number generator is seeded by the given seed. Otherwise it is 4378 seeded by a random seed. 4379 name: A name for the operation (optional). 4380 4381 Returns: 4382 A tuple of `Tensor` objects (`output`, `row_pooling_sequence`, 4383 `col_pooling_sequence`). 4384 output: Output `Tensor` after fractional max pooling. Has the same type as 4385 `value`. 4386 row_pooling_sequence: A `Tensor` of type `int64`. 4387 col_pooling_sequence: A `Tensor` of type `int64`. 4388 """ 4389 pooling_ratio = _get_sequence(pooling_ratio, 2, 3, "pooling_ratio") 4390 4391 if seed == 0: 4392 return gen_nn_ops.fractional_max_pool(value, pooling_ratio, pseudo_random, 4393 overlapping, deterministic=False, 4394 seed=0, seed2=0, name=name) 4395 else: 4396 seed1, seed2 = random_seed.get_seed(seed) 4397 return gen_nn_ops.fractional_max_pool(value, pooling_ratio, pseudo_random, 4398 overlapping, deterministic=True, 4399 seed=seed1, seed2=seed2, name=name) 4400 4401 4402 @tf_export(v1=["nn.fractional_avg_pool"]) 4403 @deprecation.deprecated(date=None, instructions="`seed2` and `deterministic` " 4404 "args are deprecated. Use fractional_avg_pool_v2.") 4405 def fractional_avg_pool(value, 4406 pooling_ratio, 4407 pseudo_random=False, 4408 overlapping=False, 4409 deterministic=False, 4410 seed=0, 4411 seed2=0, 4412 name=None): # pylint: disable=redefined-builtin 4413 r"""Performs fractional average pooling on the input. 4414 4415 This is a deprecated version of `fractional_avg_pool`. 4416 4417 Fractional average pooling is similar to Fractional max pooling in the pooling 4418 region generation step. The only difference is that after pooling regions are 4419 generated, a mean operation is performed instead of a max operation in each 4420 pooling region. 4421 4422 Args: 4423 value: A `Tensor`. 4-D with shape `[batch, height, width, channels]`. 4424 pooling_ratio: A list of `floats` that has length >= 4. Pooling ratio for 4425 each dimension of `value`, currently only supports row and col dimension 4426 and should be >= 1.0. For example, a valid pooling ratio looks like [1.0, 4427 1.44, 1.73, 1.0]. The first and last elements must be 1.0 because we don't 4428 allow pooling on batch and channels dimensions. 1.44 and 1.73 are pooling 4429 ratio on height and width dimensions respectively. 4430 pseudo_random: An optional `bool`. Defaults to `False`. When set to `True`, 4431 generates the pooling sequence in a pseudorandom fashion, otherwise, in a 4432 random fashion. Check paper [Benjamin Graham, Fractional 4433 Max-Pooling](http://arxiv.org/abs/1412.6071) for difference between 4434 pseudorandom and random. 4435 overlapping: An optional `bool`. Defaults to `False`. When set to `True`, 4436 it means when pooling, the values at the boundary of adjacent pooling 4437 cells are used by both cells. For example: 4438 `index 0 1 2 3 4` 4439 `value 20 5 16 3 7` 4440 If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used 4441 twice. The result would be [20, 16] for fractional avg pooling. 4442 deterministic: An optional `bool`. Deprecated; use `fractional_avg_pool_v2` 4443 instead. 4444 seed: An optional `int`. Defaults to `0`. If set to be non-zero, the 4445 random number generator is seeded by the given seed. Otherwise it is 4446 seeded by a random seed. 4447 seed2: An optional `int`. Deprecated; use `fractional_avg_pool_v2` instead. 4448 name: A name for the operation (optional). 4449 4450 Returns: 4451 A tuple of `Tensor` objects (`output`, `row_pooling_sequence`, 4452 `col_pooling_sequence`). 4453 output: Output `Tensor` after fractional avg pooling. Has the same type as 4454 `value`. 4455 row_pooling_sequence: A `Tensor` of type `int64`. 4456 col_pooling_sequence: A `Tensor` of type `int64`. 4457 """ 4458 return gen_nn_ops.fractional_avg_pool(value, pooling_ratio, pseudo_random, 4459 overlapping, deterministic, seed, seed2, 4460 name=name) 4461 4462 4463 @tf_export("nn.fractional_avg_pool", v1=[]) 4464 def fractional_avg_pool_v2(value, 4465 pooling_ratio, 4466 pseudo_random=False, 4467 overlapping=False, 4468 seed=0, 4469 name=None): # pylint: disable=redefined-builtin 4470 r"""Performs fractional average pooling on the input. 4471 4472 Fractional average pooling is similar to Fractional max pooling in the pooling 4473 region generation step. The only difference is that after pooling regions are 4474 generated, a mean operation is performed instead of a max operation in each 4475 pooling region. 4476 4477 Args: 4478 value: A `Tensor`. 4-D with shape `[batch, height, width, channels]`. 4479 pooling_ratio: A list of `floats` that has length >= 4. Pooling ratio for 4480 each dimension of `value`, currently only supports row and col dimension 4481 and should be >= 1.0. For example, a valid pooling ratio looks like [1.0, 4482 1.44, 1.73, 1.0]. The first and last elements must be 1.0 because we don't 4483 allow pooling on batch and channels dimensions. 1.44 and 1.73 are pooling 4484 ratio on height and width dimensions respectively. 4485 pseudo_random: An optional `bool`. Defaults to `False`. When set to `True`, 4486 generates the pooling sequence in a pseudorandom fashion, otherwise, in a 4487 random fashion. Check paper [Benjamin Graham, Fractional 4488 Max-Pooling](http://arxiv.org/abs/1412.6071) for difference between 4489 pseudorandom and random. 4490 overlapping: An optional `bool`. Defaults to `False`. When set to `True`, 4491 it means when pooling, the values at the boundary of adjacent pooling 4492 cells are used by both cells. For example: 4493 `index 0 1 2 3 4` 4494 `value 20 5 16 3 7` 4495 If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used 4496 twice. The result would be [20, 16] for fractional avg pooling. 4497 seed: An optional `int`. Defaults to `0`. If set to be non-zero, the 4498 random number generator is seeded by the given seed. Otherwise it is 4499 seeded by a random seed. 4500 name: A name for the operation (optional). 4501 4502 Returns: 4503 A tuple of `Tensor` objects (`output`, `row_pooling_sequence`, 4504 `col_pooling_sequence`). 4505 output: Output `Tensor` after fractional avg pooling. Has the same type as 4506 `value`. 4507 row_pooling_sequence: A `Tensor` of type `int64`. 4508 col_pooling_sequence: A `Tensor` of type `int64`. 4509 """ 4510 if seed == 0: 4511 return gen_nn_ops.fractional_avg_pool(value, pooling_ratio, pseudo_random, 4512 overlapping, deterministic=False, 4513 seed=0, seed2=0, name=name) 4514 else: 4515 seed1, seed2 = random_seed.get_seed(seed) 4516 return gen_nn_ops.fractional_avg_pool(value, pooling_ratio, pseudo_random, 4517 overlapping, deterministic=True, 4518 seed=seed1, seed2=seed2, name=name) 4519 4520 4521 @ops.RegisterStatistics("Dilation2D", "flops") 4522 def _calc_dilation2d_flops(graph, node): 4523 """Calculates the compute resources needed for Dilation2D.""" 4524 input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0]) 4525 input_shape.assert_is_fully_defined() 4526 filter_shape = graph_util.tensor_shape_from_node_def_name( 4527 graph, node.input[1]) 4528 filter_shape.assert_is_fully_defined() 4529 output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name) 4530 output_shape.assert_is_fully_defined() 4531 filter_height = int(filter_shape[0]) 4532 filter_width = int(filter_shape[1]) 4533 output_count = np.prod(output_shape.as_list(), dtype=np.int64) 4534 return ops.OpStats("flops", (output_count * filter_height * filter_width * 2)) 4535 4536 4537 @tf_export(v1=["nn.erosion2d"]) 4538 def erosion2d(value, kernel, strides, rates, padding, name=None): 4539 """Computes the grayscale erosion of 4-D `value` and 3-D `kernel` tensors. 4540 4541 The `value` tensor has shape `[batch, in_height, in_width, depth]` and the 4542 `kernel` tensor has shape `[kernel_height, kernel_width, depth]`, i.e., 4543 each input channel is processed independently of the others with its own 4544 structuring function. The `output` tensor has shape 4545 `[batch, out_height, out_width, depth]`. The spatial dimensions of the 4546 output tensor depend on the `padding` algorithm. We currently only support the 4547 default "NHWC" `data_format`. 4548 4549 In detail, the grayscale morphological 2-D erosion is given by: 4550 4551 output[b, y, x, c] = 4552 min_{dy, dx} value[b, 4553 strides[1] * y - rates[1] * dy, 4554 strides[2] * x - rates[2] * dx, 4555 c] - 4556 kernel[dy, dx, c] 4557 4558 Duality: The erosion of `value` by the `kernel` is equal to the negation of 4559 the dilation of `-value` by the reflected `kernel`. 4560 4561 Args: 4562 value: A `Tensor`. 4-D with shape `[batch, in_height, in_width, depth]`. 4563 kernel: A `Tensor`. Must have the same type as `value`. 4564 3-D with shape `[kernel_height, kernel_width, depth]`. 4565 strides: A list of `ints` that has length `>= 4`. 4566 1-D of length 4. The stride of the sliding window for each dimension of 4567 the input tensor. Must be: `[1, stride_height, stride_width, 1]`. 4568 rates: A list of `ints` that has length `>= 4`. 4569 1-D of length 4. The input stride for atrous morphological dilation. 4570 Must be: `[1, rate_height, rate_width, 1]`. 4571 padding: A `string` from: `"SAME", "VALID"`. 4572 The type of padding algorithm to use. 4573 name: A name for the operation (optional). If not specified "erosion2d" 4574 is used. 4575 4576 Returns: 4577 A `Tensor`. Has the same type as `value`. 4578 4-D with shape `[batch, out_height, out_width, depth]`. 4579 4580 Raises: 4581 ValueError: If the `value` depth does not match `kernel`' shape, or if 4582 padding is other than `'VALID'` or `'SAME'`. 4583 """ 4584 with ops.name_scope(name, "erosion2d", [value, kernel]) as name: 4585 # Reduce erosion to dilation by duality. 4586 return math_ops.negative( 4587 gen_nn_ops.dilation2d( 4588 input=math_ops.negative(value), 4589 filter=array_ops.reverse_v2(kernel, [0, 1]), 4590 strides=strides, 4591 rates=rates, 4592 padding=padding, 4593 name=name)) 4594 4595 4596 @tf_export("nn.erosion2d", v1=[]) 4597 def erosion2d_v2(value, 4598 filters, 4599 strides, 4600 padding, 4601 data_format, 4602 dilations, 4603 name=None): 4604 """Computes the grayscale erosion of 4-D `value` and 3-D `filters` tensors. 4605 4606 The `value` tensor has shape `[batch, in_height, in_width, depth]` and the 4607 `filters` tensor has shape `[filters_height, filters_width, depth]`, i.e., 4608 each input channel is processed independently of the others with its own 4609 structuring function. The `output` tensor has shape 4610 `[batch, out_height, out_width, depth]`. The spatial dimensions of the 4611 output tensor depend on the `padding` algorithm. We currently only support the 4612 default "NHWC" `data_format`. 4613 4614 In detail, the grayscale morphological 2-D erosion is given by: 4615 4616 output[b, y, x, c] = 4617 min_{dy, dx} value[b, 4618 strides[1] * y - dilations[1] * dy, 4619 strides[2] * x - dilations[2] * dx, 4620 c] - 4621 filters[dy, dx, c] 4622 4623 Duality: The erosion of `value` by the `filters` is equal to the negation of 4624 the dilation of `-value` by the reflected `filters`. 4625 4626 Args: 4627 value: A `Tensor`. 4-D with shape `[batch, in_height, in_width, depth]`. 4628 filters: A `Tensor`. Must have the same type as `value`. 4629 3-D with shape `[filters_height, filters_width, depth]`. 4630 strides: A list of `ints` that has length `>= 4`. 4631 1-D of length 4. The stride of the sliding window for each dimension of 4632 the input tensor. Must be: `[1, stride_height, stride_width, 1]`. 4633 padding: A `string` from: `"SAME", "VALID"`. 4634 The type of padding algorithm to use. 4635 data_format: A `string`, only `"NHWC"` is currently supported. 4636 dilations: A list of `ints` that has length `>= 4`. 4637 1-D of length 4. The input stride for atrous morphological dilation. 4638 Must be: `[1, rate_height, rate_width, 1]`. 4639 name: A name for the operation (optional). If not specified "erosion2d" 4640 is used. 4641 4642 Returns: 4643 A `Tensor`. Has the same type as `value`. 4644 4-D with shape `[batch, out_height, out_width, depth]`. 4645 4646 Raises: 4647 ValueError: If the `value` depth does not match `filters`' shape, or if 4648 padding is other than `'VALID'` or `'SAME'`. 4649 """ 4650 if data_format != "NHWC": 4651 raise ValueError("Data formats other than NHWC are not yet supported") 4652 4653 with ops.name_scope(name, "erosion2d", [value, filters]) as name: 4654 # Reduce erosion to dilation by duality. 4655 return math_ops.negative( 4656 gen_nn_ops.dilation2d( 4657 input=math_ops.negative(value), 4658 filter=array_ops.reverse_v2(filters, [0, 1]), 4659 strides=strides, 4660 rates=dilations, 4661 padding=padding, 4662 name=name)) 4663 4664 4665 @tf_export(v1=["math.in_top_k", "nn.in_top_k"]) 4666 def in_top_k(predictions, targets, k, name=None): 4667 r"""Says whether the targets are in the top `K` predictions. 4668 4669 This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the 4670 prediction for the target class is among the top `k` predictions among 4671 all predictions for example `i`. Note that the behavior of `InTopK` differs 4672 from the `TopK` op in its handling of ties; if multiple classes have the 4673 same prediction value and straddle the top-`k` boundary, all of those 4674 classes are considered to be in the top `k`. 4675 4676 More formally, let 4677 4678 \\(predictions_i\\) be the predictions for all classes for example `i`, 4679 \\(targets_i\\) be the target class for example `i`, 4680 \\(out_i\\) be the output for example `i`, 4681 4682 $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ 4683 4684 Args: 4685 predictions: A `Tensor` of type `float32`. 4686 A `batch_size` x `classes` tensor. 4687 targets: A `Tensor`. Must be one of the following types: `int32`, `int64`. 4688 A `batch_size` vector of class ids. 4689 k: An `int`. Number of top elements to look at for computing precision. 4690 name: A name for the operation (optional). 4691 4692 Returns: 4693 A `Tensor` of type `bool`. Computed Precision at `k` as a `bool Tensor`. 4694 """ 4695 with ops.name_scope(name, "in_top_k"): 4696 return gen_nn_ops.in_top_kv2(predictions, targets, k, name=name) 4697 4698 4699 @tf_export("math.in_top_k", "nn.in_top_k", v1=[]) 4700 def in_top_k_v2(targets, predictions, k, name=None): 4701 return in_top_k(predictions, targets, k, name) 4702 4703 4704 in_top_k_v2.__doc__ = in_top_k.__doc__ 4705 4706 4707 tf_export(v1=["nn.quantized_avg_pool"])(gen_nn_ops.quantized_avg_pool) 4708 tf_export(v1=["nn.quantized_conv2d"])(gen_nn_ops.quantized_conv2d) 4709 tf_export(v1=["nn.quantized_relu_x"])(gen_nn_ops.quantized_relu_x) 4710 tf_export(v1=["nn.quantized_max_pool"])(gen_nn_ops.quantized_max_pool) 4711