1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 # 3 # Licensed under the Apache License, Version 2.0 (the "License"); 4 # you may not use this file except in compliance with the License. 5 # You may obtain a copy of the License at 6 # 7 # http://www.apache.org/licenses/LICENSE-2.0 8 # 9 # Unless required by applicable law or agreed to in writing, software 10 # distributed under the License is distributed on an "AS IS" BASIS, 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 # See the License for the specific language governing permissions and 13 # limitations under the License. 14 # ============================================================================== 15 """Wrappers for primitive Neural Net (NN) Operations.""" 16 17 from __future__ import absolute_import 18 from __future__ import division 19 from __future__ import print_function 20 21 import numbers 22 23 import numpy as np 24 25 from tensorflow.python.eager import context 26 from tensorflow.python.framework import dtypes 27 from tensorflow.python.framework import graph_util 28 from tensorflow.python.framework import ops 29 from tensorflow.python.framework import tensor_shape 30 from tensorflow.python.framework import tensor_util 31 from tensorflow.python.ops import array_ops 32 from tensorflow.python.ops import gen_nn_ops 33 from tensorflow.python.ops import math_ops 34 from tensorflow.python.ops import random_ops 35 36 # go/tf-wildcard-import 37 # pylint: disable=wildcard-import 38 from tensorflow.python.ops.gen_nn_ops import * 39 # pylint: enable=wildcard-import 40 41 from tensorflow.python.util import deprecation 42 from tensorflow.python.util.tf_export import tf_export 43 44 # Aliases for some automatically-generated names. 45 local_response_normalization = gen_nn_ops.lrn 46 47 # pylint: disable=protected-access 48 49 50 def _non_atrous_convolution( 51 input, # pylint: disable=redefined-builtin 52 filter, # pylint: disable=redefined-builtin 53 padding, 54 data_format=None, # pylint: disable=redefined-builtin 55 strides=None, 56 name=None): 57 """Computes sums of N-D convolutions (actually cross correlation). 58 59 It is required that 1 <= N <= 3. 60 61 This is used to implement the more generic `convolution` function, which 62 extends the interface of this function with a `dilation_rate` parameter. 63 64 Args: 65 66 input: Rank N+2 tensor of type T of shape 67 `[batch_size] + input_spatial_shape + [in_channels]` if `data_format` 68 does not start with `"NC"`, or 69 `[batch_size, in_channels] + input_spatial_shape` if `data_format` starts 70 with `"NC"`. 71 filter: Rank N+2 tensor of type T of shape 72 `filter_spatial_shape + [in_channels, out_channels]`. Rank of either 73 `input` or `filter` must be known. 74 padding: Padding method to use, must be either "VALID" or "SAME". 75 data_format: A string or None. Specifies whether the channel dimension of 76 the `input` and output is the last dimension (default, or if `data_format` 77 does not start with "NC"), or the second dimension (if `data_format` 78 starts with "NC"). For N=1, the valid values are "NWC" (default) and 79 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 80 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 81 strides: Sequence of N positive integers, defaults to `[1] * N`. 82 name: Name prefix to use. 83 84 Returns: 85 Rank N+2 tensor of type T of shape 86 `[batch_size] + output_spatial_shape + [out_channels]`, where 87 if padding == "SAME": 88 output_spatial_shape = input_spatial_shape 89 if padding == "VALID": 90 output_spatial_shape = input_spatial_shape - filter_spatial_shape + 1. 91 92 Raises: 93 ValueError: if ranks are incompatible. 94 95 """ 96 with ops.name_scope(name, "non_atrous_convolution", [input, filter]) as scope: 97 input = ops.convert_to_tensor(input, name="input") # pylint: disable=redefined-builtin 98 input_shape = input.get_shape() 99 filter = ops.convert_to_tensor(filter, name="filter") # pylint: disable=redefined-builtin 100 filter_shape = filter.get_shape() 101 op = _NonAtrousConvolution( 102 input_shape, 103 filter_shape=filter_shape, 104 padding=padding, 105 data_format=data_format, 106 strides=strides, 107 name=scope) 108 return op(input, filter) 109 110 111 class _NonAtrousConvolution(object): 112 """Helper class for _non_atrous_convolution. 113 114 Note that this class assumes that shapes of input and filter passed to 115 __call__ are compatible with input_shape and filter_shape passed to the 116 constructor. 117 118 Arguments: 119 input_shape: static input shape, i.e. input.get_shape(). 120 filter_shape: static filter shape, i.e. filter.get_shape(). 121 padding: see _non_atrous_convolution. 122 data_format: see _non_atrous_convolution. 123 strides: see _non_atrous_convolution. 124 name: see _non_atrous_convolution. 125 """ 126 127 def __init__( 128 self, 129 input_shape, 130 filter_shape, # pylint: disable=redefined-builtin 131 padding, 132 data_format=None, 133 strides=None, 134 name=None): 135 filter_shape = filter_shape.with_rank(input_shape.ndims) 136 self.padding = padding 137 self.name = name 138 input_shape = input_shape.with_rank(filter_shape.ndims) 139 if input_shape.ndims is None: 140 raise ValueError("Rank of convolution must be known") 141 if input_shape.ndims < 3 or input_shape.ndims > 5: 142 raise ValueError( 143 "`input` and `filter` must have rank at least 3 and at most 5") 144 conv_dims = input_shape.ndims - 2 145 if strides is None: 146 strides = [1] * conv_dims 147 elif len(strides) != conv_dims: 148 raise ValueError("len(strides)=%d, but should be %d" % (len(strides), 149 conv_dims)) 150 if conv_dims == 1: 151 # conv1d uses the 2-d data format names 152 if data_format is None or data_format == "NWC": 153 data_format_2d = "NHWC" 154 elif data_format == "NCW": 155 data_format_2d = "NCHW" 156 else: 157 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 158 self.strides = strides[0] 159 self.data_format = data_format_2d 160 self.conv_op = self._conv1d 161 elif conv_dims == 2: 162 if data_format is None or data_format == "NHWC": 163 data_format = "NHWC" 164 strides = [1] + list(strides) + [1] 165 elif data_format == "NCHW": 166 strides = [1, 1] + list(strides) 167 else: 168 raise ValueError("data_format must be \"NHWC\" or \"NCHW\".") 169 self.strides = strides 170 self.data_format = data_format 171 self.conv_op = gen_nn_ops.conv2d 172 elif conv_dims == 3: 173 if data_format is None or data_format == "NDHWC": 174 strides = [1] + list(strides) + [1] 175 elif data_format == "NCDHW": 176 strides = [1, 1] + list(strides) 177 else: 178 raise ValueError("data_format must be \"NDHWC\" or \"NCDHW\". Have: %s" 179 % data_format) 180 self.strides = strides 181 self.data_format = data_format 182 self.conv_op = gen_nn_ops.conv3d 183 184 # Note that we need this adapter since argument names for conv1d don't match 185 # those for gen_nn_ops.conv2d and gen_nn_ops.conv3d. 186 # pylint: disable=redefined-builtin 187 def _conv1d(self, input, filter, strides, padding, data_format, name): 188 return conv1d( 189 value=input, 190 filters=filter, 191 stride=strides, 192 padding=padding, 193 data_format=data_format, 194 name=name) 195 196 # pylint: enable=redefined-builtin 197 198 def __call__(self, inp, filter): # pylint: disable=redefined-builtin 199 return self.conv_op( 200 input=inp, 201 filter=filter, 202 strides=self.strides, 203 padding=self.padding, 204 data_format=self.data_format, 205 name=self.name) 206 207 208 @tf_export("nn.with_space_to_batch") 209 def with_space_to_batch( 210 input, # pylint: disable=redefined-builtin 211 dilation_rate, 212 padding, 213 op, 214 filter_shape=None, 215 spatial_dims=None, 216 data_format=None): 217 """Performs `op` on the space-to-batch representation of `input`. 218 219 This has the effect of transforming sliding window operations into the 220 corresponding "atrous" operation in which the input is sampled at the 221 specified `dilation_rate`. 222 223 In the special case that `dilation_rate` is uniformly 1, this simply returns: 224 225 op(input, num_spatial_dims, padding) 226 227 Otherwise, it returns: 228 229 batch_to_space_nd( 230 op(space_to_batch_nd(input, adjusted_dilation_rate, adjusted_paddings), 231 num_spatial_dims, 232 "VALID") 233 adjusted_dilation_rate, 234 adjusted_crops), 235 236 where: 237 238 adjusted_dilation_rate is an int64 tensor of shape [max(spatial_dims)], 239 adjusted_{paddings,crops} are int64 tensors of shape [max(spatial_dims), 2] 240 241 defined as follows: 242 243 We first define two int64 tensors `paddings` and `crops` of shape 244 `[num_spatial_dims, 2]` based on the value of `padding` and the spatial 245 dimensions of the `input`: 246 247 If `padding = "VALID"`, then: 248 249 paddings, crops = required_space_to_batch_paddings( 250 input_shape[spatial_dims], 251 dilation_rate) 252 253 If `padding = "SAME"`, then: 254 255 dilated_filter_shape = 256 filter_shape + (filter_shape - 1) * (dilation_rate - 1) 257 258 paddings, crops = required_space_to_batch_paddings( 259 input_shape[spatial_dims], 260 dilation_rate, 261 [(dilated_filter_shape - 1) // 2, 262 dilated_filter_shape - 1 - (dilated_filter_shape - 1) // 2]) 263 264 Because `space_to_batch_nd` and `batch_to_space_nd` assume that the spatial 265 dimensions are contiguous starting at the second dimension, but the specified 266 `spatial_dims` may not be, we must adjust `dilation_rate`, `paddings` and 267 `crops` in order to be usable with these operations. For a given dimension, 268 if the block size is 1, and both the starting and ending padding and crop 269 amounts are 0, then space_to_batch_nd effectively leaves that dimension alone, 270 which is what is needed for dimensions not part of `spatial_dims`. 271 Furthermore, `space_to_batch_nd` and `batch_to_space_nd` handle this case 272 efficiently for any number of leading and trailing dimensions. 273 274 For 0 <= i < len(spatial_dims), we assign: 275 276 adjusted_dilation_rate[spatial_dims[i] - 1] = dilation_rate[i] 277 adjusted_paddings[spatial_dims[i] - 1, :] = paddings[i, :] 278 adjusted_crops[spatial_dims[i] - 1, :] = crops[i, :] 279 280 All unassigned values of `adjusted_dilation_rate` default to 1, while all 281 unassigned values of `adjusted_paddings` and `adjusted_crops` default to 0. 282 283 Note in the case that `dilation_rate` is not uniformly 1, specifying "VALID" 284 padding is equivalent to specifying `padding = "SAME"` with a filter_shape of 285 `[1]*N`. 286 287 Advanced usage. Note the following optimization: A sequence of 288 `with_space_to_batch` operations with identical (not uniformly 1) 289 `dilation_rate` parameters and "VALID" padding 290 291 net = with_space_to_batch(net, dilation_rate, "VALID", op_1) 292 ... 293 net = with_space_to_batch(net, dilation_rate, "VALID", op_k) 294 295 can be combined into a single `with_space_to_batch` operation as follows: 296 297 def combined_op(converted_input, num_spatial_dims, _): 298 result = op_1(converted_input, num_spatial_dims, "VALID") 299 ... 300 result = op_k(result, num_spatial_dims, "VALID") 301 302 net = with_space_to_batch(net, dilation_rate, "VALID", combined_op) 303 304 This eliminates the overhead of `k-1` calls to `space_to_batch_nd` and 305 `batch_to_space_nd`. 306 307 Similarly, a sequence of `with_space_to_batch` operations with identical (not 308 uniformly 1) `dilation_rate` parameters, "SAME" padding, and odd filter 309 dimensions 310 311 net = with_space_to_batch(net, dilation_rate, "SAME", op_1, filter_shape_1) 312 ... 313 net = with_space_to_batch(net, dilation_rate, "SAME", op_k, filter_shape_k) 314 315 can be combined into a single `with_space_to_batch` operation as follows: 316 317 def combined_op(converted_input, num_spatial_dims, _): 318 result = op_1(converted_input, num_spatial_dims, "SAME") 319 ... 320 result = op_k(result, num_spatial_dims, "SAME") 321 322 net = with_space_to_batch(net, dilation_rate, "VALID", combined_op) 323 324 Args: 325 input: Tensor of rank > max(spatial_dims). 326 dilation_rate: int32 Tensor of *known* shape [num_spatial_dims]. 327 padding: str constant equal to "VALID" or "SAME" 328 op: Function that maps (input, num_spatial_dims, padding) -> output 329 filter_shape: If padding = "SAME", specifies the shape of the convolution 330 kernel/pooling window as an integer Tensor of shape [>=num_spatial_dims]. 331 If padding = "VALID", filter_shape is ignored and need not be specified. 332 spatial_dims: Monotonically increasing sequence of `num_spatial_dims` 333 integers (which are >= 1) specifying the spatial dimensions of `input` 334 and output. Defaults to: `range(1, num_spatial_dims+1)`. 335 data_format: A string or None. Specifies whether the channel dimension of 336 the `input` and output is the last dimension (default, or if `data_format` 337 does not start with "NC"), or the second dimension (if `data_format` 338 starts with "NC"). For N=1, the valid values are "NWC" (default) and 339 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 340 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 341 342 Returns: 343 The output Tensor as described above, dimensions will vary based on the op 344 provided. 345 346 Raises: 347 ValueError: if `padding` is invalid or the arguments are incompatible. 348 ValueError: if `spatial_dims` are invalid. 349 350 """ 351 input = ops.convert_to_tensor(input, name="input") # pylint: disable=redefined-builtin 352 input_shape = input.get_shape() 353 354 def build_op(num_spatial_dims, padding): 355 return lambda inp, _: op(inp, num_spatial_dims, padding) 356 357 new_op = _WithSpaceToBatch( 358 input_shape, 359 dilation_rate, 360 padding, 361 build_op, 362 filter_shape=filter_shape, 363 spatial_dims=spatial_dims, 364 data_format=data_format) 365 return new_op(input, None) 366 367 368 class _WithSpaceToBatch(object): 369 """Helper class for with_space_to_batch. 370 371 Note that this class assumes that shapes of input and filter passed to 372 __call__ are compatible with input_shape and filter_shape passed to the 373 constructor. 374 375 Arguments 376 input_shape: static shape of input. i.e. input.get_shape(). 377 dilation_rate: see with_space_to_batch 378 padding: see with_space_to_batch 379 build_op: Function that maps (num_spatial_dims, paddings) -> (function that 380 maps (input, filter) -> output). 381 filter_shape: see with_space_to_batch 382 spatial_dims: see with_space_to_batch 383 data_format: see with_space_to_batch 384 """ 385 386 def __init__(self, 387 input_shape, 388 dilation_rate, 389 padding, 390 build_op, 391 filter_shape=None, 392 spatial_dims=None, 393 data_format=None): 394 """Helper class for _with_space_to_batch.""" 395 dilation_rate = ops.convert_to_tensor( 396 dilation_rate, dtypes.int32, name="dilation_rate") 397 try: 398 rate_shape = dilation_rate.get_shape().with_rank(1) 399 except ValueError: 400 raise ValueError("rate must be rank 1") 401 402 if not dilation_rate.get_shape().is_fully_defined(): 403 raise ValueError("rate must have known shape") 404 405 num_spatial_dims = rate_shape[0].value 406 407 if data_format is not None and data_format.startswith("NC"): 408 starting_spatial_dim = 2 409 else: 410 starting_spatial_dim = 1 411 412 if spatial_dims is None: 413 spatial_dims = range(starting_spatial_dim, 414 num_spatial_dims + starting_spatial_dim) 415 orig_spatial_dims = list(spatial_dims) 416 spatial_dims = sorted(set(int(x) for x in orig_spatial_dims)) 417 if spatial_dims != orig_spatial_dims or any(x < 1 for x in spatial_dims): 418 raise ValueError( 419 "spatial_dims must be a montonically increasing sequence of positive " 420 "integers") # pylint: disable=line-too-long 421 422 if data_format is not None and data_format.startswith("NC"): 423 expected_input_rank = spatial_dims[-1] 424 else: 425 expected_input_rank = spatial_dims[-1] + 1 426 427 try: 428 input_shape.with_rank_at_least(expected_input_rank) 429 except ValueError: 430 ValueError("input tensor must have rank %d at least" % 431 (expected_input_rank)) 432 433 const_rate = tensor_util.constant_value(dilation_rate) 434 rate_or_const_rate = dilation_rate 435 if const_rate is not None: 436 rate_or_const_rate = const_rate 437 if np.any(const_rate < 1): 438 raise ValueError("dilation_rate must be positive") 439 if np.all(const_rate == 1): 440 self.call = build_op(num_spatial_dims, padding) 441 return 442 443 # We have two padding contributions. The first is used for converting "SAME" 444 # to "VALID". The second is required so that the height and width of the 445 # zero-padded value tensor are multiples of rate. 446 447 # Padding required to reduce to "VALID" convolution 448 if padding == "SAME": 449 if filter_shape is None: 450 raise ValueError("filter_shape must be specified for SAME padding") 451 filter_shape = ops.convert_to_tensor(filter_shape, name="filter_shape") 452 const_filter_shape = tensor_util.constant_value(filter_shape) 453 if const_filter_shape is not None: 454 filter_shape = const_filter_shape 455 self.base_paddings = _with_space_to_batch_base_paddings( 456 const_filter_shape, num_spatial_dims, rate_or_const_rate) 457 else: 458 self.num_spatial_dims = num_spatial_dims 459 self.rate_or_const_rate = rate_or_const_rate 460 self.base_paddings = None 461 elif padding == "VALID": 462 self.base_paddings = np.zeros([num_spatial_dims, 2], np.int32) 463 else: 464 raise ValueError("Invalid padding method %r" % padding) 465 466 self.input_shape = input_shape 467 self.spatial_dims = spatial_dims 468 self.dilation_rate = dilation_rate 469 self.data_format = data_format 470 self.op = build_op(num_spatial_dims, "VALID") 471 self.call = self._with_space_to_batch_call 472 473 def _with_space_to_batch_call(self, inp, filter): # pylint: disable=redefined-builtin 474 """Call functionality for with_space_to_batch.""" 475 # Handle input whose shape is unknown during graph creation. 476 input_spatial_shape = None 477 input_shape = self.input_shape 478 spatial_dims = self.spatial_dims 479 if input_shape.ndims is not None: 480 input_shape_list = input_shape.as_list() 481 input_spatial_shape = [input_shape_list[i] for i in spatial_dims] 482 if input_spatial_shape is None or None in input_spatial_shape: 483 input_shape_tensor = array_ops.shape(inp) 484 input_spatial_shape = array_ops.stack( 485 [input_shape_tensor[i] for i in spatial_dims]) 486 487 base_paddings = self.base_paddings 488 if base_paddings is None: 489 # base_paddings could not be computed at build time since static filter 490 # shape was not fully defined. 491 filter_shape = array_ops.shape(filter) 492 base_paddings = _with_space_to_batch_base_paddings( 493 filter_shape, self.num_spatial_dims, self.rate_or_const_rate) 494 paddings, crops = array_ops.required_space_to_batch_paddings( 495 input_shape=input_spatial_shape, 496 base_paddings=base_paddings, 497 block_shape=self.dilation_rate) 498 499 dilation_rate = _with_space_to_batch_adjust(self.dilation_rate, 1, 500 spatial_dims) 501 paddings = _with_space_to_batch_adjust(paddings, 0, spatial_dims) 502 crops = _with_space_to_batch_adjust(crops, 0, spatial_dims) 503 input_converted = array_ops.space_to_batch_nd( 504 input=inp, block_shape=dilation_rate, paddings=paddings) 505 506 result = self.op(input_converted, filter) 507 508 result_converted = array_ops.batch_to_space_nd( 509 input=result, block_shape=dilation_rate, crops=crops) 510 511 # Recover channel information for output shape if channels are not last. 512 if self.data_format is not None and self.data_format.startswith("NC"): 513 if not result_converted.shape[1].value: 514 output_shape = result_converted.shape.as_list() 515 output_shape[1] = filter.shape[-1] 516 result_converted.set_shape(output_shape) 517 518 return result_converted 519 520 def __call__(self, inp, filter): # pylint: disable=redefined-builtin 521 return self.call(inp, filter) 522 523 524 def _with_space_to_batch_base_paddings(filter_shape, num_spatial_dims, 525 rate_or_const_rate): 526 """Helper function to compute base_paddings.""" 527 # Spatial dimensions of the filters and the upsampled filters in which we 528 # introduce (rate - 1) zeros between consecutive filter values. 529 filter_spatial_shape = filter_shape[:num_spatial_dims] 530 dilated_filter_spatial_shape = ( 531 filter_spatial_shape + (filter_spatial_shape - 1) * 532 (rate_or_const_rate - 1)) 533 pad_extra_shape = dilated_filter_spatial_shape - 1 534 535 # When full_padding_shape is odd, we pad more at end, following the same 536 # convention as conv2d. 537 pad_extra_start = pad_extra_shape // 2 538 pad_extra_end = pad_extra_shape - pad_extra_start 539 base_paddings = array_ops.stack( 540 [[pad_extra_start[i], pad_extra_end[i]] for i in range(num_spatial_dims)]) 541 return base_paddings 542 543 544 def _with_space_to_batch_adjust(orig, fill_value, spatial_dims): 545 """Returns an `adjusted` version of `orig` based on `spatial_dims`. 546 547 Tensor of the same type as `orig` and with shape 548 `[max(spatial_dims), ...]` where: 549 550 adjusted[spatial_dims[i] - 1, ...] = orig[i, ...] 551 552 for 0 <= i < len(spatial_dims), and 553 554 adjusted[j, ...] = fill_value 555 556 for j != spatial_dims[i] - 1 for some i. 557 558 If `orig` is a constant value, then the result will be a constant value. 559 560 Args: 561 orig: Tensor of rank > max(spatial_dims). 562 fill_value: Numpy scalar (of same data type as `orig) specifying the fill 563 value for non-spatial dimensions. 564 spatial_dims: See with_space_to_batch. 565 566 Returns: 567 `adjusted` tensor. 568 """ 569 fill_dims = orig.get_shape().as_list()[1:] 570 dtype = orig.dtype.as_numpy_dtype 571 parts = [] 572 const_orig = tensor_util.constant_value(orig) 573 const_or_orig = const_orig if const_orig is not None else orig 574 prev_spatial_dim = 0 575 i = 0 576 while i < len(spatial_dims): 577 start_i = i 578 start_spatial_dim = spatial_dims[i] 579 if start_spatial_dim > 1: 580 # Fill in any gap from the previous spatial dimension (or dimension 1 if 581 # this is the first spatial dimension) with `fill_value`. 582 parts.append( 583 np.full( 584 [start_spatial_dim - 1 - prev_spatial_dim] + fill_dims, 585 fill_value, 586 dtype=dtype)) 587 # Find the largest value of i such that: 588 # [spatial_dims[start_i], ..., spatial_dims[i]] 589 # == [start_spatial_dim, ..., start_spatial_dim + i - start_i], 590 # i.e. the end of a contiguous group of spatial dimensions. 591 while (i + 1 < len(spatial_dims) and 592 spatial_dims[i + 1] == spatial_dims[i] + 1): 593 i += 1 594 parts.append(const_or_orig[start_i:i + 1]) 595 prev_spatial_dim = spatial_dims[i] 596 i += 1 597 if const_orig is not None: 598 return np.concatenate(parts) 599 else: 600 return array_ops.concat(parts, 0) 601 602 603 def _get_strides_and_dilation_rate(num_spatial_dims, strides, dilation_rate): 604 """Helper function for verifying strides and dilation_rate arguments. 605 606 This is used by `convolution` and `pool`. 607 608 Args: 609 num_spatial_dims: int 610 strides: Optional. List of N ints >= 1. Defaults to [1]*N. If any value 611 of strides is > 1, then all values of dilation_rate must be 1. 612 dilation_rate: Optional. List of N ints >= 1. Defaults to [1]*N. If any 613 value of dilation_rate is > 1, then all values of strides must be 1. 614 615 Returns: 616 Normalized (strides, dilation_rate) as int32 numpy arrays of shape 617 [num_spatial_dims]. 618 619 Raises: 620 ValueError: if the parameters are invalid. 621 """ 622 if dilation_rate is None: 623 dilation_rate = [1] * num_spatial_dims 624 elif len(dilation_rate) != num_spatial_dims: 625 raise ValueError("len(dilation_rate)=%d but should be %d" % 626 (len(dilation_rate), num_spatial_dims)) 627 dilation_rate = np.array(dilation_rate, dtype=np.int32) 628 if np.any(dilation_rate < 1): 629 raise ValueError("all values of dilation_rate must be positive") 630 631 if strides is None: 632 strides = [1] * num_spatial_dims 633 elif len(strides) != num_spatial_dims: 634 raise ValueError("len(strides)=%d but should be %d" % (len(strides), 635 num_spatial_dims)) 636 strides = np.array(strides, dtype=np.int32) 637 if np.any(strides < 1): 638 raise ValueError("all values of strides must be positive") 639 640 if np.any(strides > 1) and np.any(dilation_rate > 1): 641 raise ValueError( 642 "strides > 1 not supported in conjunction with dilation_rate > 1") 643 return strides, dilation_rate 644 645 646 @tf_export("nn.convolution") 647 def convolution( 648 input, # pylint: disable=redefined-builtin 649 filter, # pylint: disable=redefined-builtin 650 padding, 651 strides=None, 652 dilation_rate=None, 653 name=None, 654 data_format=None): 655 # pylint: disable=line-too-long 656 """Computes sums of N-D convolutions (actually cross-correlation). 657 658 This also supports either output striding via the optional `strides` parameter 659 or atrous convolution (also known as convolution with holes or dilated 660 convolution, based on the French word "trous" meaning holes in English) via 661 the optional `dilation_rate` parameter. Currently, however, output striding 662 is not supported for atrous convolutions. 663 664 Specifically, in the case that `data_format` does not start with "NC", given 665 a rank (N+2) `input` Tensor of shape 666 667 [num_batches, 668 input_spatial_shape[0], 669 ..., 670 input_spatial_shape[N-1], 671 num_input_channels], 672 673 a rank (N+2) `filter` Tensor of shape 674 675 [spatial_filter_shape[0], 676 ..., 677 spatial_filter_shape[N-1], 678 num_input_channels, 679 num_output_channels], 680 681 an optional `dilation_rate` tensor of shape [N] (defaulting to [1]*N) 682 specifying the filter upsampling/input downsampling rate, and an optional list 683 of N `strides` (defaulting [1]*N), this computes for each N-D spatial output 684 position (x[0], ..., x[N-1]): 685 686 ``` 687 output[b, x[0], ..., x[N-1], k] = 688 sum_{z[0], ..., z[N-1], q} 689 filter[z[0], ..., z[N-1], q, k] * 690 padded_input[b, 691 x[0]*strides[0] + dilation_rate[0]*z[0], 692 ..., 693 x[N-1]*strides[N-1] + dilation_rate[N-1]*z[N-1], 694 q] 695 ``` 696 where b is the index into the batch, k is the output channel number, q is the 697 input channel number, and z is the N-D spatial offset within the filter. Here, 698 `padded_input` is obtained by zero padding the input using an effective 699 spatial filter shape of `(spatial_filter_shape-1) * dilation_rate + 1` and 700 output striding `strides` as described in the 701 @{tf.nn.convolution$comment here}. 702 703 In the case that `data_format` does start with `"NC"`, the `input` and output 704 (but not the `filter`) are simply transposed as follows: 705 706 convolution(input, data_format, **kwargs) = 707 tf.transpose(convolution(tf.transpose(input, [0] + range(2,N+2) + [1]), 708 **kwargs), 709 [0, N+1] + range(1, N+1)) 710 711 It is required that 1 <= N <= 3. 712 713 Args: 714 input: An N-D `Tensor` of type `T`, of shape 715 `[batch_size] + input_spatial_shape + [in_channels]` if data_format does 716 not start with "NC" (default), or 717 `[batch_size, in_channels] + input_spatial_shape` if data_format starts 718 with "NC". 719 filter: An N-D `Tensor` with the same type as `input` and shape 720 `spatial_filter_shape + [in_channels, out_channels]`. 721 padding: A string, either `"VALID"` or `"SAME"`. The padding algorithm. 722 strides: Optional. Sequence of N ints >= 1. Specifies the output stride. 723 Defaults to [1]*N. If any value of strides is > 1, then all values of 724 dilation_rate must be 1. 725 dilation_rate: Optional. Sequence of N ints >= 1. Specifies the filter 726 upsampling/input downsampling rate. In the literature, the same parameter 727 is sometimes called `input stride` or `dilation`. The effective filter 728 size used for the convolution will be `spatial_filter_shape + 729 (spatial_filter_shape - 1) * (rate - 1)`, obtained by inserting 730 (dilation_rate[i]-1) zeros between consecutive elements of the original 731 filter in each spatial dimension i. If any value of dilation_rate is > 1, 732 then all values of strides must be 1. 733 name: Optional name for the returned tensor. 734 data_format: A string or None. Specifies whether the channel dimension of 735 the `input` and output is the last dimension (default, or if `data_format` 736 does not start with "NC"), or the second dimension (if `data_format` 737 starts with "NC"). For N=1, the valid values are "NWC" (default) and 738 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 739 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 740 741 Returns: 742 A `Tensor` with the same type as `input` of shape 743 744 `[batch_size] + output_spatial_shape + [out_channels]` 745 746 if data_format is None or does not start with "NC", or 747 748 `[batch_size, out_channels] + output_spatial_shape` 749 750 if data_format starts with "NC", 751 where `output_spatial_shape` depends on the value of `padding`. 752 753 If padding == "SAME": 754 output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i]) 755 756 If padding == "VALID": 757 output_spatial_shape[i] = 758 ceil((input_spatial_shape[i] - 759 (spatial_filter_shape[i]-1) * dilation_rate[i]) 760 / strides[i]). 761 762 Raises: 763 ValueError: If input/output depth does not match `filter` shape, if padding 764 is other than `"VALID"` or `"SAME"`, or if data_format is invalid. 765 766 """ 767 # pylint: enable=line-too-long 768 with ops.name_scope(name, "convolution", [input, filter]) as name: 769 input = ops.convert_to_tensor(input, name="input") # pylint: disable=redefined-builtin 770 input_shape = input.get_shape() 771 filter = ops.convert_to_tensor(filter, name="filter") # pylint: disable=redefined-builtin 772 filter_shape = filter.get_shape() 773 op = Convolution( 774 input_shape, 775 filter_shape, 776 padding, 777 strides=strides, 778 dilation_rate=dilation_rate, 779 name=name, 780 data_format=data_format) 781 return op(input, filter) 782 783 784 class Convolution(object): 785 """Helper class for convolution. 786 787 Note that this class assumes that shapes of input and filter passed to 788 __call__ are compatible with input_shape and filter_shape passed to the 789 constructor. 790 791 Arguments 792 input_shape: static shape of input. i.e. input.get_shape(). 793 filter_shape: static shape of the filter. i.e. filter.get_shape(). 794 padding: see convolution. 795 strides: see convolution. 796 dilation_rate: see convolution. 797 name: see convolution. 798 data_format: see convolution. 799 """ 800 801 def __init__(self, 802 input_shape, 803 filter_shape, 804 padding, 805 strides=None, 806 dilation_rate=None, 807 name=None, 808 data_format=None): 809 """Helper function for convolution.""" 810 num_total_dims = filter_shape.ndims 811 if num_total_dims is None: 812 num_total_dims = input_shape.ndims 813 if num_total_dims is None: 814 raise ValueError("rank of input or filter must be known") 815 816 num_spatial_dims = num_total_dims - 2 817 818 try: 819 input_shape.with_rank(num_spatial_dims + 2) 820 except ValueError: 821 ValueError("input tensor must have rank %d" % (num_spatial_dims + 2)) 822 823 try: 824 filter_shape.with_rank(num_spatial_dims + 2) 825 except ValueError: 826 ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2)) 827 828 if data_format is None or not data_format.startswith("NC"): 829 input_channels_dim = input_shape[num_spatial_dims + 1] 830 spatial_dims = range(1, num_spatial_dims + 1) 831 else: 832 input_channels_dim = input_shape[1] 833 spatial_dims = range(2, num_spatial_dims + 2) 834 835 if not input_channels_dim.is_compatible_with( 836 filter_shape[num_spatial_dims]): 837 raise ValueError( 838 "number of input channels does not match corresponding dimension of " 839 "filter, {} != {}".format(input_channels_dim, 840 filter_shape[num_spatial_dims])) 841 842 strides, dilation_rate = _get_strides_and_dilation_rate( 843 num_spatial_dims, strides, dilation_rate) 844 845 self.input_shape = input_shape 846 self.filter_shape = filter_shape 847 self.data_format = data_format 848 self.strides = strides 849 self.name = name 850 self.conv_op = _WithSpaceToBatch( 851 input_shape, 852 dilation_rate=dilation_rate, 853 padding=padding, 854 build_op=self._build_op, 855 filter_shape=filter_shape, 856 spatial_dims=spatial_dims, 857 data_format=data_format) 858 859 def _build_op(self, _, padding): 860 return _NonAtrousConvolution( 861 self.input_shape, 862 filter_shape=self.filter_shape, 863 padding=padding, 864 data_format=self.data_format, 865 strides=self.strides, 866 name=self.name) 867 868 def __call__(self, inp, filter): # pylint: disable=redefined-builtin 869 return self.conv_op(inp, filter) 870 871 872 @tf_export("nn.pool") 873 def pool( 874 input, # pylint: disable=redefined-builtin 875 window_shape, 876 pooling_type, 877 padding, 878 dilation_rate=None, 879 strides=None, 880 name=None, 881 data_format=None): 882 # pylint: disable=line-too-long 883 """Performs an N-D pooling operation. 884 885 In the case that `data_format` does not start with "NC", computes for 886 0 <= b < batch_size, 887 0 <= x[i] < output_spatial_shape[i], 888 0 <= c < num_channels: 889 890 ``` 891 output[b, x[0], ..., x[N-1], c] = 892 REDUCE_{z[0], ..., z[N-1]} 893 input[b, 894 x[0] * strides[0] - pad_before[0] + dilation_rate[0]*z[0], 895 ... 896 x[N-1]*strides[N-1] - pad_before[N-1] + dilation_rate[N-1]*z[N-1], 897 c], 898 ``` 899 900 where the reduction function REDUCE depends on the value of `pooling_type`, 901 and pad_before is defined based on the value of `padding` as described in the 902 @{tf.nn.convolution$comment here}. 903 The reduction never includes out-of-bounds positions. 904 905 In the case that `data_format` starts with `"NC"`, the `input` and output are 906 simply transposed as follows: 907 908 ``` 909 pool(input, data_format, **kwargs) = 910 tf.transpose(pool(tf.transpose(input, [0] + range(2,N+2) + [1]), 911 **kwargs), 912 [0, N+1] + range(1, N+1)) 913 ``` 914 915 Args: 916 input: Tensor of rank N+2, of shape 917 `[batch_size] + input_spatial_shape + [num_channels]` if data_format does 918 not start with "NC" (default), or 919 `[batch_size, num_channels] + input_spatial_shape` if data_format starts 920 with "NC". Pooling happens over the spatial dimensions only. 921 window_shape: Sequence of N ints >= 1. 922 pooling_type: Specifies pooling operation, must be "AVG" or "MAX". 923 padding: The padding algorithm, must be "SAME" or "VALID". 924 See the @{tf.nn.convolution$comment here} 925 dilation_rate: Optional. Dilation rate. List of N ints >= 1. 926 Defaults to [1]*N. If any value of dilation_rate is > 1, then all values 927 of strides must be 1. 928 strides: Optional. Sequence of N ints >= 1. Defaults to [1]*N. 929 If any value of strides is > 1, then all values of dilation_rate must be 930 1. 931 name: Optional. Name of the op. 932 data_format: A string or None. Specifies whether the channel dimension of 933 the `input` and output is the last dimension (default, or if `data_format` 934 does not start with "NC"), or the second dimension (if `data_format` 935 starts with "NC"). For N=1, the valid values are "NWC" (default) and 936 "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW". 937 For N=3, the valid values are "NDHWC" (default) and "NCDHW". 938 939 Returns: 940 Tensor of rank N+2, of shape 941 [batch_size] + output_spatial_shape + [num_channels] 942 943 if data_format is None or does not start with "NC", or 944 945 [batch_size, num_channels] + output_spatial_shape 946 947 if data_format starts with "NC", 948 where `output_spatial_shape` depends on the value of padding: 949 950 If padding = "SAME": 951 output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i]) 952 953 If padding = "VALID": 954 output_spatial_shape[i] = 955 ceil((input_spatial_shape[i] - (window_shape[i] - 1) * dilation_rate[i]) 956 / strides[i]). 957 958 Raises: 959 ValueError: if arguments are invalid. 960 961 """ 962 # pylint: enable=line-too-long 963 with ops.name_scope(name, "%s_pool" % (pooling_type.lower()), 964 [input]) as scope: 965 input = ops.convert_to_tensor(input, name="input") # pylint: disable=redefined-builtin 966 967 num_spatial_dims = len(window_shape) 968 if num_spatial_dims < 1 or num_spatial_dims > 3: 969 raise ValueError("It is required that 1 <= num_spatial_dims <= 3.") 970 971 input.get_shape().with_rank(num_spatial_dims + 2) 972 973 strides, dilation_rate = _get_strides_and_dilation_rate( 974 num_spatial_dims, strides, dilation_rate) 975 976 if padding == "SAME" and np.any(dilation_rate > 1): 977 raise ValueError( 978 "pooling with SAME padding is not implemented for dilation_rate > 1") 979 980 if np.any(strides > window_shape): 981 raise ValueError( 982 "strides > window_shape not supported due to inconsistency between " 983 "CPU and GPU implementations") 984 985 pooling_ops = { 986 ("MAX", 1): max_pool, 987 ("MAX", 2): max_pool, 988 ("MAX", 3): max_pool3d, # pylint: disable=undefined-variable 989 ("AVG", 1): avg_pool, 990 ("AVG", 2): avg_pool, 991 ("AVG", 3): avg_pool3d, # pylint: disable=undefined-variable 992 } 993 op_key = (pooling_type, num_spatial_dims) 994 if op_key not in pooling_ops: 995 raise ValueError("%d-D %s pooling is not supported." % (op_key[1], 996 op_key[0])) 997 998 if data_format is None or not data_format.startswith("NC"): 999 adjusted_window_shape = [1] + list(window_shape) + [1] 1000 adjusted_strides = [1] + list(strides) + [1] 1001 spatial_dims = range(1, num_spatial_dims + 1) 1002 else: 1003 adjusted_window_shape = [1, 1] + list(window_shape) 1004 adjusted_strides = [1, 1] + list(strides) 1005 spatial_dims = range(2, num_spatial_dims + 2) 1006 1007 if num_spatial_dims == 1: 1008 if data_format is None or data_format == "NWC": 1009 data_format_kwargs = dict(data_format="NHWC") 1010 elif data_format == "NCW": 1011 data_format_kwargs = dict(data_format="NCHW") 1012 else: 1013 raise ValueError("data_format must be either \"NWC\" or \"NCW\".") 1014 adjusted_window_shape = [1] + adjusted_window_shape 1015 adjusted_strides = [1] + adjusted_strides 1016 else: 1017 data_format_kwargs = dict(data_format=data_format) 1018 1019 def op(converted_input, _, converted_padding): # pylint: disable=missing-docstring 1020 if num_spatial_dims == 1: 1021 converted_input = array_ops.expand_dims(converted_input, 1022 spatial_dims[0]) 1023 result = pooling_ops[op_key]( 1024 converted_input, 1025 adjusted_window_shape, 1026 adjusted_strides, 1027 converted_padding, 1028 name=scope, 1029 **data_format_kwargs) 1030 if num_spatial_dims == 1: 1031 result = array_ops.squeeze(result, [spatial_dims[0]]) 1032 return result 1033 1034 return with_space_to_batch( 1035 input=input, 1036 dilation_rate=dilation_rate, 1037 padding=padding, 1038 op=op, 1039 spatial_dims=spatial_dims, 1040 filter_shape=window_shape) 1041 1042 1043 @tf_export("nn.atrous_conv2d") 1044 def atrous_conv2d(value, filters, rate, padding, name=None): 1045 """Atrous convolution (a.k.a. 1046 1047 convolution with holes or dilated convolution). 1048 1049 This function is a simpler wrapper around the more general 1050 @{tf.nn.convolution}, and exists only for backwards compatibility. You can 1051 use @{tf.nn.convolution} to perform 1-D, 2-D, or 3-D atrous convolution. 1052 1053 1054 Computes a 2-D atrous convolution, also known as convolution with holes or 1055 dilated convolution, given 4-D `value` and `filters` tensors. If the `rate` 1056 parameter is equal to one, it performs regular 2-D convolution. If the `rate` 1057 parameter is greater than one, it performs convolution with holes, sampling 1058 the input values every `rate` pixels in the `height` and `width` dimensions. 1059 This is equivalent to convolving the input with a set of upsampled filters, 1060 produced by inserting `rate - 1` zeros between two consecutive values of the 1061 filters along the `height` and `width` dimensions, hence the name atrous 1062 convolution or convolution with holes (the French word trous means holes in 1063 English). 1064 1065 More specifically: 1066 1067 ``` 1068 output[batch, height, width, out_channel] = 1069 sum_{dheight, dwidth, in_channel} ( 1070 filters[dheight, dwidth, in_channel, out_channel] * 1071 value[batch, height + rate*dheight, width + rate*dwidth, in_channel] 1072 ) 1073 ``` 1074 1075 Atrous convolution allows us to explicitly control how densely to compute 1076 feature responses in fully convolutional networks. Used in conjunction with 1077 bilinear interpolation, it offers an alternative to `conv2d_transpose` in 1078 dense prediction tasks such as semantic image segmentation, optical flow 1079 computation, or depth estimation. It also allows us to effectively enlarge 1080 the field of view of filters without increasing the number of parameters or 1081 the amount of computation. 1082 1083 For a description of atrous convolution and how it can be used for dense 1084 feature extraction, please see: [Semantic Image Segmentation with Deep 1085 Convolutional Nets and Fully Connected CRFs](http://arxiv.org/abs/1412.7062). 1086 The same operation is investigated further in [Multi-Scale Context Aggregation 1087 by Dilated Convolutions](http://arxiv.org/abs/1511.07122). Previous works 1088 that effectively use atrous convolution in different ways are, among others, 1089 [OverFeat: Integrated Recognition, Localization and Detection using 1090 Convolutional Networks](http://arxiv.org/abs/1312.6229) and [Fast Image 1091 Scanning with Deep Max-Pooling Convolutional Neural 1092 Networks](http://arxiv.org/abs/1302.1700). 1093 Atrous convolution is also closely related to the so-called noble identities 1094 in multi-rate signal processing. 1095 1096 There are many different ways to implement atrous convolution (see the refs 1097 above). The implementation here reduces 1098 1099 ```python 1100 atrous_conv2d(value, filters, rate, padding=padding) 1101 ``` 1102 1103 to the following three operations: 1104 1105 ```python 1106 paddings = ... 1107 net = space_to_batch(value, paddings, block_size=rate) 1108 net = conv2d(net, filters, strides=[1, 1, 1, 1], padding="VALID") 1109 crops = ... 1110 net = batch_to_space(net, crops, block_size=rate) 1111 ``` 1112 1113 Advanced usage. Note the following optimization: A sequence of `atrous_conv2d` 1114 operations with identical `rate` parameters, 'SAME' `padding`, and filters 1115 with odd heights/ widths: 1116 1117 ```python 1118 net = atrous_conv2d(net, filters1, rate, padding="SAME") 1119 net = atrous_conv2d(net, filters2, rate, padding="SAME") 1120 ... 1121 net = atrous_conv2d(net, filtersK, rate, padding="SAME") 1122 ``` 1123 1124 can be equivalently performed cheaper in terms of computation and memory as: 1125 1126 ```python 1127 pad = ... # padding so that the input dims are multiples of rate 1128 net = space_to_batch(net, paddings=pad, block_size=rate) 1129 net = conv2d(net, filters1, strides=[1, 1, 1, 1], padding="SAME") 1130 net = conv2d(net, filters2, strides=[1, 1, 1, 1], padding="SAME") 1131 ... 1132 net = conv2d(net, filtersK, strides=[1, 1, 1, 1], padding="SAME") 1133 net = batch_to_space(net, crops=pad, block_size=rate) 1134 ``` 1135 1136 because a pair of consecutive `space_to_batch` and `batch_to_space` ops with 1137 the same `block_size` cancel out when their respective `paddings` and `crops` 1138 inputs are identical. 1139 1140 Args: 1141 value: A 4-D `Tensor` of type `float`. It needs to be in the default "NHWC" 1142 format. Its shape is `[batch, in_height, in_width, in_channels]`. 1143 filters: A 4-D `Tensor` with the same type as `value` and shape 1144 `[filter_height, filter_width, in_channels, out_channels]`. `filters`' 1145 `in_channels` dimension must match that of `value`. Atrous convolution is 1146 equivalent to standard convolution with upsampled filters with effective 1147 height `filter_height + (filter_height - 1) * (rate - 1)` and effective 1148 width `filter_width + (filter_width - 1) * (rate - 1)`, produced by 1149 inserting `rate - 1` zeros along consecutive elements across the 1150 `filters`' spatial dimensions. 1151 rate: A positive int32. The stride with which we sample input values across 1152 the `height` and `width` dimensions. Equivalently, the rate by which we 1153 upsample the filter values by inserting zeros across the `height` and 1154 `width` dimensions. In the literature, the same parameter is sometimes 1155 called `input stride` or `dilation`. 1156 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 1157 name: Optional name for the returned tensor. 1158 1159 Returns: 1160 A `Tensor` with the same type as `value`. 1161 Output shape with `'VALID`` padding is: 1162 1163 [batch, height - 2 * (filter_width - 1), 1164 width - 2 * (filter_height - 1), out_channels]. 1165 1166 Output shape with `'SAME'` padding is: 1167 1168 [batch, height, width, out_channels]. 1169 1170 Raises: 1171 ValueError: If input/output depth does not match `filters`' shape, or if 1172 padding is other than `'VALID'` or `'SAME'`. 1173 """ 1174 return convolution( 1175 input=value, 1176 filter=filters, 1177 padding=padding, 1178 dilation_rate=np.broadcast_to(rate, (2,)), 1179 name=name) 1180 1181 1182 @tf_export("nn.conv2d_transpose") 1183 def conv2d_transpose( 1184 value, 1185 filter, # pylint: disable=redefined-builtin 1186 output_shape, 1187 strides, 1188 padding="SAME", 1189 data_format="NHWC", 1190 name=None): 1191 """The transpose of `conv2d`. 1192 1193 This operation is sometimes called "deconvolution" after [Deconvolutional 1194 Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is 1195 actually the transpose (gradient) of `conv2d` rather than an actual 1196 deconvolution. 1197 1198 Args: 1199 value: A 4-D `Tensor` of type `float` and shape 1200 `[batch, height, width, in_channels]` for `NHWC` data format or 1201 `[batch, in_channels, height, width]` for `NCHW` data format. 1202 filter: A 4-D `Tensor` with the same type as `value` and shape 1203 `[height, width, output_channels, in_channels]`. `filter`'s 1204 `in_channels` dimension must match that of `value`. 1205 output_shape: A 1-D `Tensor` representing the output shape of the 1206 deconvolution op. 1207 strides: A list of ints. The stride of the sliding window for each 1208 dimension of the input tensor. 1209 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 1210 See the @{tf.nn.convolution$comment here} 1211 data_format: A string. 'NHWC' and 'NCHW' are supported. 1212 name: Optional name for the returned tensor. 1213 1214 Returns: 1215 A `Tensor` with the same type as `value`. 1216 1217 Raises: 1218 ValueError: If input/output depth does not match `filter`'s shape, or if 1219 padding is other than `'VALID'` or `'SAME'`. 1220 """ 1221 with ops.name_scope(name, "conv2d_transpose", 1222 [value, filter, output_shape]) as name: 1223 if data_format not in ("NCHW", "NHWC"): 1224 raise ValueError("data_format has to be either NCHW or NHWC.") 1225 value = ops.convert_to_tensor(value, name="value") 1226 filter = ops.convert_to_tensor(filter, name="filter") # pylint: disable=redefined-builtin 1227 axis = 3 if data_format == "NHWC" else 1 1228 if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[3]): 1229 raise ValueError("input channels does not match filter's input channels, " 1230 "{} != {}".format(value.get_shape()[axis], 1231 filter.get_shape()[3])) 1232 1233 output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") 1234 if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(4)): 1235 raise ValueError("output_shape must have shape (4,), got {}".format( 1236 output_shape_.get_shape())) 1237 1238 if isinstance(output_shape, (list, np.ndarray)): 1239 # output_shape's shape should be == [4] if reached this point. 1240 if not filter.get_shape()[2].is_compatible_with(output_shape[axis]): 1241 raise ValueError( 1242 "output_shape does not match filter's output channels, " 1243 "{} != {}".format(output_shape[axis], 1244 filter.get_shape()[2])) 1245 1246 if padding != "VALID" and padding != "SAME": 1247 raise ValueError("padding must be either VALID or SAME:" 1248 " {}".format(padding)) 1249 1250 return gen_nn_ops.conv2d_backprop_input( 1251 input_sizes=output_shape_, 1252 filter=filter, 1253 out_backprop=value, 1254 strides=strides, 1255 padding=padding, 1256 data_format=data_format, 1257 name=name) 1258 1259 1260 @tf_export("nn.atrous_conv2d_transpose") 1261 def atrous_conv2d_transpose(value, 1262 filters, 1263 output_shape, 1264 rate, 1265 padding, 1266 name=None): 1267 """The transpose of `atrous_conv2d`. 1268 1269 This operation is sometimes called "deconvolution" after [Deconvolutional 1270 Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is 1271 actually the transpose (gradient) of `atrous_conv2d` rather than an actual 1272 deconvolution. 1273 1274 Args: 1275 value: A 4-D `Tensor` of type `float`. It needs to be in the default `NHWC` 1276 format. Its shape is `[batch, in_height, in_width, in_channels]`. 1277 filters: A 4-D `Tensor` with the same type as `value` and shape 1278 `[filter_height, filter_width, out_channels, in_channels]`. `filters`' 1279 `in_channels` dimension must match that of `value`. Atrous convolution is 1280 equivalent to standard convolution with upsampled filters with effective 1281 height `filter_height + (filter_height - 1) * (rate - 1)` and effective 1282 width `filter_width + (filter_width - 1) * (rate - 1)`, produced by 1283 inserting `rate - 1` zeros along consecutive elements across the 1284 `filters`' spatial dimensions. 1285 output_shape: A 1-D `Tensor` of shape representing the output shape of the 1286 deconvolution op. 1287 rate: A positive int32. The stride with which we sample input values across 1288 the `height` and `width` dimensions. Equivalently, the rate by which we 1289 upsample the filter values by inserting zeros across the `height` and 1290 `width` dimensions. In the literature, the same parameter is sometimes 1291 called `input stride` or `dilation`. 1292 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 1293 name: Optional name for the returned tensor. 1294 1295 Returns: 1296 A `Tensor` with the same type as `value`. 1297 1298 Raises: 1299 ValueError: If input/output depth does not match `filters`' shape, or if 1300 padding is other than `'VALID'` or `'SAME'`, or if the `rate` is less 1301 than one, or if the output_shape is not a tensor with 4 elements. 1302 """ 1303 with ops.name_scope(name, "atrous_conv2d_transpose", 1304 [value, filters, output_shape]) as name: 1305 value = ops.convert_to_tensor(value, name="value") 1306 filters = ops.convert_to_tensor(filters, name="filters") 1307 if not value.get_shape()[3].is_compatible_with(filters.get_shape()[3]): 1308 raise ValueError( 1309 "value's input channels does not match filters' input channels, " 1310 "{} != {}".format(value.get_shape()[3], 1311 filters.get_shape()[3])) 1312 if rate < 1: 1313 raise ValueError("rate {} cannot be less than one".format(rate)) 1314 1315 if rate == 1: 1316 return conv2d_transpose( 1317 value, 1318 filters, 1319 output_shape, 1320 strides=[1, 1, 1, 1], 1321 padding=padding, 1322 data_format="NHWC") 1323 1324 output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") 1325 if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(4)): 1326 raise ValueError("output_shape must have shape (4,), got {}".format( 1327 output_shape_.get_shape())) 1328 1329 if isinstance(output_shape, (list, np.ndarray)): 1330 # output_shape's shape should be == [4] if reached this point. 1331 if not filters.get_shape()[2].is_compatible_with(output_shape[3]): 1332 raise ValueError( 1333 "output_shape does not match filter's output channels, " 1334 "{} != {}".format(output_shape[3], 1335 filters.get_shape()[2])) 1336 1337 # We have two padding contributions. The first is used for converting "SAME" 1338 # to "VALID". The second is required so that the height and width of the 1339 # zero-padded value tensor are multiples of rate. 1340 1341 # Padding required to reduce to "VALID" convolution 1342 if padding == "SAME": 1343 # Handle filters whose shape is unknown during graph creation. 1344 if filters.get_shape().is_fully_defined(): 1345 filter_shape = filters.get_shape().as_list() 1346 else: 1347 filter_shape = array_ops.shape(filters) 1348 filter_height, filter_width = filter_shape[0], filter_shape[1] 1349 1350 # Spatial dimensions of the filters and the upsampled filters in which we 1351 # introduce (rate - 1) zeros between consecutive filter values. 1352 filter_height_up = filter_height + (filter_height - 1) * (rate - 1) 1353 filter_width_up = filter_width + (filter_width - 1) * (rate - 1) 1354 1355 pad_height = filter_height_up - 1 1356 pad_width = filter_width_up - 1 1357 1358 # When pad_height (pad_width) is odd, we pad more to bottom (right), 1359 # following the same convention as conv2d(). 1360 pad_top = pad_height // 2 1361 pad_bottom = pad_height - pad_top 1362 pad_left = pad_width // 2 1363 pad_right = pad_width - pad_left 1364 elif padding == "VALID": 1365 pad_top = 0 1366 pad_bottom = 0 1367 pad_left = 0 1368 pad_right = 0 1369 else: 1370 raise ValueError("padding must be either VALID or SAME:" 1371 " {}".format(padding)) 1372 1373 in_height = output_shape[1] + pad_top + pad_bottom 1374 in_width = output_shape[2] + pad_left + pad_right 1375 1376 # More padding so that rate divides the height and width of the input. 1377 pad_bottom_extra = (rate - in_height % rate) % rate 1378 pad_right_extra = (rate - in_width % rate) % rate 1379 1380 # The paddings argument to space_to_batch is just the extra padding 1381 # component. 1382 space_to_batch_pad = [[0, pad_bottom_extra], [0, pad_right_extra]] 1383 1384 value = array_ops.space_to_batch( 1385 input=value, paddings=space_to_batch_pad, block_size=rate) 1386 1387 input_sizes = [ 1388 rate * rate * output_shape[0], (in_height + pad_bottom_extra) // rate, 1389 (in_width + pad_right_extra) // rate, output_shape[3] 1390 ] 1391 1392 value = gen_nn_ops.conv2d_backprop_input( 1393 input_sizes=input_sizes, 1394 filter=filters, 1395 out_backprop=value, 1396 strides=[1, 1, 1, 1], 1397 padding="VALID", 1398 data_format="NHWC") 1399 1400 # The crops argument to batch_to_space includes both padding components. 1401 batch_to_space_crop = [[pad_top, pad_bottom + pad_bottom_extra], 1402 [pad_left, pad_right + pad_right_extra]] 1403 1404 return array_ops.batch_to_space( 1405 input=value, crops=batch_to_space_crop, block_size=rate) 1406 1407 1408 @tf_export("nn.conv3d_transpose") 1409 def conv3d_transpose( 1410 value, 1411 filter, # pylint: disable=redefined-builtin 1412 output_shape, 1413 strides, 1414 padding="SAME", 1415 data_format="NDHWC", 1416 name=None): 1417 """The transpose of `conv3d`. 1418 1419 This operation is sometimes called "deconvolution" after [Deconvolutional 1420 Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is 1421 actually the transpose (gradient) of `conv3d` rather than an actual 1422 deconvolution. 1423 1424 Args: 1425 value: A 5-D `Tensor` of type `float` and shape 1426 `[batch, depth, height, width, in_channels]`. 1427 filter: A 5-D `Tensor` with the same type as `value` and shape 1428 `[depth, height, width, output_channels, in_channels]`. `filter`'s 1429 `in_channels` dimension must match that of `value`. 1430 output_shape: A 1-D `Tensor` representing the output shape of the 1431 deconvolution op. 1432 strides: A list of ints. The stride of the sliding window for each 1433 dimension of the input tensor. 1434 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 1435 See the @{tf.nn.convolution$comment here} 1436 data_format: A string, either `'NDHWC'` or `'NCDHW`' specifying the layout 1437 of the input and output tensors. Defaults to `'NDHWC'`. 1438 name: Optional name for the returned tensor. 1439 1440 Returns: 1441 A `Tensor` with the same type as `value`. 1442 1443 Raises: 1444 ValueError: If input/output depth does not match `filter`'s shape, or if 1445 padding is other than `'VALID'` or `'SAME'`. 1446 """ 1447 with ops.name_scope(name, "conv3d_transpose", 1448 [value, filter, output_shape]) as name: 1449 value = ops.convert_to_tensor(value, name="value") 1450 filter = ops.convert_to_tensor(filter, name="filter") # pylint: disable=redefined-builtin 1451 axis = 1 if data_format == "NCDHW" else 4 1452 if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[4]): 1453 raise ValueError("input channels does not match filter's input channels, " 1454 "{} != {}".format(value.get_shape()[axis], 1455 filter.get_shape()[4])) 1456 1457 output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") 1458 if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(5)): 1459 raise ValueError("output_shape must have shape (5,), got {}".format( 1460 output_shape_.get_shape())) 1461 1462 if isinstance(output_shape, (list, np.ndarray)): 1463 # output_shape's shape should be == [5] if reached this point. 1464 if not filter.get_shape()[3].is_compatible_with(output_shape[4]): 1465 raise ValueError( 1466 "output_shape does not match filter's output channels, " 1467 "{} != {}".format(output_shape[4], 1468 filter.get_shape()[3])) 1469 1470 if padding != "VALID" and padding != "SAME": 1471 raise ValueError("padding must be either VALID or SAME:" 1472 " {}".format(padding)) 1473 1474 return gen_nn_ops.conv3d_backprop_input_v2( 1475 input_sizes=output_shape_, 1476 filter=filter, 1477 out_backprop=value, 1478 strides=strides, 1479 padding=padding, 1480 data_format=data_format, 1481 name=name) 1482 1483 1484 # pylint: disable=protected-access 1485 @tf_export("nn.bias_add") 1486 def bias_add(value, bias, data_format=None, name=None): 1487 """Adds `bias` to `value`. 1488 1489 This is (mostly) a special case of `tf.add` where `bias` is restricted to 1-D. 1490 Broadcasting is supported, so `value` may have any number of dimensions. 1491 Unlike `tf.add`, the type of `bias` is allowed to differ from `value` in the 1492 case where both types are quantized. 1493 1494 Args: 1495 value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`, 1496 `int16`, `int8`, `complex64`, or `complex128`. 1497 bias: A 1-D `Tensor` with size matching the last dimension of `value`. 1498 Must be the same type as `value` unless `value` is a quantized type, 1499 in which case a different quantized type may be used. 1500 data_format: A string. 'NHWC' and 'NCHW' are supported. 1501 name: A name for the operation (optional). 1502 1503 Returns: 1504 A `Tensor` with the same type as `value`. 1505 """ 1506 with ops.name_scope(name, "BiasAdd", [value, bias]) as name: 1507 value = ops.convert_to_tensor(value, name="input") 1508 bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") 1509 return gen_nn_ops._bias_add(value, bias, data_format=data_format, name=name) 1510 1511 1512 # pylint: disable=protected-access 1513 def bias_add_v1(value, bias, name=None): 1514 """Adds `bias` to `value`. 1515 1516 This is a deprecated version of bias_add and will soon to be removed. 1517 1518 This is (mostly) a special case of `tf.add` where `bias` is restricted to 1-D. 1519 Broadcasting is supported, so `value` may have any number of dimensions. 1520 Unlike `tf.add`, the type of `bias` is allowed to differ from `value` in the 1521 case where both types are quantized. 1522 1523 Args: 1524 value: A `Tensor` with type `float`, `double`, `int64`, `int32`, `uint8`, 1525 `int16`, `int8`, `complex64`, or `complex128`. 1526 bias: A 1-D `Tensor` with size matching the last dimension of `value`. 1527 Must be the same type as `value` unless `value` is a quantized type, 1528 in which case a different quantized type may be used. 1529 name: A name for the operation (optional). 1530 1531 Returns: 1532 A `Tensor` with the same type as `value`. 1533 """ 1534 with ops.name_scope(name, "BiasAddV1", [value, bias]) as name: 1535 value = ops.convert_to_tensor(value, name="input") 1536 bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") 1537 return gen_nn_ops._bias_add_v1(value, bias, name=name) 1538 1539 1540 @tf_export("nn.crelu") 1541 def crelu(features, name=None, axis=-1): 1542 """Computes Concatenated ReLU. 1543 1544 Concatenates a ReLU which selects only the positive part of the activation 1545 with a ReLU which selects only the *negative* part of the activation. 1546 Note that as a result this non-linearity doubles the depth of the activations. 1547 Source: [Understanding and Improving Convolutional Neural Networks via 1548 Concatenated Rectified Linear Units. W. Shang, et 1549 al.](https://arxiv.org/abs/1603.05201) 1550 1551 Args: 1552 features: A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, 1553 `int16`, or `int8`. 1554 name: A name for the operation (optional). 1555 axis: The axis that the output values are concatenated along. Default is -1. 1556 1557 Returns: 1558 A `Tensor` with the same type as `features`. 1559 """ 1560 with ops.name_scope(name, "CRelu", [features]) as name: 1561 features = ops.convert_to_tensor(features, name="features") 1562 c = array_ops.concat([features, -features], axis, name=name) 1563 return gen_nn_ops.relu(c) 1564 1565 1566 @tf_export("nn.relu6") 1567 def relu6(features, name=None): 1568 """Computes Rectified Linear 6: `min(max(features, 0), 6)`. 1569 1570 Source: [Convolutional Deep Belief Networks on CIFAR-10. A. 1571 Krizhevsky](http://www.cs.utoronto.ca/~kriz/conv-cifar10-aug2010.pdf) 1572 1573 Args: 1574 features: A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, 1575 `int16`, or `int8`. 1576 name: A name for the operation (optional). 1577 1578 Returns: 1579 A `Tensor` with the same type as `features`. 1580 """ 1581 with ops.name_scope(name, "Relu6", [features]) as name: 1582 features = ops.convert_to_tensor(features, name="features") 1583 return gen_nn_ops._relu6(features, name=name) 1584 1585 1586 @tf_export("nn.leaky_relu") 1587 def leaky_relu(features, alpha=0.2, name=None): 1588 """Compute the Leaky ReLU activation function. 1589 1590 "Rectifier Nonlinearities Improve Neural Network Acoustic Models" 1591 AL Maas, AY Hannun, AY Ng - Proc. ICML, 2013 1592 http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf 1593 1594 Args: 1595 features: A `Tensor` representing preactivation values. Must be one of 1596 the following types: `float16`, `float32`, `float64`, `int32`, `int64`. 1597 alpha: Slope of the activation function at x < 0. 1598 name: A name for the operation (optional). 1599 1600 Returns: 1601 The activation value. 1602 """ 1603 with ops.name_scope(name, "LeakyRelu", [features, alpha]): 1604 features = ops.convert_to_tensor(features, name="features") 1605 if features.dtype.is_integer: 1606 features = math_ops.to_float(features) 1607 alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") 1608 return math_ops.maximum(alpha * features, features) 1609 1610 1611 def _flatten_outer_dims(logits): 1612 """Flattens logits' outer dimensions and keep its last dimension.""" 1613 rank = array_ops.rank(logits) 1614 last_dim_size = array_ops.slice( 1615 array_ops.shape(logits), [math_ops.subtract(rank, 1)], [1]) 1616 output = array_ops.reshape(logits, array_ops.concat([[-1], last_dim_size], 0)) 1617 1618 # Set output shape if known. 1619 if context.in_graph_mode(): 1620 shape = logits.get_shape() 1621 if shape is not None and shape.dims is not None: 1622 shape = shape.as_list() 1623 product = 1 1624 product_valid = True 1625 for d in shape[:-1]: 1626 if d is None: 1627 product_valid = False 1628 break 1629 else: 1630 product *= d 1631 if product_valid: 1632 output_shape = [product, shape[-1]] 1633 output.set_shape(output_shape) 1634 1635 return output 1636 1637 1638 def _softmax(logits, compute_op, dim=-1, name=None): 1639 """Helper function for softmax and log_softmax. 1640 1641 It reshapes and transposes the input logits into a 2-D Tensor and then invokes 1642 the tf.nn._softmax or tf.nn._log_softmax function. The output would be 1643 transposed and reshaped back. 1644 1645 Args: 1646 logits: A non-empty `Tensor`. Must be one of the following types: `half`, 1647 `float32`, `float64`. 1648 compute_op: Either gen_nn_ops._softmax or gen_nn_ops._log_softmax 1649 dim: The dimension softmax would be performed on. The default is -1 which 1650 indicates the last dimension. 1651 name: A name for the operation (optional). 1652 1653 Returns: 1654 A `Tensor`. Has the same type as `logits`. Same shape as `logits`. 1655 Raises: 1656 InvalidArgumentError: if `logits` is empty or `dim` is beyond the last 1657 dimension of `logits`. 1658 """ 1659 1660 def _swap_axis(logits, dim_index, last_index, name=None): 1661 """Swaps logits's dim_index and last_index.""" 1662 return array_ops.transpose( 1663 logits, 1664 array_ops.concat([ 1665 math_ops.range(dim_index), [last_index], 1666 math_ops.range(dim_index + 1, last_index), [dim_index] 1667 ], 0), 1668 name=name) 1669 1670 logits = ops.convert_to_tensor(logits) 1671 1672 # We need its original shape for shape inference. 1673 shape = logits.get_shape() 1674 is_last_dim = (dim is -1) or (dim == shape.ndims - 1) 1675 1676 if shape.ndims is 2 and is_last_dim: 1677 return compute_op(logits, name=name) 1678 1679 # If dim is the last dimension, simply reshape the logits to a matrix and 1680 # apply the internal softmax. 1681 if is_last_dim: 1682 input_shape = array_ops.shape(logits) 1683 logits = _flatten_outer_dims(logits) 1684 output = compute_op(logits) 1685 output = array_ops.reshape(output, input_shape, name=name) 1686 return output 1687 1688 # If dim is not the last dimension, we have to do a reshape and transpose so 1689 # that we can still perform softmax on its last dimension. 1690 1691 # Swap logits' dimension of dim and its last dimension. 1692 input_rank = array_ops.rank(logits) 1693 dim_axis = dim % shape.ndims 1694 logits = _swap_axis(logits, dim_axis, math_ops.subtract(input_rank, 1)) 1695 shape_after_swap = array_ops.shape(logits) 1696 1697 # Reshape logits into a matrix. 1698 logits = _flatten_outer_dims(logits) 1699 1700 # Do the actual softmax on its last dimension. 1701 output = compute_op(logits) 1702 1703 # Transform back the output tensor. 1704 output = array_ops.reshape(output, shape_after_swap) 1705 output = _swap_axis( 1706 output, dim_axis, math_ops.subtract(input_rank, 1), name=name) 1707 1708 # Make shape inference work since reshape and transpose may erase its static 1709 # shape. 1710 output.set_shape(shape) 1711 1712 return output 1713 1714 1715 @tf_export("nn.softmax") 1716 @deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") 1717 def softmax(logits, axis=None, name=None, dim=None): 1718 """Computes softmax activations. 1719 1720 This function performs the equivalent of 1721 1722 softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis) 1723 1724 Args: 1725 logits: A non-empty `Tensor`. Must be one of the following types: `half`, 1726 `float32`, `float64`. 1727 axis: The dimension softmax would be performed on. The default is -1 which 1728 indicates the last dimension. 1729 name: A name for the operation (optional). 1730 dim: Deprecated alias for `axis`. 1731 1732 Returns: 1733 A `Tensor`. Has the same type and shape as `logits`. 1734 1735 Raises: 1736 InvalidArgumentError: if `logits` is empty or `axis` is beyond the last 1737 dimension of `logits`. 1738 """ 1739 axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) 1740 if axis is None: 1741 axis = -1 1742 return _softmax(logits, gen_nn_ops._softmax, axis, name) 1743 1744 1745 @tf_export("nn.log_softmax") 1746 @deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") 1747 def log_softmax(logits, axis=None, name=None, dim=None): 1748 """Computes log softmax activations. 1749 1750 For each batch `i` and class `j` we have 1751 1752 logsoftmax = logits - log(reduce_sum(exp(logits), axis)) 1753 1754 Args: 1755 logits: A non-empty `Tensor`. Must be one of the following types: `half`, 1756 `float32`, `float64`. 1757 axis: The dimension softmax would be performed on. The default is -1 which 1758 indicates the last dimension. 1759 name: A name for the operation (optional). 1760 dim: Deprecated alias for `axis`. 1761 1762 Returns: 1763 A `Tensor`. Has the same type as `logits`. Same shape as `logits`. 1764 1765 Raises: 1766 InvalidArgumentError: if `logits` is empty or `axis` is beyond the last 1767 dimension of `logits`. 1768 """ 1769 axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) 1770 if axis is None: 1771 axis = -1 1772 return _softmax(logits, gen_nn_ops._log_softmax, axis, name) 1773 1774 1775 def _ensure_xent_args(name, sentinel, labels, logits): 1776 # Make sure that all arguments were passed as named arguments. 1777 if sentinel is not None: 1778 raise ValueError("Only call `%s` with " 1779 "named arguments (labels=..., logits=..., ...)" % name) 1780 if labels is None or logits is None: 1781 raise ValueError("Both labels and logits must be provided.") 1782 1783 1784 @tf_export("nn.softmax_cross_entropy_with_logits_v2") 1785 def softmax_cross_entropy_with_logits_v2( 1786 _sentinel=None, # pylint: disable=invalid-name 1787 labels=None, 1788 logits=None, 1789 dim=-1, 1790 name=None): 1791 """Computes softmax cross entropy between `logits` and `labels`. 1792 1793 Measures the probability error in discrete classification tasks in which the 1794 classes are mutually exclusive (each entry is in exactly one class). For 1795 example, each CIFAR-10 image is labeled with one and only one label: an image 1796 can be a dog or a truck, but not both. 1797 1798 **NOTE:** While the classes are mutually exclusive, their probabilities 1799 need not be. All that is required is that each row of `labels` is 1800 a valid probability distribution. If they are not, the computation of the 1801 gradient will be incorrect. 1802 1803 If using exclusive `labels` (wherein one and only 1804 one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`. 1805 1806 **WARNING:** This op expects unscaled logits, since it performs a `softmax` 1807 on `logits` internally for efficiency. Do not call this op with the 1808 output of `softmax`, as it will produce incorrect results. 1809 1810 `logits` and `labels` must have the same shape, e.g. 1811 `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, 1812 or `float64`). 1813 1814 Backpropagation will happen into both `logits` and `labels`. To disallow 1815 backpropagation into `labels`, pass label tensors through a `stop_gradients` 1816 before feeding it to this function. 1817 1818 **Note that to avoid confusion, it is required to pass only named arguments to 1819 this function.** 1820 1821 Args: 1822 _sentinel: Used to prevent positional parameters. Internal, do not use. 1823 labels: Each row `labels[i]` must be a valid probability distribution. 1824 logits: Unscaled log probabilities. 1825 dim: The class dimension. Defaulted to -1 which is the last dimension. 1826 name: A name for the operation (optional). 1827 1828 Returns: 1829 A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the 1830 softmax cross entropy loss. 1831 """ 1832 _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, labels, 1833 logits) 1834 1835 # TODO(pcmurray) Raise an error when the labels do not sum to 1. Note: This 1836 # could break users who call this with bad labels, but disregard the bad 1837 # results. 1838 1839 with ops.name_scope(name, "softmax_cross_entropy_with_logits", 1840 [logits, labels]) as name: 1841 logits = ops.convert_to_tensor(logits, name="logits") 1842 labels = ops.convert_to_tensor(labels, name="labels") 1843 precise_logits = math_ops.cast( 1844 logits, dtypes.float32) if (logits.dtype == dtypes.float16) else logits 1845 # labels and logits must be of the same type 1846 labels = math_ops.cast(labels, precise_logits.dtype) 1847 input_rank = array_ops.rank(precise_logits) 1848 # For shape inference. 1849 shape = logits.get_shape() 1850 1851 # Move the dim to the end if dim is not the last dimension. 1852 if dim is not -1: 1853 1854 def _move_dim_to_end(tensor, dim_index, rank): 1855 return array_ops.transpose( 1856 tensor, 1857 array_ops.concat([ 1858 math_ops.range(dim_index), 1859 math_ops.range(dim_index + 1, rank), [dim_index] 1860 ], 0)) 1861 1862 precise_logits = _move_dim_to_end(precise_logits, dim, input_rank) 1863 labels = _move_dim_to_end(labels, dim, input_rank) 1864 1865 input_shape = array_ops.shape(precise_logits) 1866 1867 # Make precise_logits and labels into matrices. 1868 precise_logits = _flatten_outer_dims(precise_logits) 1869 labels = _flatten_outer_dims(labels) 1870 1871 # Do the actual op computation. 1872 # The second output tensor contains the gradients. We use it in 1873 # _CrossEntropyGrad() in nn_grad but not here. 1874 cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( 1875 precise_logits, labels, name=name) 1876 1877 # The output cost shape should be the input minus dim. 1878 output_shape = array_ops.slice(input_shape, [0], 1879 [math_ops.subtract(input_rank, 1)]) 1880 cost = array_ops.reshape(cost, output_shape) 1881 1882 # Make shape inference work since reshape and transpose may erase its static 1883 # shape. 1884 if context.in_graph_mode() and shape is not None and shape.dims is not None: 1885 shape = shape.as_list() 1886 del shape[dim] 1887 cost.set_shape(shape) 1888 1889 if logits.dtype == dtypes.float16: 1890 return math_ops.cast(cost, dtypes.float16) 1891 else: 1892 return cost 1893 1894 1895 _XENT_DEPRECATION = """ 1896 Future major versions of TensorFlow will allow gradients to flow 1897 into the labels input on backprop by default. 1898 1899 See tf.nn.softmax_cross_entropy_with_logits_v2. 1900 """ 1901 1902 1903 @tf_export("nn.softmax_cross_entropy_with_logits") 1904 @deprecation.deprecated(date=None, instructions=_XENT_DEPRECATION) 1905 def softmax_cross_entropy_with_logits( 1906 _sentinel=None, # pylint: disable=invalid-name 1907 labels=None, 1908 logits=None, 1909 dim=-1, 1910 name=None): 1911 """Computes softmax cross entropy between `logits` and `labels`. 1912 1913 Measures the probability error in discrete classification tasks in which the 1914 classes are mutually exclusive (each entry is in exactly one class). For 1915 example, each CIFAR-10 image is labeled with one and only one label: an image 1916 can be a dog or a truck, but not both. 1917 1918 **NOTE:** While the classes are mutually exclusive, their probabilities 1919 need not be. All that is required is that each row of `labels` is 1920 a valid probability distribution. If they are not, the computation of the 1921 gradient will be incorrect. 1922 1923 If using exclusive `labels` (wherein one and only 1924 one class is true at a time), see `sparse_softmax_cross_entropy_with_logits`. 1925 1926 **WARNING:** This op expects unscaled logits, since it performs a `softmax` 1927 on `logits` internally for efficiency. Do not call this op with the 1928 output of `softmax`, as it will produce incorrect results. 1929 1930 `logits` and `labels` must have the same shape, e.g. 1931 `[batch_size, num_classes]` and the same dtype (either `float16`, `float32`, 1932 or `float64`). 1933 1934 Backpropagation will happen only into `logits`. To calculate a cross entropy 1935 loss that allows backpropagation into both `logits` and `labels`, see 1936 @{tf.nn.softmax_cross_entropy_with_logits_v2}. 1937 1938 **Note that to avoid confusion, it is required to pass only named arguments to 1939 this function.** 1940 1941 Args: 1942 _sentinel: Used to prevent positional parameters. Internal, do not use. 1943 labels: Each row `labels[i]` must be a valid probability distribution. 1944 logits: Unscaled log probabilities. 1945 dim: The class dimension. Defaulted to -1 which is the last dimension. 1946 name: A name for the operation (optional). 1947 1948 Returns: 1949 A 1-D `Tensor` of length `batch_size` of the same type as `logits` with the 1950 softmax cross entropy loss. 1951 """ 1952 _ensure_xent_args("softmax_cross_entropy_with_logits", _sentinel, labels, 1953 logits) 1954 1955 with ops.name_scope(name, "softmax_cross_entropy_with_logits_sg", 1956 [logits, labels]) as name: 1957 labels = array_ops.stop_gradient(labels, name="labels_stop_gradient") 1958 1959 return softmax_cross_entropy_with_logits_v2( 1960 labels=labels, logits=logits, dim=dim, name=name) 1961 1962 1963 @tf_export("nn.sparse_softmax_cross_entropy_with_logits") 1964 def sparse_softmax_cross_entropy_with_logits( 1965 _sentinel=None, # pylint: disable=invalid-name 1966 labels=None, 1967 logits=None, 1968 name=None): 1969 """Computes sparse softmax cross entropy between `logits` and `labels`. 1970 1971 Measures the probability error in discrete classification tasks in which the 1972 classes are mutually exclusive (each entry is in exactly one class). For 1973 example, each CIFAR-10 image is labeled with one and only one label: an image 1974 can be a dog or a truck, but not both. 1975 1976 **NOTE:** For this operation, the probability of a given label is considered 1977 exclusive. That is, soft classes are not allowed, and the `labels` vector 1978 must provide a single specific index for the true class for each row of 1979 `logits` (each minibatch entry). For soft softmax classification with 1980 a probability distribution for each entry, see 1981 `softmax_cross_entropy_with_logits`. 1982 1983 **WARNING:** This op expects unscaled logits, since it performs a `softmax` 1984 on `logits` internally for efficiency. Do not call this op with the 1985 output of `softmax`, as it will produce incorrect results. 1986 1987 A common use case is to have logits of shape `[batch_size, num_classes]` and 1988 labels of shape `[batch_size]`. But higher dimensions are supported. 1989 1990 **Note that to avoid confusion, it is required to pass only named arguments to 1991 this function.** 1992 1993 Args: 1994 _sentinel: Used to prevent positional parameters. Internal, do not use. 1995 labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of 1996 `labels` and result) and dtype `int32` or `int64`. Each entry in `labels` 1997 must be an index in `[0, num_classes)`. Other values will raise an 1998 exception when this op is run on CPU, and return `NaN` for corresponding 1999 loss and gradient rows on GPU. 2000 logits: Unscaled log probabilities of shape 2001 `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`. 2002 name: A name for the operation (optional). 2003 2004 Returns: 2005 A `Tensor` of the same shape as `labels` and of the same type as `logits` 2006 with the softmax cross entropy loss. 2007 2008 Raises: 2009 ValueError: If logits are scalars (need to have rank >= 1) or if the rank 2010 of the labels is not equal to the rank of the logits minus one. 2011 """ 2012 _ensure_xent_args("sparse_softmax_cross_entropy_with_logits", _sentinel, 2013 labels, logits) 2014 2015 # TODO(pcmurray) Raise an error when the label is not an index in 2016 # [0, num_classes). Note: This could break users who call this with bad 2017 # labels, but disregard the bad results. 2018 2019 # Reshape logits and labels to rank 2. 2020 with ops.name_scope(name, "SparseSoftmaxCrossEntropyWithLogits", 2021 [labels, logits]): 2022 labels = ops.convert_to_tensor(labels) 2023 logits = ops.convert_to_tensor(logits) 2024 precise_logits = math_ops.cast(logits, dtypes.float32) if (dtypes.as_dtype( 2025 logits.dtype) == dtypes.float16) else logits 2026 2027 # Store label shape for result later. 2028 labels_static_shape = labels.get_shape() 2029 labels_shape = array_ops.shape(labels) 2030 if logits.get_shape().ndims is not None and logits.get_shape().ndims == 0: 2031 raise ValueError( 2032 "Logits cannot be scalars - received shape %s." % logits.get_shape()) 2033 if logits.get_shape().ndims is not None and ( 2034 labels_static_shape.ndims is not None and 2035 labels_static_shape.ndims != logits.get_shape().ndims - 1): 2036 raise ValueError("Rank mismatch: Rank of labels (received %s) should " 2037 "equal rank of logits minus 1 (received %s)." % 2038 (labels_static_shape.ndims, logits.get_shape().ndims)) 2039 # Check if no reshapes are required. 2040 if logits.get_shape().ndims == 2: 2041 cost, _ = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( 2042 precise_logits, labels, name=name) 2043 if logits.dtype == dtypes.float16: 2044 return math_ops.cast(cost, dtypes.float16) 2045 else: 2046 return cost 2047 2048 # Reshape logits to 2 dim, labels to 1 dim. 2049 num_classes = array_ops.shape(logits)[array_ops.rank(logits) - 1] 2050 precise_logits = array_ops.reshape(precise_logits, [-1, num_classes]) 2051 labels = array_ops.reshape(labels, [-1]) 2052 # The second output tensor contains the gradients. We use it in 2053 # _CrossEntropyGrad() in nn_grad but not here. 2054 cost, _ = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( 2055 precise_logits, labels, name=name) 2056 cost = array_ops.reshape(cost, labels_shape) 2057 cost.set_shape(labels_static_shape) 2058 if logits.dtype == dtypes.float16: 2059 return math_ops.cast(cost, dtypes.float16) 2060 else: 2061 return cost 2062 2063 2064 @tf_export("nn.avg_pool") 2065 def avg_pool(value, ksize, strides, padding, data_format="NHWC", name=None): 2066 """Performs the average pooling on the input. 2067 2068 Each entry in `output` is the mean of the corresponding size `ksize` 2069 window in `value`. 2070 2071 Args: 2072 value: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type 2073 `float32`, `float64`, `qint8`, `quint8`, or `qint32`. 2074 ksize: A 1-D int Tensor of 4 elements. 2075 The size of the window for each dimension of the input tensor. 2076 strides: A 1-D int Tensor of 4 elements 2077 The stride of the sliding window for each dimension of the 2078 input tensor. 2079 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 2080 See the @{tf.nn.convolution$comment here} 2081 data_format: A string. 'NHWC' and 'NCHW' are supported. 2082 name: Optional name for the operation. 2083 2084 Returns: 2085 A `Tensor` with the same type as `value`. The average pooled output tensor. 2086 """ 2087 with ops.name_scope(name, "AvgPool", [value]) as name: 2088 value = ops.convert_to_tensor(value, name="input") 2089 return gen_nn_ops._avg_pool( 2090 value, 2091 ksize=ksize, 2092 strides=strides, 2093 padding=padding, 2094 data_format=data_format, 2095 name=name) 2096 2097 2098 @tf_export("nn.max_pool") 2099 def max_pool(value, ksize, strides, padding, data_format="NHWC", name=None): 2100 """Performs the max pooling on the input. 2101 2102 Args: 2103 value: A 4-D `Tensor` of the format specified by `data_format`. 2104 ksize: A 1-D int Tensor of 4 elements. The size of the window for 2105 each dimension of the input tensor. 2106 strides: A 1-D int Tensor of 4 elements. The stride of the sliding 2107 window for each dimension of the input tensor. 2108 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 2109 See the @{tf.nn.convolution$comment here} 2110 data_format: A string. 'NHWC', 'NCHW' and 'NCHW_VECT_C' are supported. 2111 name: Optional name for the operation. 2112 2113 Returns: 2114 A `Tensor` of format specified by `data_format`. 2115 The max pooled output tensor. 2116 """ 2117 with ops.name_scope(name, "MaxPool", [value]) as name: 2118 value = ops.convert_to_tensor(value, name="input") 2119 return gen_nn_ops._max_pool(value, 2120 ksize=ksize, 2121 strides=strides, 2122 padding=padding, 2123 data_format=data_format, 2124 name=name) 2125 2126 2127 @ops.RegisterStatistics("Conv2D", "flops") 2128 def _calc_conv_flops(graph, node): 2129 """Calculates the compute resources needed for Conv2D.""" 2130 input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0]) 2131 input_shape.assert_is_fully_defined() 2132 filter_shape = graph_util.tensor_shape_from_node_def_name( 2133 graph, node.input[1]) 2134 filter_shape.assert_is_fully_defined() 2135 output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name) 2136 output_shape.assert_is_fully_defined() 2137 filter_height = int(filter_shape[0]) 2138 filter_width = int(filter_shape[1]) 2139 filter_in_depth = int(filter_shape[2]) 2140 output_count = np.prod(output_shape.as_list()) 2141 return ops.OpStats( 2142 "flops", 2143 (output_count * filter_in_depth * filter_height * filter_width * 2)) 2144 2145 2146 @ops.RegisterStatistics("DepthwiseConv2dNative", "flops") 2147 def _calc_depthwise_conv_flops(graph, node): 2148 """Calculates the compute resources needed for DepthwiseConv2dNative.""" 2149 input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0]) 2150 input_shape.assert_is_fully_defined() 2151 filter_shape = graph_util.tensor_shape_from_node_def_name( 2152 graph, node.input[1]) 2153 filter_shape.assert_is_fully_defined() 2154 output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name) 2155 output_shape.assert_is_fully_defined() 2156 filter_height = int(filter_shape[0]) 2157 filter_width = int(filter_shape[1]) 2158 output_count = np.prod(output_shape.as_list()) 2159 return ops.OpStats("flops", (output_count * filter_height * filter_width * 2)) 2160 2161 2162 @ops.RegisterStatistics("BiasAdd", "flops") 2163 def _calc_bias_add_flops(graph, node): 2164 """Calculates the computing needed for BiasAdd.""" 2165 input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0]) 2166 input_shape.assert_is_fully_defined() 2167 input_count = np.prod(input_shape.as_list()) 2168 return ops.OpStats("flops", input_count) 2169 2170 2171 @tf_export("nn.xw_plus_b") 2172 def xw_plus_b(x, weights, biases, name=None): # pylint: disable=invalid-name 2173 """Computes matmul(x, weights) + biases. 2174 2175 Args: 2176 x: a 2D tensor. Dimensions typically: batch, in_units 2177 weights: a 2D tensor. Dimensions typically: in_units, out_units 2178 biases: a 1D tensor. Dimensions: out_units 2179 name: A name for the operation (optional). If not specified 2180 "xw_plus_b" is used. 2181 2182 Returns: 2183 A 2-D Tensor computing matmul(x, weights) + biases. 2184 Dimensions typically: batch, out_units. 2185 """ 2186 with ops.name_scope(name, "xw_plus_b", [x, weights, biases]) as name: 2187 x = ops.convert_to_tensor(x, name="x") 2188 weights = ops.convert_to_tensor(weights, name="weights") 2189 biases = ops.convert_to_tensor(biases, name="biases") 2190 mm = math_ops.matmul(x, weights) 2191 return bias_add(mm, biases, name=name) 2192 2193 2194 def xw_plus_b_v1(x, weights, biases, name=None): # pylint: disable=invalid-name 2195 """Computes matmul(x, weights) + biases. 2196 2197 This is a deprecated version of that will soon be removed. 2198 2199 Args: 2200 x: a 2D tensor. Dimensions typically: batch, in_units 2201 weights: a 2D tensor. Dimensions typically: in_units, out_units 2202 biases: a 1D tensor. Dimensions: out_units 2203 name: A name for the operation (optional). If not specified 2204 "xw_plus_b_v1" is used. 2205 2206 Returns: 2207 A 2-D Tensor computing matmul(x, weights) + biases. 2208 Dimensions typically: batch, out_units. 2209 """ 2210 with ops.name_scope(name, "xw_plus_b_v1", [x, weights, biases]) as name: 2211 x = ops.convert_to_tensor(x, name="x") 2212 weights = ops.convert_to_tensor(weights, name="weights") 2213 biases = ops.convert_to_tensor(biases, name="biases") 2214 mm = math_ops.matmul(x, weights) 2215 return bias_add_v1(mm, biases, name=name) 2216 2217 def _get_noise_shape(x, noise_shape): 2218 # If noise_shape is none return immediately. 2219 if noise_shape is None: 2220 return array_ops.shape(x) 2221 2222 try: 2223 # Best effort to figure out the intended shape. 2224 # If not possible, let the op to handle it. 2225 # In eager mode exception will show up. 2226 noise_shape_ = tensor_shape.as_shape(noise_shape) 2227 except (TypeError, ValueError): 2228 return noise_shape 2229 2230 if (x.shape.dims is not None and 2231 len(x.shape.dims) == len(noise_shape_.dims)): 2232 new_dims = [] 2233 for i, dim in enumerate(x.shape.dims): 2234 if noise_shape_.dims[i].value is None and dim.value is not None: 2235 new_dims.append(dim.value) 2236 else: 2237 new_dims.append(noise_shape_.dims[i].value) 2238 return tensor_shape.TensorShape(new_dims) 2239 2240 return noise_shape 2241 2242 @tf_export("nn.dropout") 2243 def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: disable=invalid-name 2244 """Computes dropout. 2245 2246 With probability `keep_prob`, outputs the input element scaled up by 2247 `1 / keep_prob`, otherwise outputs `0`. The scaling is so that the expected 2248 sum is unchanged. 2249 2250 By default, each element is kept or dropped independently. If `noise_shape` 2251 is specified, it must be 2252 [broadcastable](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) 2253 to the shape of `x`, and only dimensions with `noise_shape[i] == shape(x)[i]` 2254 will make independent decisions. For example, if `shape(x) = [k, l, m, n]` 2255 and `noise_shape = [k, 1, 1, n]`, each batch and channel component will be 2256 kept independently and each row and column will be kept or not kept together. 2257 2258 Args: 2259 x: A floating point tensor. 2260 keep_prob: A scalar `Tensor` with the same type as x. The probability 2261 that each element is kept. 2262 noise_shape: A 1-D `Tensor` of type `int32`, representing the 2263 shape for randomly generated keep/drop flags. 2264 seed: A Python integer. Used to create random seeds. See 2265 @{tf.set_random_seed} 2266 for behavior. 2267 name: A name for this operation (optional). 2268 2269 Returns: 2270 A Tensor of the same shape of `x`. 2271 2272 Raises: 2273 ValueError: If `keep_prob` is not in `(0, 1]` or if `x` is not a floating 2274 point tensor. 2275 """ 2276 with ops.name_scope(name, "dropout", [x]) as name: 2277 x = ops.convert_to_tensor(x, name="x") 2278 if not x.dtype.is_floating: 2279 raise ValueError("x has to be a floating point tensor since it's going to" 2280 " be scaled. Got a %s tensor instead." % x.dtype) 2281 if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1: 2282 raise ValueError("keep_prob must be a scalar tensor or a float in the " 2283 "range (0, 1], got %g" % keep_prob) 2284 keep_prob = ops.convert_to_tensor( 2285 keep_prob, dtype=x.dtype, name="keep_prob") 2286 keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar()) 2287 2288 # Do nothing if we know keep_prob == 1 2289 if tensor_util.constant_value(keep_prob) == 1: 2290 return x 2291 2292 noise_shape = _get_noise_shape(x, noise_shape) 2293 2294 # uniform [keep_prob, 1.0 + keep_prob) 2295 random_tensor = keep_prob 2296 random_tensor += random_ops.random_uniform( 2297 noise_shape, seed=seed, dtype=x.dtype) 2298 # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) 2299 binary_tensor = math_ops.floor(random_tensor) 2300 ret = math_ops.div(x, keep_prob) * binary_tensor 2301 if context.in_graph_mode(): 2302 ret.set_shape(x.get_shape()) 2303 return ret 2304 2305 2306 @tf_export("nn.top_k") 2307 def top_k(input, k=1, sorted=True, name=None): # pylint: disable=redefined-builtin 2308 """Finds values and indices of the `k` largest entries for the last dimension. 2309 2310 If the input is a vector (rank=1), finds the `k` largest entries in the vector 2311 and outputs their values and indices as vectors. Thus `values[j]` is the 2312 `j`-th largest entry in `input`, and its index is `indices[j]`. 2313 2314 For matrices (resp. higher rank input), computes the top `k` entries in each 2315 row (resp. vector along the last dimension). Thus, 2316 2317 values.shape = indices.shape = input.shape[:-1] + [k] 2318 2319 If two elements are equal, the lower-index element appears first. 2320 2321 Args: 2322 input: 1-D or higher `Tensor` with last dimension at least `k`. 2323 k: 0-D `int32` `Tensor`. Number of top elements to look for along the last 2324 dimension (along each row for matrices). 2325 sorted: If true the resulting `k` elements will be sorted by the values in 2326 descending order. 2327 name: Optional name for the operation. 2328 2329 Returns: 2330 values: The `k` largest elements along each last dimensional slice. 2331 indices: The indices of `values` within the last dimension of `input`. 2332 """ 2333 return gen_nn_ops._top_kv2(input, k=k, sorted=sorted, name=name) 2334 2335 2336 def nth_element(input, n, reverse=False, name=None): # pylint: disable=redefined-builtin 2337 r"""Finds values of the `n`-th order statistic for the last dmension. 2338 2339 If the input is a vector (rank-1), finds the entries which is the nth-smallest 2340 value in the vector and outputs their values as scalar tensor. 2341 2342 For matrices (resp. higher rank input), computes the entries which is the 2343 nth-smallest value in each row (resp. vector along the last dimension). Thus, 2344 2345 values.shape = input.shape[:-1] 2346 2347 Args: 2348 input: 1-D or higher `Tensor` with last dimension at least `n+1`. 2349 n: A `Tensor` of type `int32`. 2350 0-D. Position of sorted vector to select along the last dimension (along 2351 each row for matrices). Valid range of n is `[0, input.shape[:-1])` 2352 reverse: An optional `bool`. Defaults to `False`. 2353 When set to True, find the nth-largest value in the vector and vice 2354 versa. 2355 name: A name for the operation (optional). 2356 2357 Returns: 2358 A `Tensor`. Has the same type as `input`. 2359 The `n`-th order statistic along each last dimensional slice. 2360 """ 2361 return gen_nn_ops.nth_element(input, n, reverse=reverse, name=name) 2362 2363 2364 @tf_export("nn.conv1d") 2365 @deprecation.deprecated_arg_values( 2366 None, 2367 "`NCHW` for data_format is deprecated, use `NCW` instead", 2368 warn_once=True, 2369 data_format="NCHW") 2370 @deprecation.deprecated_arg_values( 2371 None, 2372 "`NHWC` for data_format is deprecated, use `NWC` instead", 2373 warn_once=True, 2374 data_format="NHWC") 2375 def conv1d(value, 2376 filters, 2377 stride, 2378 padding, 2379 use_cudnn_on_gpu=None, 2380 data_format=None, 2381 name=None): 2382 r"""Computes a 1-D convolution given 3-D input and filter tensors. 2383 2384 Given an input tensor of shape 2385 [batch, in_width, in_channels] 2386 if data_format is "NWC", or 2387 [batch, in_channels, in_width] 2388 if data_format is "NCW", 2389 and a filter / kernel tensor of shape 2390 [filter_width, in_channels, out_channels], this op reshapes 2391 the arguments to pass them to conv2d to perform the equivalent 2392 convolution operation. 2393 2394 Internally, this op reshapes the input tensors and invokes `tf.nn.conv2d`. 2395 For example, if `data_format` does not start with "NC", a tensor of shape 2396 [batch, in_width, in_channels] 2397 is reshaped to 2398 [batch, 1, in_width, in_channels], 2399 and the filter is reshaped to 2400 [1, filter_width, in_channels, out_channels]. 2401 The result is then reshaped back to 2402 [batch, out_width, out_channels] 2403 \(where out_width is a function of the stride and padding as in conv2d\) and 2404 returned to the caller. 2405 2406 Args: 2407 value: A 3D `Tensor`. Must be of type `float16` or `float32`. 2408 filters: A 3D `Tensor`. Must have the same type as `value`. 2409 stride: An `integer`. The number of entries by which 2410 the filter is moved right at each step. 2411 padding: 'SAME' or 'VALID' 2412 use_cudnn_on_gpu: An optional `bool`. Defaults to `True`. 2413 data_format: An optional `string` from `"NWC", "NCW"`. Defaults 2414 to `"NWC"`, the data is stored in the order of 2415 [batch, in_width, in_channels]. The `"NCW"` format stores 2416 data as [batch, in_channels, in_width]. 2417 name: A name for the operation (optional). 2418 2419 Returns: 2420 A `Tensor`. Has the same type as input. 2421 2422 Raises: 2423 ValueError: if `data_format` is invalid. 2424 """ 2425 with ops.name_scope(name, "conv1d", [value, filters]) as name: 2426 # Reshape the input tensor to [batch, 1, in_width, in_channels] 2427 if data_format is None or data_format == "NHWC" or data_format == "NWC": 2428 data_format = "NHWC" 2429 spatial_start_dim = 1 2430 strides = [1, 1, stride, 1] 2431 elif data_format == "NCHW" or data_format == "NCW": 2432 data_format = "NCHW" 2433 spatial_start_dim = 2 2434 strides = [1, 1, 1, stride] 2435 else: 2436 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 2437 value = array_ops.expand_dims(value, spatial_start_dim) 2438 filters = array_ops.expand_dims(filters, 0) 2439 result = gen_nn_ops.conv2d( 2440 value, 2441 filters, 2442 strides, 2443 padding, 2444 use_cudnn_on_gpu=use_cudnn_on_gpu, 2445 data_format=data_format) 2446 return array_ops.squeeze(result, [spatial_start_dim]) 2447 2448 2449 def conv1d_transpose( 2450 value, 2451 filter, # pylint: disable=redefined-builtin 2452 output_shape, 2453 stride, 2454 padding="SAME", 2455 data_format="NWC", 2456 name=None): 2457 """The transpose of `conv1d`. 2458 2459 This operation is sometimes called "deconvolution" after [Deconvolutional 2460 Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is 2461 actually the transpose (gradient) of `conv1d` rather than an actual 2462 deconvolution. 2463 2464 Args: 2465 value: A 3-D `Tensor` of type `float` and shape 2466 `[batch, in_width, in_channels]` for `NWC` data format or 2467 `[batch, in_channels, in_width]` for `NCW` data format. 2468 filter: A 3-D `Tensor` with the same type as `value` and shape 2469 `[filter_width, output_channels, in_channels]`. `filter`'s 2470 `in_channels` dimension must match that of `value`. 2471 output_shape: A 1-D `Tensor` representing the output shape of the 2472 deconvolution op. 2473 stride: An `integer`. The number of entries by which 2474 the filter is moved right at each step. 2475 padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. 2476 See the @{tf.nn.convolution$comment here} 2477 data_format: A string. 'NHWC' and 'NCHW' are supported. 2478 name: Optional name for the returned tensor. 2479 2480 Returns: 2481 A `Tensor` with the same type as `value`. 2482 2483 Raises: 2484 ValueError: If input/output depth does not match `filter`'s shape, or if 2485 padding is other than `'VALID'` or `'SAME'`. 2486 """ 2487 with ops.name_scope(name, "conv1d_transpose", 2488 [value, filter, output_shape]) as name: 2489 output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") 2490 if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)): 2491 raise ValueError("output_shape must have shape (3,), got {}".format( 2492 output_shape_.get_shape())) 2493 2494 # The format could be either NWC or NCW, map to NHWC or NCHW 2495 if data_format is None or data_format == "NWC": 2496 data_format_2d = "NHWC" 2497 axis = 2 2498 elif data_format == "NCW": 2499 data_format_2d = "NCHW" 2500 axis = 1 2501 else: 2502 raise ValueError("data_format must be \"NWC\" or \"NCW\".") 2503 2504 if not value.get_shape()[axis].is_compatible_with(filter.get_shape()[2]): 2505 raise ValueError("input channels does not match filter's input channels, " 2506 "{} != {}".format(value.get_shape()[axis], 2507 filter.get_shape()[2])) 2508 2509 if isinstance(output_shape, (list, np.ndarray)): 2510 # output_shape's shape should be == [3] if reached this point. 2511 if not filter.get_shape()[1].is_compatible_with(output_shape[axis]): 2512 raise ValueError( 2513 "output_shape does not match filter's output channels, " 2514 "{} != {}".format(output_shape[axis], 2515 filter.get_shape()[1])) 2516 2517 if padding != "VALID" and padding != "SAME": 2518 raise ValueError("padding must be either VALID or SAME:" 2519 " {}".format(padding)) 2520 2521 # Reshape the input tensor to [batch, 1, in_width, in_channels] 2522 if data_format_2d == "NHWC": 2523 output_shape_ = array_ops.concat( 2524 [output_shape_[:1], [1], output_shape_[1:]], axis=0) 2525 spatial_start_dim = 1 2526 strides = [1, 1, stride, 1] 2527 else: 2528 output_shape_ = array_ops.concat( 2529 [output_shape_[:2], [1], output_shape_[2:]], axis=0) 2530 spatial_start_dim = 2 2531 strides = [1, 1, 1, stride] 2532 value = array_ops.expand_dims(value, spatial_start_dim) 2533 filter = array_ops.expand_dims(filter, 0) # pylint: disable=redefined-builtin 2534 2535 result = gen_nn_ops.conv2d_backprop_input( 2536 input_sizes=output_shape_, 2537 filter=filter, 2538 out_backprop=value, 2539 strides=strides, 2540 padding=padding, 2541 data_format=data_format_2d, 2542 name=name) 2543 return array_ops.squeeze(result, [spatial_start_dim]) 2544 2545 2546 @ops.RegisterStatistics("Dilation2D", "flops") 2547 def _calc_dilation2d_flops(graph, node): 2548 """Calculates the compute resources needed for Dilation2D.""" 2549 input_shape = graph_util.tensor_shape_from_node_def_name(graph, node.input[0]) 2550 input_shape.assert_is_fully_defined() 2551 filter_shape = graph_util.tensor_shape_from_node_def_name( 2552 graph, node.input[1]) 2553 filter_shape.assert_is_fully_defined() 2554 output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name) 2555 output_shape.assert_is_fully_defined() 2556 filter_height = int(filter_shape[0]) 2557 filter_width = int(filter_shape[1]) 2558 output_count = np.prod(output_shape.as_list()) 2559 return ops.OpStats("flops", (output_count * filter_height * filter_width * 2)) 2560 2561 2562 @tf_export("nn.erosion2d") 2563 def erosion2d(value, kernel, strides, rates, padding, name=None): 2564 """Computes the grayscale erosion of 4-D `value` and 3-D `kernel` tensors. 2565 2566 The `value` tensor has shape `[batch, in_height, in_width, depth]` and the 2567 `kernel` tensor has shape `[kernel_height, kernel_width, depth]`, i.e., 2568 each input channel is processed independently of the others with its own 2569 structuring function. The `output` tensor has shape 2570 `[batch, out_height, out_width, depth]`. The spatial dimensions of the 2571 output tensor depend on the `padding` algorithm. We currently only support the 2572 default "NHWC" `data_format`. 2573 2574 In detail, the grayscale morphological 2-D erosion is given by: 2575 2576 output[b, y, x, c] = 2577 min_{dy, dx} value[b, 2578 strides[1] * y - rates[1] * dy, 2579 strides[2] * x - rates[2] * dx, 2580 c] - 2581 kernel[dy, dx, c] 2582 2583 Duality: The erosion of `value` by the `kernel` is equal to the negation of 2584 the dilation of `-value` by the reflected `kernel`. 2585 2586 Args: 2587 value: A `Tensor`. 4-D with shape `[batch, in_height, in_width, depth]`. 2588 kernel: A `Tensor`. Must have the same type as `value`. 2589 3-D with shape `[kernel_height, kernel_width, depth]`. 2590 strides: A list of `ints` that has length `>= 4`. 2591 1-D of length 4. The stride of the sliding window for each dimension of 2592 the input tensor. Must be: `[1, stride_height, stride_width, 1]`. 2593 rates: A list of `ints` that has length `>= 4`. 2594 1-D of length 4. The input stride for atrous morphological dilation. 2595 Must be: `[1, rate_height, rate_width, 1]`. 2596 padding: A `string` from: `"SAME", "VALID"`. 2597 The type of padding algorithm to use. 2598 name: A name for the operation (optional). If not specified "erosion2d" 2599 is used. 2600 2601 Returns: 2602 A `Tensor`. Has the same type as `value`. 2603 4-D with shape `[batch, out_height, out_width, depth]`. 2604 2605 Raises: 2606 ValueError: If the `value` depth does not match `kernel`' shape, or if 2607 padding is other than `'VALID'` or `'SAME'`. 2608 """ 2609 with ops.name_scope(name, "erosion2d", [value, kernel]) as name: 2610 # Reduce erosion to dilation by duality. 2611 return math_ops.negative( 2612 gen_nn_ops.dilation2d( 2613 input=math_ops.negative(value), 2614 filter=array_ops.reverse_v2(kernel, [0, 1]), 2615 strides=strides, 2616 rates=rates, 2617 padding=padding, 2618 name=name)) 2619 2620 2621 @tf_export("nn.in_top_k") 2622 def in_top_k(predictions, targets, k, name=None): 2623 r"""Says whether the targets are in the top `K` predictions. 2624 2625 This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the 2626 prediction for the target class is among the top `k` predictions among 2627 all predictions for example `i`. Note that the behavior of `InTopK` differs 2628 from the `TopK` op in its handling of ties; if multiple classes have the 2629 same prediction value and straddle the top-`k` boundary, all of those 2630 classes are considered to be in the top `k`. 2631 2632 More formally, let 2633 2634 \\(predictions_i\\) be the predictions for all classes for example `i`, 2635 \\(targets_i\\) be the target class for example `i`, 2636 \\(out_i\\) be the output for example `i`, 2637 2638 $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ 2639 2640 Args: 2641 predictions: A `Tensor` of type `float32`. 2642 A `batch_size` x `classes` tensor. 2643 targets: A `Tensor`. Must be one of the following types: `int32`, `int64`. 2644 A `batch_size` vector of class ids. 2645 k: An `int`. Number of top elements to look at for computing precision. 2646 name: A name for the operation (optional). 2647 2648 Returns: 2649 A `Tensor` of type `bool`. Computed Precision at `k` as a `bool Tensor`. 2650 """ 2651 with ops.name_scope(name, "in_top_k"): 2652 return gen_nn_ops._in_top_kv2(predictions, targets, k, name=name) 2653