Home | History | Annotate | Download | only in client
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/compiler/xla/client/padding.h"
     17 
     18 #include <algorithm>
     19 
     20 #include "tensorflow/compiler/xla/util.h"
     21 #include "tensorflow/core/lib/math/math_util.h"
     22 #include "tensorflow/core/platform/logging.h"
     23 
     24 namespace xla {
     25 
     26 Status ValidatePaddingValues(
     27     tensorflow::gtl::ArraySlice<int64> input_dimensions,
     28     tensorflow::gtl::ArraySlice<int64> window_dimensions,
     29     tensorflow::gtl::ArraySlice<int64> window_strides) {
     30   bool ok = input_dimensions.size() == window_dimensions.size() &&
     31             input_dimensions.size() == window_strides.size();
     32   if (!ok) {
     33     return InvalidArgument(
     34         "Want input dimensions size %zu = window dimensions size %zu = window "
     35         "strides size %zu",
     36         input_dimensions.size(), window_dimensions.size(),
     37         window_strides.size());
     38   }
     39   return Status::OK();
     40 }
     41 
     42 std::vector<std::pair<int64, int64>> MakePadding(
     43     tensorflow::gtl::ArraySlice<int64> input_dimensions,
     44     tensorflow::gtl::ArraySlice<int64> window_dimensions,
     45     tensorflow::gtl::ArraySlice<int64> window_strides, Padding padding) {
     46   TF_CHECK_OK(ValidatePaddingValues(input_dimensions, window_dimensions,
     47                                     window_strides));
     48   std::vector<std::pair<int64, int64>> low_high_padding;
     49   switch (padding) {
     50     case Padding::kValid:
     51       low_high_padding.resize(window_dimensions.size(), {0, 0});
     52       return low_high_padding;
     53 
     54     case Padding::kSame:
     55       for (size_t i = 0; i < input_dimensions.size(); ++i) {
     56         int64 input_dimension = input_dimensions[i];
     57         int64 window_dimension = window_dimensions[i];
     58         int64 window_stride = window_strides[i];
     59         // We follow the same convention as in Tensorflow, such that
     60         // output dimension := ceil(input_dimension / window_stride).
     61         // See tensorflow/tensorflow/python/ops/nn.py
     62         // for the reference. See also tensorflow/core/kernels/ops_util.cc
     63         // for the part where we avoid negative padding using max(0, x).
     64         //
     65         //
     66         // For an odd sized window dimension 2N+1 with stride 1, the middle
     67         // element is always inside the base area, so we can see it as N + 1 +
     68         // N elements. In the example below, we have a kernel of size
     69         // 2*3+1=7 so that the center element is 4 with 123 to the
     70         // left and 567 to the right.
     71         //
     72         //  base area:           ------------------------
     73         //  kernel at left:   1234567
     74         //  kernel at right:                         1234567
     75         //
     76         // We can see visually here that we need to pad the base area
     77         // by 3 on each side:
     78         //
     79         //  padded base area: 000------------------------000
     80         //
     81         // For an even number 2N, there are two options:
     82         //
     83         // *** Option A
     84         //
     85         // We view 2N as (N - 1) + 1 + N, so for N=3 we have 12 to the
     86         // left, 3 is the center and 456 is to the right, like this:
     87         //
     88         //  base area:           ------------------------
     89         //  kernel at left:    123456
     90         //  kernel at right:                          123456
     91         //  padded base area:  00------------------------000
     92         //
     93         // Note how we pad by one more to the right than to the left.
     94         //
     95         // *** Option B
     96         //
     97         // We view 2N as N + 1 + (N - 1), so for N=3 we have 123 to
     98         // the left, 4 is the center and 56 is to the right, like
     99         // this:
    100         //
    101         //  base area:           ------------------------
    102         //  kernel at left:   123456
    103         //  kernel at right:                         123456
    104         //  padded base area: 000------------------------00
    105         //
    106         // The choice here is arbitrary. We choose option A as this is
    107         // what DistBelief and Tensorflow do.
    108         //
    109         // When the stride is greater than 1, the output size is smaller than
    110         // the input base size. The base area is padded such that the last
    111         // window fully fits in the padded base area, and the padding amount is
    112         // evenly divided between the left and the right (or 1 more on the right
    113         // if odd size padding is required). The example below shows the
    114         // required padding when the base size is 10, the kernel size is 5, and
    115         // the stride is 3. In this example, the output size is 4.
    116         //
    117         // base area:           ----------
    118         // 1'st kernel:       12345
    119         // 2'nd kernel:          12345
    120         // 3'rd kernel:             12345
    121         // 4'th kernel:                12345
    122         // padded base area:  00----------00
    123         int64 output_dimension =
    124             tensorflow::MathUtil::CeilOfRatio(input_dimension, window_stride);
    125         int64 padding_size =
    126             std::max<int64>((output_dimension - 1) * window_stride +
    127                                 window_dimension - input_dimension,
    128                             0);
    129         low_high_padding.emplace_back(
    130             tensorflow::MathUtil::FloorOfRatio(padding_size, 2ll),
    131             tensorflow::MathUtil::CeilOfRatio(padding_size, 2ll));
    132       }
    133       break;
    134   }
    135 
    136   return low_high_padding;
    137 }
    138 
    139 }  // namespace xla
    140