1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // This is a helper struct to package up the input and output 17 // parameters of an image resizer (the height, widths, etc.). To 18 // reduce code duplication and ensure consistency across the different 19 // resizers, it performs the input validation. 20 21 #ifndef TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_ 22 #define TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_ 23 24 #define EIGEN_USE_THREADS 25 26 #include <math.h> 27 #include <algorithm> 28 #include <array> 29 30 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" 31 #include "tensorflow/core/framework/bounds_check.h" 32 #include "tensorflow/core/framework/op_kernel.h" 33 #include "tensorflow/core/framework/register_types.h" 34 #include "tensorflow/core/framework/tensor.h" 35 #include "tensorflow/core/framework/tensor_shape.h" 36 #include "tensorflow/core/framework/types.h" 37 38 namespace tensorflow { 39 40 // CalculateResizeScale determines the float scaling factor. 41 inline float CalculateResizeScale(int64 in_size, int64 out_size, 42 bool align_corners) { 43 return (align_corners && out_size > 1) 44 ? (in_size - 1) / static_cast<float>(out_size - 1) 45 : in_size / static_cast<float>(out_size); 46 } 47 48 // Half pixel scaler scales assuming that the pixel centers are at 0.5, i.e. the 49 // floating point coordinates of the top,left pixel is 0.5,0.5. 50 struct HalfPixelScaler { 51 inline float operator()(const int x, const float scale) const { 52 // Note that we subtract 0.5 from the return value, as the existing bilinear 53 // sampling code etc assumes pixels are in the old coordinate system. 54 return (static_cast<float>(x) + 0.5f) * scale - 0.5f; 55 } 56 }; 57 58 // Older incorrect scaling method that causes all resizes to have a slight 59 // translation leading to inconsistent results. For example, a flip then a 60 // resize gives different results then a resize then a flip. 61 struct LegacyScaler { 62 inline float operator()(const int x, const float scale) const { 63 return static_cast<float>(x) * scale; 64 } 65 }; 66 67 struct ImageResizerState { 68 explicit ImageResizerState(bool align_corners, bool half_pixel_centers) 69 : align_corners_(align_corners), 70 half_pixel_centers_(half_pixel_centers) {} 71 72 // ValidateAndCalculateOutputSize checks the bounds on the input tensors 73 // and requested size, sets up some of the resizing state such as the 74 // height_scale and width_scale, and calculates the output size. 75 // If any of these operations fails, it sets an error status in 76 // the context, which the caller must check. 77 void ValidateAndCalculateOutputSize(OpKernelContext* context, 78 const Tensor& input) { 79 OP_REQUIRES( 80 context, 81 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_), 82 errors::InvalidArgument("If half_pixel_centers is True, " 83 "align_corners must be False.")); 84 OP_REQUIRES(context, input.dims() == 4, 85 errors::InvalidArgument("input must be 4-dimensional", 86 input.shape().DebugString())); 87 const Tensor& shape_t = context->input(1); 88 OP_REQUIRES(context, shape_t.dims() == 1, 89 errors::InvalidArgument("shape_t must be 1-dimensional", 90 shape_t.shape().DebugString())); 91 OP_REQUIRES(context, shape_t.NumElements() == 2, 92 errors::InvalidArgument("shape_t must have two elements", 93 shape_t.shape().DebugString())); 94 auto Svec = shape_t.vec<int32>(); 95 batch_size = input.dim_size(0); 96 out_height = internal::SubtleMustCopy(Svec(0)); 97 out_width = internal::SubtleMustCopy(Svec(1)); 98 OP_REQUIRES( 99 context, 100 FastBoundsCheck(input.dim_size(1), std::numeric_limits<int32>::max()) && 101 FastBoundsCheck(input.dim_size(2), 102 std::numeric_limits<int32>::max()), 103 errors::InvalidArgument("input sizes must be between 0 and max int32")); 104 105 in_height = static_cast<int32>(input.dim_size(1)); 106 in_width = static_cast<int32>(input.dim_size(2)); 107 channels = input.dim_size(3); 108 OP_REQUIRES(context, out_height > 0 && out_width > 0, 109 errors::InvalidArgument("output dimensions must be positive")); 110 OP_REQUIRES( 111 context, channels > 0, 112 errors::InvalidArgument("image must have at least one channel")); 113 OP_REQUIRES( 114 context, input.dim_size(1) > 0 && input.dim_size(2) > 0, 115 errors::InvalidArgument("input image must be of non-zero size")); 116 height_scale = CalculateResizeScale(in_height, out_height, align_corners_); 117 width_scale = CalculateResizeScale(in_width, out_width, align_corners_); 118 119 // Guard against overflows 120 OP_REQUIRES(context, 121 ceilf((out_height - 1) * height_scale) <= 122 static_cast<float>(std::numeric_limits<int64>::max()), 123 errors::InvalidArgument( 124 "input image height scale would cause an overflow")); 125 OP_REQUIRES( 126 context, 127 ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX), 128 errors::InvalidArgument( 129 "input image width scale would cause an overflow")); 130 } 131 132 // Calculates all the required variables, and allocates the output. 133 void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) { 134 ValidateAndCalculateOutputSize(context, input); 135 if (!context->status().ok()) return; 136 OP_REQUIRES_OK(context, context->allocate_output( 137 0, 138 TensorShape({input.dim_size(0), out_height, 139 out_width, input.dim_size(3)}), 140 &output)); 141 } 142 143 int64 batch_size; 144 int64 out_height; 145 int64 out_width; 146 int64 in_height; 147 int64 in_width; 148 int64 channels; 149 float height_scale; 150 float width_scale; 151 Tensor* output = nullptr; 152 153 private: 154 bool align_corners_; 155 bool half_pixel_centers_; 156 }; 157 158 struct ImageResizerGradientState { 159 explicit ImageResizerGradientState(bool align_corners, 160 bool half_pixel_centers) 161 : align_corners_(align_corners), 162 half_pixel_centers_(half_pixel_centers) {} 163 164 void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input, 165 const Tensor& original_image) { 166 OP_REQUIRES( 167 context, 168 !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_), 169 errors::InvalidArgument("If half_pixel_centers is True, " 170 "align_corners must be False.")); 171 172 OP_REQUIRES(context, input.dims() == 4, 173 errors::InvalidArgument("input_grad must be 4-dimensional", 174 input.shape().DebugString())); 175 // Resizers always produce float images, so input gradient must 176 // always be a float. 177 OP_REQUIRES(context, input.dtype() == DT_FLOAT, 178 errors::InvalidArgument("input_grad must be of type float", 179 DataTypeString(input.dtype()))); 180 181 OP_REQUIRES(context, original_image.dims() == 4, 182 errors::InvalidArgument("original_image must be 4-dimensional", 183 original_image.shape().DebugString())); 184 185 // Allocate output and initialize to zeros. 186 batch_size = input.dim_size(0); 187 channels = input.dim_size(3); 188 resized_height = input.dim_size(1); 189 resized_width = input.dim_size(2); 190 original_height = original_image.dim_size(1); 191 original_width = original_image.dim_size(2); 192 193 OP_REQUIRES( 194 context, 195 FastBoundsCheck(original_height, std::numeric_limits<int32>::max()) && 196 FastBoundsCheck(original_width, std::numeric_limits<int32>::max()), 197 errors::InvalidArgument( 198 "original sizes must be between 0 and max int32")); 199 200 height_scale = 201 CalculateResizeScale(original_height, resized_height, align_corners_); 202 width_scale = 203 CalculateResizeScale(original_width, resized_width, align_corners_); 204 output = nullptr; 205 OP_REQUIRES_OK(context, context->allocate_output( 206 0, 207 TensorShape({batch_size, original_height, 208 original_width, channels}), 209 &output)); 210 } 211 212 int64 batch_size; 213 int64 channels; 214 int64 resized_height; 215 int64 resized_width; 216 int64 original_height; 217 int64 original_width; 218 float height_scale; 219 float width_scale; 220 Tensor* output; 221 222 private: 223 bool align_corners_; 224 bool half_pixel_centers_; 225 }; 226 227 } // namespace tensorflow 228 229 #endif // TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_ 230