Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // See docs in ../ops/image_ops.cc
     17 
#include <cstdlib>
#include <cstring>
#include <limits>
#include <memory>

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/bounds_check.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"
     28 
     29 namespace tensorflow {
     30 
     31 // Decode the contents of a BMP file
     32 class DecodeBmpOp : public OpKernel {
     33  public:
     34   explicit DecodeBmpOp(OpKernelConstruction* context) : OpKernel(context) {
     35     OP_REQUIRES_OK(context, context->GetAttr("channels", &channels_));
     36     OP_REQUIRES(
     37         context,
     38         channels_ == 0 || channels_ == 1 || channels_ == 3 || channels_ == 4,
     39         errors::InvalidArgument("channels must be 0, 1, 3 or 4, got ",
     40                                 channels_));
     41   }
     42   inline int32 ByteSwapInt32ForBigEndian(int32 x) {
     43 #if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
     44     return le32toh(x);
     45 #else
     46     return x;
     47 #endif
     48   }
     49 
     50   void Compute(OpKernelContext* context) override {
     51     const Tensor& contents = context->input(0);
     52     OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents.shape()),
     53                 errors::InvalidArgument("contents must be scalar, got shape ",
     54                                         contents.shape().DebugString()));
     55 
     56     // Start decoding image to get shape details
     57     const StringPiece input = contents.scalar<string>()();
     58 
     59     OP_REQUIRES(context, (32 <= input.size()),
     60                 errors::InvalidArgument("Incomplete bmp content, requires at "
     61                                         "least 32 bytes to find the header "
     62                                         "size, width, height, and bpp, got ",
     63                                         input.size(), " bytes"));
     64 
     65     const uint8* img_bytes = reinterpret_cast<const uint8*>(input.data());
     66     int32 header_size_ = internal::SubtleMustCopy(
     67         *(reinterpret_cast<const int32*>(img_bytes + 10)));
     68     const int32 header_size = ByteSwapInt32ForBigEndian(header_size_);
     69     int32 width_ = internal::SubtleMustCopy(
     70         *(reinterpret_cast<const int32*>(img_bytes + 18)));
     71     const int32 width = ByteSwapInt32ForBigEndian(width_);
     72     int32 height_ = internal::SubtleMustCopy(
     73         *(reinterpret_cast<const int32*>(img_bytes + 22)));
     74     const int32 height = ByteSwapInt32ForBigEndian(height_);
     75     int32 bpp_ = internal::SubtleMustCopy(
     76         *(reinterpret_cast<const int32*>(img_bytes + 28)));
     77     const int32 bpp = ByteSwapInt32ForBigEndian(bpp_);
     78 
     79     if (channels_) {
     80       OP_REQUIRES(context, (channels_ == bpp / 8),
     81                   errors::InvalidArgument(
     82                       "channels attribute ", channels_,
     83                       " does not match bits per pixel from file ", bpp / 8));
     84     } else {
     85       channels_ = bpp / 8;
     86     }
     87 
     88     // Current implementation only supports 1, 3 or 4 channel
     89     // bitmaps.
     90     OP_REQUIRES(context, (channels_ == 1 || channels_ == 3 || channels_ == 4),
     91                 errors::InvalidArgument(
     92                     "Number of channels must be 1, 3 or 4, was ", channels_));
     93 
     94     OP_REQUIRES(context, width > 0 && header_size >= 0,
     95                 errors::InvalidArgument("Width must be positive"));
     96     OP_REQUIRES(context, header_size >= 0,
     97                 errors::InvalidArgument("header size must be nonnegative"));
     98 
     99     // The real requirement is < 2^31 minus some headers and channel data,
    100     // so rounding down to something that's still ridiculously big.
    101     OP_REQUIRES(
    102         context,
    103         (static_cast<int64>(width) * std::abs(static_cast<int64>(height))) <
    104             static_cast<int64>(std::numeric_limits<int32_t>::max() / 8),
    105         errors::InvalidArgument(
    106             "Total possible pixel bytes must be less than 2^30"));
    107 
    108     const int32 abs_height = abs(height);
    109 
    110     // there may be padding bytes when the width is not a multiple of 4 bytes
    111     // 8 * channels == bits per pixel
    112     const int row_size = (8 * channels_ * width + 31) / 32 * 4;
    113 
    114     const int64 last_pixel_offset = static_cast<int64>(header_size) +
    115                                     (abs_height - 1) * row_size +
    116                                     (width - 1) * channels_;
    117 
    118     // [expected file size] = [last pixel offset] + [last pixel size=channels]
    119     const int64 expected_file_size = last_pixel_offset + channels_;
    120 
    121     OP_REQUIRES(
    122         context, (expected_file_size <= input.size()),
    123         errors::InvalidArgument("Incomplete bmp content, requires at least ",
    124                                 expected_file_size, " bytes, got ",
    125                                 input.size(), " bytes"));
    126 
    127     // if height is negative, data layout is top down
    128     // otherwise, it's bottom up
    129     bool top_down = (height < 0);
    130 
    131     // Decode image, allocating tensor once the image size is known
    132     Tensor* output = nullptr;
    133     OP_REQUIRES_OK(
    134         context, context->allocate_output(
    135                      0, TensorShape({abs_height, width, channels_}), &output));
    136 
    137     const uint8* bmp_pixels = &img_bytes[header_size];
    138 
    139     Decode(bmp_pixels, row_size, output->flat<uint8>().data(), width,
    140            abs_height, channels_, top_down);
    141   }
    142 
    143   uint8* Decode(const uint8* input, const int row_size, uint8* const output,
    144                 const int width, const int height, const int channles,
    145                 bool top_down);
    146 
    147  private:
    148   int channels_;
    149 };
// Registers DecodeBmpOp as the kernel for the "DecodeBmp" op on DEVICE_CPU.
REGISTER_KERNEL_BUILDER(Name("DecodeBmp").Device(DEVICE_CPU), DecodeBmpOp);
    151 
    152 uint8* DecodeBmpOp::Decode(const uint8* input, const int row_size,
    153                            uint8* const output, const int width,
    154                            const int height, const int channels,
    155                            bool top_down) {
    156   for (int i = 0; i < height; i++) {
    157     int src_pos;
    158     int dst_pos;
    159 
    160     for (int j = 0; j < width; j++) {
    161       if (!top_down) {
    162         src_pos = ((height - 1 - i) * row_size) + j * channels;
    163       } else {
    164         src_pos = i * row_size + j * channels;
    165       }
    166 
    167       dst_pos = (i * width + j) * channels;
    168 
    169       switch (channels) {
    170         case 1:
    171           output[dst_pos] = input[src_pos];
    172           break;
    173         case 3:
    174           // BGR -> RGB
    175           output[dst_pos] = input[src_pos + 2];
    176           output[dst_pos + 1] = input[src_pos + 1];
    177           output[dst_pos + 2] = input[src_pos];
    178           break;
    179         case 4:
    180           // BGRA -> RGBA
    181           output[dst_pos] = input[src_pos + 2];
    182           output[dst_pos + 1] = input[src_pos + 1];
    183           output[dst_pos + 2] = input[src_pos];
    184           output[dst_pos + 3] = input[src_pos + 3];
    185           break;
    186         default:
    187           LOG(FATAL) << "Unexpected number of channels: " << channels;
    188           break;
    189       }
    190     }
    191   }
    192 
    193   return output;
    194 }
    195 
    196 }  // namespace tensorflow
    197