Home | History | Annotate | Download | only in ffmpeg
      1 // Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 // =============================================================================
     15 
     16 #include <stdlib.h>
     17 
     18 #include <cstdio>
     19 #include <set>
     20 
     21 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
     22 #include "tensorflow/core/framework/op.h"
     23 #include "tensorflow/core/framework/op_kernel.h"
     24 #include "tensorflow/core/framework/shape_inference.h"
     25 #include "tensorflow/core/lib/io/path.h"
     26 #include "tensorflow/core/lib/strings/str_util.h"
     27 #include "tensorflow/core/lib/strings/strcat.h"
     28 #include "tensorflow/core/platform/env.h"
     29 #include "tensorflow/core/platform/logging.h"
     30 
     31 namespace tensorflow {
     32 namespace ffmpeg {
     33 
     34 class DecodeVideoOp : public OpKernel {
     35  public:
     36   explicit DecodeVideoOp(OpKernelConstruction* context) : OpKernel(context) {}
     37 
     38   void Compute(OpKernelContext* context) override {
     39     OP_REQUIRES(
     40         context, context->num_inputs() == 1,
     41         errors::InvalidArgument("DecodeVideo requires exactly 1 input."));
     42     const Tensor& contents_tensor = context->input(0);
     43 
     44     OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents_tensor.shape()),
     45                 errors::InvalidArgument(
     46                     "contents must be a rank-0 tensor but got shape ",
     47                     contents_tensor.shape().DebugString()));
     48     const tensorflow::StringPiece contents = contents_tensor.scalar<string>()();
     49 
     50     // Write the input data to a temp file.
     51     string extension;
     52     const string temp_filename = io::GetTempFilename(extension);
     53     OP_REQUIRES_OK(context, WriteFile(temp_filename, contents));
     54     FileDeleter deleter(temp_filename);
     55 
     56     uint32 width = 0;
     57     uint32 height = 0;
     58     uint32 frames = 0;
     59 
     60     // Run FFmpeg on the data and verify results.
     61     std::vector<uint8> output_data;
     62     const Status result = ffmpeg::ReadVideoFile(temp_filename, &output_data,
     63                                                 &width, &height, &frames);
     64     if (result.code() == error::Code::NOT_FOUND) {
     65       OP_REQUIRES(
     66           context, result.ok(),
     67           errors::Unavailable("FFmpeg must be installed to run this op. FFmpeg "
     68                               "can be found at http://www.ffmpeg.org."));
     69     } else if (result.code() == error::UNKNOWN) {
     70       LOG(ERROR) << "Ffmpeg failed with error '" << result.error_message()
     71                  << "'. Returning empty tensor.";
     72       Tensor* output = nullptr;
     73       OP_REQUIRES_OK(context,
     74                      context->allocate_output(0, TensorShape({0, 0}), &output));
     75       return;
     76     } else {
     77       OP_REQUIRES_OK(context, result);
     78     }
     79     OP_REQUIRES(context, !output_data.empty(),
     80                 errors::Unknown("No output created by FFmpeg."));
     81     OP_REQUIRES(
     82         context, output_data.size() == (frames * height * width * 3),
     83         errors::Unknown("Output created by FFmpeg [", output_data.size(),
     84                         "] does not match description [", frames, ", ", height,
     85                         ", ", width, ", 3]"));
     86     Tensor* output = nullptr;
     87     OP_REQUIRES_OK(context,
     88                    context->allocate_output(
     89                        0, TensorShape({frames, height, width, 3}), &output));
     90     auto output_flat = output->flat<uint8>();
     91     std::copy_n(output_data.begin(), output_data.size(), &output_flat(0));
     92   }
     93 };
     94 
     95 REGISTER_KERNEL_BUILDER(Name("DecodeVideo").Device(DEVICE_CPU), DecodeVideoOp);
     96 
     97 REGISTER_OP("DecodeVideo")
     98     .Input("contents: string")
     99     .Output("output: uint8")
    100     .SetShapeFn([](shape_inference::InferenceContext* c) {
    101       c->set_output(0, c->UnknownShapeOfRank(4));
    102       return Status::OK();
    103     })
    104     .Doc(R"doc(
    105 Processes the contents of an video file into a tensor using FFmpeg to decode
    106 the file.
    107 
    108 contents: The binary contents of the video file to decode. This is a
    109     scalar.
    110 output: A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output.
    111 )doc");
    112 
    113 }  // namespace ffmpeg
    114 }  // namespace tensorflow
    115