1 // Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // ============================================================================= 15 16 #include <stdlib.h> 17 18 #include <cstdio> 19 #include <set> 20 21 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" 22 #include "tensorflow/core/framework/op.h" 23 #include "tensorflow/core/framework/op_kernel.h" 24 #include "tensorflow/core/framework/shape_inference.h" 25 #include "tensorflow/core/lib/io/path.h" 26 #include "tensorflow/core/lib/strings/str_util.h" 27 #include "tensorflow/core/lib/strings/strcat.h" 28 #include "tensorflow/core/platform/env.h" 29 #include "tensorflow/core/platform/logging.h" 30 31 namespace tensorflow { 32 namespace ffmpeg { 33 34 class DecodeVideoOp : public OpKernel { 35 public: 36 explicit DecodeVideoOp(OpKernelConstruction* context) : OpKernel(context) {} 37 38 void Compute(OpKernelContext* context) override { 39 OP_REQUIRES( 40 context, context->num_inputs() == 1, 41 errors::InvalidArgument("DecodeVideo requires exactly 1 input.")); 42 const Tensor& contents_tensor = context->input(0); 43 44 OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents_tensor.shape()), 45 errors::InvalidArgument( 46 "contents must be a rank-0 tensor but got shape ", 47 contents_tensor.shape().DebugString())); 48 const tensorflow::StringPiece contents = contents_tensor.scalar<string>()(); 49 50 // Write the input data to a temp file. 51 string extension; 52 const string temp_filename = io::GetTempFilename(extension); 53 OP_REQUIRES_OK(context, WriteFile(temp_filename, contents)); 54 FileDeleter deleter(temp_filename); 55 56 uint32 width = 0; 57 uint32 height = 0; 58 uint32 frames = 0; 59 60 // Run FFmpeg on the data and verify results. 61 std::vector<uint8> output_data; 62 const Status result = ffmpeg::ReadVideoFile(temp_filename, &output_data, 63 &width, &height, &frames); 64 if (result.code() == error::Code::NOT_FOUND) { 65 OP_REQUIRES( 66 context, result.ok(), 67 errors::Unavailable("FFmpeg must be installed to run this op. FFmpeg " 68 "can be found at http://www.ffmpeg.org.")); 69 } else if (result.code() == error::UNKNOWN) { 70 LOG(ERROR) << "Ffmpeg failed with error '" << result.error_message() 71 << "'. Returning empty tensor."; 72 Tensor* output = nullptr; 73 OP_REQUIRES_OK(context, 74 context->allocate_output(0, TensorShape({0, 0}), &output)); 75 return; 76 } else { 77 OP_REQUIRES_OK(context, result); 78 } 79 OP_REQUIRES(context, !output_data.empty(), 80 errors::Unknown("No output created by FFmpeg.")); 81 OP_REQUIRES( 82 context, output_data.size() == (frames * height * width * 3), 83 errors::Unknown("Output created by FFmpeg [", output_data.size(), 84 "] does not match description [", frames, ", ", height, 85 ", ", width, ", 3]")); 86 Tensor* output = nullptr; 87 OP_REQUIRES_OK(context, 88 context->allocate_output( 89 0, TensorShape({frames, height, width, 3}), &output)); 90 auto output_flat = output->flat<uint8>(); 91 std::copy_n(output_data.begin(), output_data.size(), &output_flat(0)); 92 } 93 }; 94 95 REGISTER_KERNEL_BUILDER(Name("DecodeVideo").Device(DEVICE_CPU), DecodeVideoOp); 96 97 REGISTER_OP("DecodeVideo") 98 .Input("contents: string") 99 .Output("output: uint8") 100 .SetShapeFn([](shape_inference::InferenceContext* c) { 101 c->set_output(0, c->UnknownShapeOfRank(4)); 102 return Status::OK(); 103 }) 104 .Doc(R"doc( 105 Processes the contents of an video file into a tensor using FFmpeg to decode 106 the file. 107 108 contents: The binary contents of the video file to decode. This is a 109 scalar. 110 output: A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output. 111 )doc"); 112 113 } // namespace ffmpeg 114 } // namespace tensorflow 115