Home | History | Annotate | Download | only in default
      1 // Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 // =============================================================================
     15 
     16 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
     17 
     18 #include <errno.h>
     19 #include <fcntl.h>
     20 #include <stdlib.h>
     21 #include <sys/stat.h>
     22 #include <sys/types.h>
     23 #include <sys/wait.h>
     24 #include <unistd.h>
     25 
     26 #include <vector>
     27 
     28 #include "tensorflow/core/lib/io/path.h"
     29 #include "tensorflow/core/lib/strings/numbers.h"
     30 #include "tensorflow/core/lib/strings/str_util.h"
     31 #include "tensorflow/core/platform/byte_order.h"
     32 #include "tensorflow/core/platform/env.h"
     33 
     34 using tensorflow::strings::StrCat;
     35 
     36 namespace tensorflow {
     37 namespace ffmpeg {
     38 namespace {
     39 
     40 const char kFfmpegExecutable[] = "ffmpeg";
     41 const int32 kDefaultProbeSize = 5000000;  // 5MB
     42 
     43 std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
     44                                            const string& output_filename,
     45                                            const string& input_format_id,
     46                                            int32 samples_per_second,
     47                                            int32 channel_count,
     48                                            const string& stream) {
     49   std::vector<string> command({
     50       "-nostats",             // No additional progress display.
     51       "-nostdin",             // No interactive commands accepted.
     52       "-f", input_format_id,  // eg: "mp3"
     53       "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
     54       "-loglevel", "error",   // Print errors only.
     55       "-hide_banner",         // Skip printing build options, version, etc.
     56       "-map_metadata", "-1",  // Copy global metadata from input to output.
     57       "-vn",                  // No video recording.
     58       "-ac:a:0", StrCat(channel_count), "-ar:a:0", StrCat(samples_per_second),
     59       // Output set (in several ways) to signed 16-bit little-endian ints.
     60       "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
     61       "-sn",  // No subtitle recording.
     62       "-y"    // Overwrite output file.
     63   });
     64   if (!stream.empty()) {
     65     command.emplace_back("-map");
     66     command.emplace_back(StrCat("0:", stream));
     67   }
     68   command.emplace_back(StrCat(output_filename));
     69 
     70   return command;
     71 }
     72 
     73 std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
     74                                            const string& output_filename) {
     75   return {"-nostats",  // No additional progress display.
     76           "-nostdin",  // No interactive commands accepted.
     77           "-i", input_filename, "-f", "image2pipe", "-probesize",
     78           StrCat(kDefaultProbeSize), "-loglevel",
     79           // Info is needed to get the information about stream, etc.
     80           // It is generated to a separate file, not stdout/stderr.
     81           "info",
     82           "-hide_banner",  // Skip printing build options, version, etc.
     83           "-vcodec", "rawvideo", "-pix_fmt", "rgb24",
     84           "-y",  // Overwrite output file.
     85           StrCat(output_filename)};
     86 }
     87 
     88 // Is a named binary installed and executable by the current process?
     89 // Note that this is harder than it seems like it should be...
     90 bool IsBinaryInstalled(const string& binary_name) {
     91   string path = ::getenv("PATH");
     92   for (const string& dir : str_util::Split(path, ':')) {
     93     const string binary_path = io::JoinPath(dir, binary_name);
     94     char absolute_path[PATH_MAX + 1];
     95     if (::realpath(binary_path.c_str(), absolute_path) == nullptr) {
     96       continue;
     97     }
     98     struct stat statinfo;
     99     int result = ::stat(absolute_path, &statinfo);
    100     if (result < 0) {
    101       continue;
    102     }
    103     if (!S_ISREG(statinfo.st_mode)) {
    104       continue;
    105     }
    106 
    107     // Is the current user able to execute the file?
    108     if (statinfo.st_uid == ::geteuid() && statinfo.st_mode & S_IXUSR) {
    109       return true;
    110     }
    111     // Is the current group able to execute the file?
    112     if (statinfo.st_uid == ::getegid() && statinfo.st_mode & S_IXGRP) {
    113       return true;
    114     }
    115     // Is anyone able to execute the file?
    116     if (statinfo.st_mode & S_IXOTH) {
    117       return true;
    118     }
    119   }
    120   return false;
    121 }
    122 
    123 [[noreturn]] int ExecuteFfmpeg(const std::vector<string>& args) {
    124   std::vector<char*> args_chars;
    125   std::transform(args.begin(), args.end(), std::back_inserter(args_chars),
    126                  [](const string& s) { return const_cast<char*>(s.c_str()); });
    127   args_chars.push_back(nullptr);
    128   ::execvp(kFfmpegExecutable, args_chars.data());
    129   // exec only returns on error.
    130   const int error = errno;
    131   LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error);
    132   ::_exit(error);
    133 }
    134 
    135 // Reads a PCM file using signed little endian 16-bit encoding (s16le).
    136 std::vector<float> ReadPcmFile(const string& filename) {
    137   string raw_data;
    138   TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &raw_data))
    139       << "Could not read FFmpeg output file: " << filename;
    140 
    141   std::vector<float> samples;
    142   const int32 sample_count = raw_data.size() / sizeof(int16);
    143   samples.reserve(sample_count);
    144 
    145   for (int32 i = 0; i < sample_count; ++i) {
    146     // Most of this is jumping through hoops in the standard to convert some
    147     // bits into the right format. I hope that an optimizing compiler will
    148     // remove almost all of this code.
    149     char raw[2] = {raw_data[i * 2], raw_data[i * 2 + 1]};
    150     if (!port::kLittleEndian) {
    151       std::swap(raw[0], raw[1]);
    152     }
    153     int16 host_order;
    154     ::memcpy(&host_order, raw, sizeof(host_order));
    155     const double normalized =
    156         static_cast<double>(host_order) / std::numeric_limits<int16>::max();
    157     samples.push_back(normalized);
    158   }
    159   return samples;
    160 }
    161 
    162 template <typename UInt>
    163 string LittleEndianData(UInt data) {
    164   static_assert(std::is_unsigned<UInt>::value, "UInt must be unsigned");
    165   string str;
    166   for (size_t i = 0; i < sizeof(UInt); ++i) {
    167     const unsigned char bits = static_cast<unsigned char>(data & 0xFFU);
    168     char ch;
    169     ::memcpy(&ch, &bits, sizeof(bits));
    170     str.push_back(ch);
    171     data >>= 8;
    172   }
    173   return str;
    174 }
    175 
    176 string LittleEndianDataInt(uint32 data) {
    177   return LittleEndianData<uint32>(data);
    178 }
    179 
    180 string LittleEndianDataShort(uint16 data) {
    181   return LittleEndianData<uint16>(data);
    182 }
    183 
    184 string WavHeader(int32 samples_per_second, int32 channel_count,
    185                  const std::vector<float>& samples) {
    186   string header = "RIFF";
    187   header += LittleEndianDataInt(36U + samples.size() * sizeof(int16));
    188   header += "WAVEfmt ";
    189   header += LittleEndianDataInt(16);
    190   header += LittleEndianDataShort(1);
    191   header += LittleEndianDataShort(channel_count);
    192   header += LittleEndianDataInt(samples_per_second);
    193   header +=
    194       LittleEndianDataInt(samples_per_second * channel_count * sizeof(int16));
    195   header += LittleEndianDataShort(channel_count * sizeof(int16));
    196   header += LittleEndianDataShort(16);
    197   header += "data";
    198   header += LittleEndianDataInt(samples.size() * sizeof(int16));
    199   CHECK_EQ(header.size(), 44);
    200   return header;
    201 }
    202 
    203 // Creates the contents of a .wav file using pcm_s16le format (signed 16 bit
    204 // little endian integers).
    205 string BuildWavFile(int32 samples_per_second, int32 channel_count,
    206                     const std::vector<float>& samples) {
    207   string data = WavHeader(samples_per_second, channel_count, samples);
    208   data.reserve(data.size() + samples.size() * sizeof(int16));
    209   for (float value : samples) {
    210     const int16 quantized =
    211         static_cast<int16>(value * std::numeric_limits<int16>::max());
    212     char raw[2];
    213     ::memcpy(raw, &quantized, sizeof(int16));
    214     if (!port::kLittleEndian) {
    215       std::swap(raw[0], raw[1]);
    216     }
    217     data.push_back(raw[0]);
    218     data.push_back(raw[1]);
    219   }
    220   return data;
    221 }
    222 
    223 Status ReadInfoFile(const string& filename, uint32* width, uint32* height,
    224                     uint32* frames) {
    225   string data;
    226   TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data))
    227       << "Could not read FFmpeg file: " << filename;
    228   bool in_output = false;
    229   bool in_mapping = false;
    230   uint32 frames_value = 0;
    231   uint32 height_value = 0;
    232   uint32 width_value = 0;
    233   for (const string& line : str_util::Split(data, '\n')) {
    234     // Output starts with the first line of `Output #..`.
    235     // Further processing output region starts next line so we could continue
    236     // the loop.
    237     if (!in_output && line.find("Output #") == 0) {
    238       in_output = true;
    239       in_mapping = false;
    240       continue;
    241     }
    242     // Stream mapping starts with the first line of `Stream mapping`, it also
    243     // signals the end of Output section.
    244     // Further processing of stream mapping region starts next line so we could
    245     // continue the loop.
    246     if (!in_mapping && line.find("Stream mapping:") == 0) {
    247       in_output = false;
    248       in_mapping = true;
    249       continue;
    250     }
    251     if (in_output) {
    252       // We only look for the first stream in output `Stream #0`.
    253       // Once processed we will not further process output section.
    254       if (line.find("    Stream #") == 0) {
    255         size_t p = line.find(", rgb24, ", 24);
    256         if (p != std::string::npos) {
    257           string rgb24 = line.substr(p + 9, line.find(" ", p + 9));
    258           rgb24 = rgb24.substr(0, rgb24.find(","));
    259           // Strip anything after " ", in case the format is
    260           // `640x360 [SAR 1:1 DAR 16:9]`
    261           rgb24 = rgb24.substr(0, rgb24.find(" "));
    262           string rgb24_width = rgb24.substr(0, rgb24.find("x"));
    263           string rgb24_height = rgb24.substr(rgb24_width.length() + 1);
    264           if (strings::safe_strtou32(rgb24_width, &width_value) &&
    265               strings::safe_strtou32(rgb24_height, &height_value)) {
    266             in_output = false;
    267           }
    268         }
    269       }
    270       continue;
    271     }
    272     if (in_mapping) {
    273       // We only look for the first stream mapping to have the number of the
    274       // frames.
    275       // Once processed we will not further process stream mapping section.
    276       if (line.find("frame=") == 0) {
    277         // The format might be `frame=  166 ` or `frame=12488 `
    278         string number = line.substr(6);
    279         number = number.substr(number.find_first_not_of(" "));
    280         number = number.substr(0, number.find(" "));
    281         if (strings::safe_strtou32(number, &frames_value)) {
    282           in_mapping = false;
    283         }
    284       }
    285       continue;
    286     }
    287   }
    288   if (frames_value == 0 || height_value == 0 || width_value == 0) {
    289     return errors::Unknown("Not enough video info returned by FFmpeg [",
    290                            frames_value, ", ", height_value, ", ", width_value,
    291                            ", 3]");
    292   }
    293   *width = width_value;
    294   *height = height_value;
    295   *frames = frames_value;
    296   return Status::OK();
    297 }
    298 
    299 }  // namespace
    300 
    301 FileDeleter::~FileDeleter() {
    302   Env& env = *Env::Default();
    303   env.DeleteFile(filename_).IgnoreError();
    304 }
    305 
    306 Status WriteFile(const string& filename, StringPiece contents) {
    307   Env& env = *Env::Default();
    308   std::unique_ptr<WritableFile> file;
    309   TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file));
    310   TF_RETURN_IF_ERROR(file->Append(contents));
    311   TF_RETURN_IF_ERROR(file->Close());
    312   return Status::OK();
    313 }
    314 
    315 Status ReadAudioFile(const string& filename, const string& audio_format_id,
    316                      int32 samples_per_second, int32 channel_count,
    317                      const string& stream, std::vector<float>* output_samples) {
    318   // Create an argument list.
    319   string output_filename = io::GetTempFilename("raw");
    320   const std::vector<string> args =
    321       FfmpegAudioCommandLine(filename, output_filename, audio_format_id,
    322                              samples_per_second, channel_count, stream);
    323   // Unfortunately, it's impossible to differentiate an exec failure due to the
    324   // binary being missing and an error from the binary's execution. Therefore,
    325   // check to see if the binary *should* be available. If not, return an error
    326   // that will be converted into a helpful error message by the TensorFlow op.
    327   if (!IsBinaryInstalled(kFfmpegExecutable)) {
    328     return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
    329   }
    330 
    331   // Execute ffmpeg and report errors.
    332   pid_t child_pid = ::fork();
    333   if (child_pid < 0) {
    334     return Status(error::Code::UNKNOWN,
    335                   StrCat("fork failed: ", strerror(errno)));
    336   }
    337   if (child_pid == 0) {
    338     ExecuteFfmpeg(args);
    339   } else {
    340     int status_code;
    341     ::waitpid(child_pid, &status_code, 0);
    342     if (status_code) {
    343       return Status(error::Code::UNKNOWN,
    344                     StrCat("FFmpeg execution failed: ", status_code));
    345     }
    346     *output_samples = ReadPcmFile(output_filename);
    347     TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
    348         << output_filename;
    349     return Status::OK();
    350   }
    351 }
    352 
    353 Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
    354                        int32 samples_per_second, int32 channel_count,
    355                        const std::vector<float>& samples, string* output_data) {
    356   if (audio_format_id != "wav") {
    357     return Status(error::Code::INVALID_ARGUMENT,
    358                   "CreateAudioFile only supports the 'wav' audio format.");
    359   }
    360   *output_data = BuildWavFile(samples_per_second, channel_count, samples);
    361   return Status::OK();
    362 }
    363 
    364 Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data,
    365                      uint32* width, uint32* height, uint32* frames) {
    366   if (!IsBinaryInstalled(kFfmpegExecutable)) {
    367     return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
    368   }
    369 
    370   string output_filename = io::GetTempFilename("raw");
    371   string stderr_filename = io::GetTempFilename("err");
    372 
    373   // Create an argument list.
    374   const std::vector<string> args =
    375       FfmpegVideoCommandLine(filename, output_filename);
    376   // Execute ffmpeg and report errors.
    377   pid_t child_pid = ::fork();
    378   if (child_pid < 0) {
    379     return Status(error::Code::UNKNOWN,
    380                   StrCat("fork failed: ", strerror(errno)));
    381   }
    382   if (child_pid == 0) {
    383     const int fd =
    384         open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600);
    385     if (fd < 0) {
    386       const int error = errno;
    387       LOG(ERROR) << "FFmpeg stderr file could not be created: "
    388                  << strerror(error);
    389       ::_exit(error);
    390     }
    391     close(STDERR_FILENO);
    392     dup2(fd, STDERR_FILENO);
    393     ExecuteFfmpeg(args);
    394   } else {
    395     int status_code;
    396     if (::waitpid(child_pid, &status_code, 0) < 0) {
    397       return Status(error::Code::UNKNOWN,
    398                     StrCat("waitpid failed: ", strerror(errno)));
    399     }
    400     if (status_code) {
    401       return Status(error::Code::UNKNOWN,
    402                     StrCat("FFmpeg execution failed: ", status_code));
    403     }
    404 
    405     TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames))
    406         << "Could not read FFmpeg stderr file: " << stderr_filename;
    407 
    408     string raw_data;
    409     TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data))
    410         << "Could not read FFmpeg output file: " << output_filename;
    411     output_data->resize(raw_data.size());
    412     std::copy_n(raw_data.data(), raw_data.size(), output_data->begin());
    413 
    414     TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
    415         << output_filename;
    416     TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename))
    417         << stderr_filename;
    418     return Status::OK();
    419   }
    420 }
    421 }  // namespace ffmpeg
    422 }  // namespace tensorflow
    423