Home | History | Annotate | Download | only in default
      1 // Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 // =============================================================================
     15 
     16 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
     17 
     18 #include <errno.h>
     19 #include <fcntl.h>
     20 #include <stdlib.h>
     21 #include <sys/stat.h>
     22 #include <sys/types.h>
     23 #include <sys/wait.h>
     24 #include <unistd.h>
     25 
     26 #include <vector>
     27 
     28 #include "tensorflow/core/lib/io/path.h"
     29 #include "tensorflow/core/lib/strings/numbers.h"
     30 #include "tensorflow/core/lib/strings/str_util.h"
     31 #include "tensorflow/core/platform/cpu_info.h"
     32 #include "tensorflow/core/platform/env.h"
     33 
     34 using tensorflow::strings::StrCat;
     35 
     36 namespace tensorflow {
     37 namespace ffmpeg {
     38 namespace {
     39 
     40 const char kFfmpegExecutable[] = "ffmpeg";
     41 const int32 kDefaultProbeSize = 5000000;  // 5MB
     42 
     43 std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
     44                                            const string& output_filename,
     45                                            const string& input_format_id,
     46                                            int32 samples_per_second,
     47                                            int32 channel_count,
     48                                            const string& stream) {
     49   std::vector<string> command({
     50       "-nostats",             // No additional progress display.
     51       "-nostdin",             // No interactive commands accepted.
     52       "-f", input_format_id,  // eg: "mp3"
     53       "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
     54       "-loglevel", "error",   // Print errors only.
     55       "-hide_banner",         // Skip printing build options, version, etc.
     56       "-map_metadata", "-1",  // Copy global metadata from input to output.
     57       "-vn",                  // No video recording.
     58       "-ac:a:0", StrCat(channel_count), "-ar:a:0", StrCat(samples_per_second),
     59       // Output set (in several ways) to signed 16-bit little-endian ints.
     60       "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
     61       "-sn",  // No subtitle recording.
     62       "-y"    // Overwrite output file.
     63   });
     64   if (!stream.empty()) {
     65     command.emplace_back("-map");
     66     command.emplace_back(StrCat("0:", stream));
     67   }
     68   command.emplace_back(StrCat(output_filename));
     69 
     70   return command;
     71 }
     72 
     73 std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
     74                                            const string& output_filename) {
     75   return {"-nostats",  // No additional progress display.
     76           "-nostdin",  // No interactive commands accepted.
     77           "-i", input_filename, "-f", "image2pipe", "-probesize",
     78           StrCat(kDefaultProbeSize), "-loglevel",
     79           // Info is needed to get the information about stream, etc.
     80           // It is generated to a separate file, not stdout/stderr.
     81           "info",
     82           "-hide_banner",  // Skip printing build options, version, etc.
     83           "-vcodec", "rawvideo", "-pix_fmt", "rgb24",
     84           "-y",  // Overwrite output file.
     85           StrCat(output_filename)};
     86 }
     87 
     88 // Is a named binary installed and executable by the current process?
     89 // Note that this is harder than it seems like it should be...
     90 bool IsBinaryInstalled(const string& binary_name) {
     91   string path = ::getenv("PATH");
     92   for (const string& dir : str_util::Split(path, ':')) {
     93     const string binary_path = io::JoinPath(dir, binary_name);
     94     char absolute_path[PATH_MAX + 1];
     95     if (::realpath(binary_path.c_str(), absolute_path) == nullptr) {
     96       continue;
     97     }
     98     struct stat statinfo;
     99     int result = ::stat(absolute_path, &statinfo);
    100     if (result < 0) {
    101       continue;
    102     }
    103     if (!S_ISREG(statinfo.st_mode)) {
    104       continue;
    105     }
    106 
    107     // Is the current user able to execute the file?
    108     if (statinfo.st_uid == ::geteuid() && statinfo.st_mode & S_IXUSR) {
    109       return true;
    110     }
    111     // Is the current group able to execute the file?
    112     if (statinfo.st_uid == ::getegid() && statinfo.st_mode & S_IXGRP) {
    113       return true;
    114     }
    115     // Is anyone able to execute the file?
    116     if (statinfo.st_mode & S_IXOTH) {
    117       return true;
    118     }
    119   }
    120   return false;
    121 }
    122 
    123 [[noreturn]] int ExecuteFfmpeg(const std::vector<string>& args) {
    124   std::vector<char*> args_chars;
    125   std::transform(args.begin(), args.end(), std::back_inserter(args_chars),
    126                  [](const string& s) { return const_cast<char*>(s.c_str()); });
    127   args_chars.push_back(nullptr);
    128   ::execvp(kFfmpegExecutable, args_chars.data());
    129   // exec only returns on error.
    130   const int error = errno;
    131   LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error);
    132   ::_exit(error);
    133 }
    134 
    135 // Reads a PCM file using signed little endian 16-bit encoding (s16le).
    136 std::vector<float> ReadPcmFile(const string& filename) {
    137   string raw_data;
    138   TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &raw_data))
    139       << "Could not read FFmpeg output file: " << filename;
    140 
    141   std::vector<float> samples;
    142   const int32 sample_count = raw_data.size() / sizeof(int16);
    143   samples.reserve(sample_count);
    144 
    145   for (int32 i = 0; i < sample_count; ++i) {
    146     // Most of this is jumping through hoops in the standard to convert some
    147     // bits into the right format. I hope that an optimizing compiler will
    148     // remove almost all of this code.
    149     char raw[2] = {raw_data[i * 2], raw_data[i * 2 + 1]};
    150     if (!port::kLittleEndian) {
    151       std::swap(raw[0], raw[1]);
    152     }
    153     int16 host_order;
    154     ::memcpy(&host_order, raw, sizeof(host_order));
    155     const double normalized =
    156         static_cast<double>(host_order) / std::numeric_limits<int16>::max();
    157     samples.push_back(normalized);
    158   }
    159   return samples;
    160 }
    161 
    162 template <typename UInt>
    163 string LittleEndianData(UInt data) {
    164   static_assert(std::is_unsigned<UInt>::value, "UInt must be unsigned");
    165   string str;
    166   for (size_t i = 0; i < sizeof(UInt); ++i) {
    167     const unsigned char bits = static_cast<unsigned char>(data & 0xFFU);
    168     char ch;
    169     ::memcpy(&ch, &bits, sizeof(bits));
    170     str.push_back(ch);
    171     data >>= 8;
    172   }
    173   return str;
    174 }
    175 
    176 string LittleEndianDataInt(uint32 data) {
    177   return LittleEndianData<uint32>(data);
    178 }
    179 
    180 string LittleEndianDataShort(uint16 data) {
    181   return LittleEndianData<uint16>(data);
    182 }
    183 
    184 string WavHeader(int32 samples_per_second, int32 channel_count,
    185                  const std::vector<float>& samples) {
    186   string header = "RIFF";
    187   header += LittleEndianDataInt(36U + samples.size() * sizeof(int16));
    188   header += "WAVEfmt ";
    189   header += LittleEndianDataInt(16);
    190   header += LittleEndianDataShort(1);
    191   header += LittleEndianDataShort(channel_count);
    192   header += LittleEndianDataInt(samples_per_second);
    193   header +=
    194       LittleEndianDataInt(samples_per_second * channel_count * sizeof(int16));
    195   header += LittleEndianDataShort(channel_count * sizeof(int16));
    196   header += LittleEndianDataShort(16);
    197   header += "data";
    198   header += LittleEndianDataInt(samples.size() * sizeof(int16));
    199   CHECK_EQ(header.size(), 44);
    200   return header;
    201 }
    202 
    203 // Creates the contents of a .wav file using pcm_s16le format (signed 16 bit
    204 // little endian integers).
    205 string BuildWavFile(int32 samples_per_second, int32 channel_count,
    206                     const std::vector<float>& samples) {
    207   string data = WavHeader(samples_per_second, channel_count, samples);
    208   data.reserve(data.size() + samples.size() * sizeof(int16));
    209   for (float value : samples) {
    210     const int16 quantized =
    211         static_cast<int16>(value * std::numeric_limits<int16>::max());
    212     char raw[2];
    213     ::memcpy(raw, &quantized, sizeof(int16));
    214     if (!port::kLittleEndian) {
    215       std::swap(raw[0], raw[1]);
    216     }
    217     data.push_back(raw[0]);
    218     data.push_back(raw[1]);
    219   }
    220   return data;
    221 }
    222 
    223 Status ReadInfoFile(const string& filename, uint32* width, uint32* height,
    224                     uint32* frames) {
    225   string data;
    226   TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data))
    227       << "Could not read FFmpeg file: " << filename;
    228   bool in_output = false;
    229   bool in_mapping = false;
    230   uint32 frames_value = 0;
    231   uint32 height_value = 0;
    232   uint32 width_value = 0;
    233   for (const string& line : str_util::Split(data, '\n')) {
    234     // Output starts with the first line of `Output #..`.
    235     // Further processing output region starts next line so we could continue
    236     // the loop.
    237     if (!in_output && line.find("Output #") == 0) {
    238       in_output = true;
    239       in_mapping = false;
    240       continue;
    241     }
    242     // Stream mapping starts with the first line of `Stream mapping`, it also
    243     // signals the end of Output section.
    244     // Further processing of stream mapping region starts next line so we could
    245     // continue the loop.
    246     if (!in_mapping && line.find("Stream mapping:") == 0) {
    247       in_output = false;
    248       in_mapping = true;
    249       continue;
    250     }
    251     if (in_output) {
    252       // We only look for the first stream in output `Stream #0`.
    253       // Once processed we will not further process output section.
    254       if (line.find("    Stream #") == 0) {
    255         size_t p = line.find(", rgb24, ", 24);
    256         if (p != std::string::npos) {
    257           string rgb24 = line.substr(p + 9, line.find(" ", p + 9));
    258           rgb24 = rgb24.substr(0, rgb24.find(","));
    259           string rgb24_width = rgb24.substr(0, rgb24.find("x"));
    260           string rgb24_height = rgb24.substr(rgb24_width.length() + 1);
    261           if (strings::safe_strtou32(rgb24_width, &width_value) &&
    262               strings::safe_strtou32(rgb24_height, &height_value)) {
    263             in_output = false;
    264           }
    265         }
    266       }
    267       continue;
    268     }
    269     if (in_mapping) {
    270       // We only look for the first stream mapping to have the number of the
    271       // frames.
    272       // Once processed we will not further process stream mapping section.
    273       if (line.find("frame=  ") == 0) {
    274         string number = line.substr(8, line.find(" ", 8));
    275         number = number.substr(0, number.find(" "));
    276         if (strings::safe_strtou32(number, &frames_value)) {
    277           in_mapping = false;
    278         }
    279       }
    280       continue;
    281     }
    282   }
    283   if (frames_value == 0 || height_value == 0 || width_value == 0) {
    284     return errors::Unknown("Not enough video info returned by FFmpeg [",
    285                            frames_value, ", ", height_value, ", ", width_value,
    286                            ", 3]");
    287   }
    288   *width = width_value;
    289   *height = height_value;
    290   *frames = frames_value;
    291   return Status::OK();
    292 }
    293 
    294 }  // namespace
    295 
    296 FileDeleter::~FileDeleter() {
    297   Env& env = *Env::Default();
    298   env.DeleteFile(filename_).IgnoreError();
    299 }
    300 
    301 Status WriteFile(const string& filename, StringPiece contents) {
    302   Env& env = *Env::Default();
    303   std::unique_ptr<WritableFile> file;
    304   TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file));
    305   TF_RETURN_IF_ERROR(file->Append(contents));
    306   TF_RETURN_IF_ERROR(file->Close());
    307   return Status::OK();
    308 }
    309 
    310 Status ReadAudioFile(const string& filename, const string& audio_format_id,
    311                      int32 samples_per_second, int32 channel_count,
    312                      const string& stream, std::vector<float>* output_samples) {
    313   // Create an argument list.
    314   string output_filename = io::GetTempFilename("raw");
    315   const std::vector<string> args =
    316       FfmpegAudioCommandLine(filename, output_filename, audio_format_id,
    317                              samples_per_second, channel_count, stream);
    318   // Unfortunately, it's impossible to differentiate an exec failure due to the
    319   // binary being missing and an error from the binary's execution. Therefore,
    320   // check to see if the binary *should* be available. If not, return an error
    321   // that will be converted into a helpful error message by the TensorFlow op.
    322   if (!IsBinaryInstalled(kFfmpegExecutable)) {
    323     return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
    324   }
    325 
    326   // Execute ffmpeg and report errors.
    327   pid_t child_pid = ::fork();
    328   if (child_pid < 0) {
    329     return Status(error::Code::UNKNOWN,
    330                   StrCat("fork failed: ", strerror(errno)));
    331   }
    332   if (child_pid == 0) {
    333     ExecuteFfmpeg(args);
    334   } else {
    335     int status_code;
    336     ::waitpid(child_pid, &status_code, 0);
    337     if (status_code) {
    338       return Status(error::Code::UNKNOWN,
    339                     StrCat("FFmpeg execution failed: ", status_code));
    340     }
    341     *output_samples = ReadPcmFile(output_filename);
    342     TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
    343         << output_filename;
    344     return Status::OK();
    345   }
    346 }
    347 
    348 Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
    349                        int32 samples_per_second, int32 channel_count,
    350                        const std::vector<float>& samples, string* output_data) {
    351   if (audio_format_id != "wav") {
    352     return Status(error::Code::INVALID_ARGUMENT,
    353                   "CreateAudioFile only supports the 'wav' audio format.");
    354   }
    355   *output_data = BuildWavFile(samples_per_second, channel_count, samples);
    356   return Status::OK();
    357 }
    358 
    359 Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data,
    360                      uint32* width, uint32* height, uint32* frames) {
    361   if (!IsBinaryInstalled(kFfmpegExecutable)) {
    362     return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
    363   }
    364 
    365   string output_filename = io::GetTempFilename("raw");
    366   string stderr_filename = io::GetTempFilename("err");
    367 
    368   // Create an argument list.
    369   const std::vector<string> args =
    370       FfmpegVideoCommandLine(filename, output_filename);
    371   // Execute ffmpeg and report errors.
    372   pid_t child_pid = ::fork();
    373   if (child_pid < 0) {
    374     return Status(error::Code::UNKNOWN,
    375                   StrCat("fork failed: ", strerror(errno)));
    376   }
    377   if (child_pid == 0) {
    378     const int fd =
    379         open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600);
    380     if (fd < 0) {
    381       const int error = errno;
    382       LOG(ERROR) << "FFmpeg stderr file could not be created: "
    383                  << strerror(error);
    384       ::_exit(error);
    385     }
    386     close(STDERR_FILENO);
    387     dup2(fd, STDERR_FILENO);
    388     ExecuteFfmpeg(args);
    389   } else {
    390     int status_code;
    391     if (::waitpid(child_pid, &status_code, 0) < 0) {
    392       return Status(error::Code::UNKNOWN,
    393                     StrCat("waitpid failed: ", strerror(errno)));
    394     }
    395     if (status_code) {
    396       return Status(error::Code::UNKNOWN,
    397                     StrCat("FFmpeg execution failed: ", status_code));
    398     }
    399 
    400     TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames))
    401         << "Could not read FFmpeg stderr file: " << stderr_filename;
    402 
    403     string raw_data;
    404     TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data))
    405         << "Could not read FFmpeg output file: " << output_filename;
    406     output_data->resize(raw_data.size());
    407     std::copy_n(raw_data.data(), raw_data.size(), output_data->begin());
    408 
    409     TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
    410         << output_filename;
    411     TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename))
    412         << stderr_filename;
    413     return Status::OK();
    414   }
    415 }
    416 }  // namespace ffmpeg
    417 }  // namespace tensorflow
    418