1 // Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // ============================================================================= 15 16 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" 17 18 #include <errno.h> 19 #include <fcntl.h> 20 #include <stdlib.h> 21 #include <sys/stat.h> 22 #include <sys/types.h> 23 #include <sys/wait.h> 24 #include <unistd.h> 25 26 #include <vector> 27 28 #include "tensorflow/core/lib/io/path.h" 29 #include "tensorflow/core/lib/strings/numbers.h" 30 #include "tensorflow/core/lib/strings/str_util.h" 31 #include "tensorflow/core/platform/byte_order.h" 32 #include "tensorflow/core/platform/env.h" 33 34 using tensorflow::strings::StrCat; 35 36 namespace tensorflow { 37 namespace ffmpeg { 38 namespace { 39 40 const char kFfmpegExecutable[] = "ffmpeg"; 41 const int32 kDefaultProbeSize = 5000000; // 5MB 42 43 std::vector<string> FfmpegAudioCommandLine(const string& input_filename, 44 const string& output_filename, 45 const string& input_format_id, 46 int32 samples_per_second, 47 int32 channel_count, 48 const string& stream) { 49 std::vector<string> command({ 50 "-nostats", // No additional progress display. 51 "-nostdin", // No interactive commands accepted. 52 "-f", input_format_id, // eg: "mp3" 53 "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename, 54 "-loglevel", "error", // Print errors only. 55 "-hide_banner", // Skip printing build options, version, etc. 56 "-map_metadata", "-1", // Copy global metadata from input to output. 57 "-vn", // No video recording. 58 "-ac:a:0", StrCat(channel_count), "-ar:a:0", StrCat(samples_per_second), 59 // Output set (in several ways) to signed 16-bit little-endian ints. 60 "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le", 61 "-sn", // No subtitle recording. 62 "-y" // Overwrite output file. 63 }); 64 if (!stream.empty()) { 65 command.emplace_back("-map"); 66 command.emplace_back(StrCat("0:", stream)); 67 } 68 command.emplace_back(StrCat(output_filename)); 69 70 return command; 71 } 72 73 std::vector<string> FfmpegVideoCommandLine(const string& input_filename, 74 const string& output_filename) { 75 return {"-nostats", // No additional progress display. 76 "-nostdin", // No interactive commands accepted. 77 "-i", input_filename, "-f", "image2pipe", "-probesize", 78 StrCat(kDefaultProbeSize), "-loglevel", 79 // Info is needed to get the information about stream, etc. 80 // It is generated to a separate file, not stdout/stderr. 81 "info", 82 "-hide_banner", // Skip printing build options, version, etc. 83 "-vcodec", "rawvideo", "-pix_fmt", "rgb24", 84 "-y", // Overwrite output file. 85 StrCat(output_filename)}; 86 } 87 88 // Is a named binary installed and executable by the current process? 89 // Note that this is harder than it seems like it should be... 90 bool IsBinaryInstalled(const string& binary_name) { 91 string path = ::getenv("PATH"); 92 for (const string& dir : str_util::Split(path, ':')) { 93 const string binary_path = io::JoinPath(dir, binary_name); 94 char absolute_path[PATH_MAX + 1]; 95 if (::realpath(binary_path.c_str(), absolute_path) == nullptr) { 96 continue; 97 } 98 struct stat statinfo; 99 int result = ::stat(absolute_path, &statinfo); 100 if (result < 0) { 101 continue; 102 } 103 if (!S_ISREG(statinfo.st_mode)) { 104 continue; 105 } 106 107 // Is the current user able to execute the file? 108 if (statinfo.st_uid == ::geteuid() && statinfo.st_mode & S_IXUSR) { 109 return true; 110 } 111 // Is the current group able to execute the file? 112 if (statinfo.st_uid == ::getegid() && statinfo.st_mode & S_IXGRP) { 113 return true; 114 } 115 // Is anyone able to execute the file? 116 if (statinfo.st_mode & S_IXOTH) { 117 return true; 118 } 119 } 120 return false; 121 } 122 123 [[noreturn]] int ExecuteFfmpeg(const std::vector<string>& args) { 124 std::vector<char*> args_chars; 125 std::transform(args.begin(), args.end(), std::back_inserter(args_chars), 126 [](const string& s) { return const_cast<char*>(s.c_str()); }); 127 args_chars.push_back(nullptr); 128 ::execvp(kFfmpegExecutable, args_chars.data()); 129 // exec only returns on error. 130 const int error = errno; 131 LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error); 132 ::_exit(error); 133 } 134 135 // Reads a PCM file using signed little endian 16-bit encoding (s16le). 136 std::vector<float> ReadPcmFile(const string& filename) { 137 string raw_data; 138 TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &raw_data)) 139 << "Could not read FFmpeg output file: " << filename; 140 141 std::vector<float> samples; 142 const int32 sample_count = raw_data.size() / sizeof(int16); 143 samples.reserve(sample_count); 144 145 for (int32 i = 0; i < sample_count; ++i) { 146 // Most of this is jumping through hoops in the standard to convert some 147 // bits into the right format. I hope that an optimizing compiler will 148 // remove almost all of this code. 149 char raw[2] = {raw_data[i * 2], raw_data[i * 2 + 1]}; 150 if (!port::kLittleEndian) { 151 std::swap(raw[0], raw[1]); 152 } 153 int16 host_order; 154 ::memcpy(&host_order, raw, sizeof(host_order)); 155 const double normalized = 156 static_cast<double>(host_order) / std::numeric_limits<int16>::max(); 157 samples.push_back(normalized); 158 } 159 return samples; 160 } 161 162 template <typename UInt> 163 string LittleEndianData(UInt data) { 164 static_assert(std::is_unsigned<UInt>::value, "UInt must be unsigned"); 165 string str; 166 for (size_t i = 0; i < sizeof(UInt); ++i) { 167 const unsigned char bits = static_cast<unsigned char>(data & 0xFFU); 168 char ch; 169 ::memcpy(&ch, &bits, sizeof(bits)); 170 str.push_back(ch); 171 data >>= 8; 172 } 173 return str; 174 } 175 176 string LittleEndianDataInt(uint32 data) { 177 return LittleEndianData<uint32>(data); 178 } 179 180 string LittleEndianDataShort(uint16 data) { 181 return LittleEndianData<uint16>(data); 182 } 183 184 string WavHeader(int32 samples_per_second, int32 channel_count, 185 const std::vector<float>& samples) { 186 string header = "RIFF"; 187 header += LittleEndianDataInt(36U + samples.size() * sizeof(int16)); 188 header += "WAVEfmt "; 189 header += LittleEndianDataInt(16); 190 header += LittleEndianDataShort(1); 191 header += LittleEndianDataShort(channel_count); 192 header += LittleEndianDataInt(samples_per_second); 193 header += 194 LittleEndianDataInt(samples_per_second * channel_count * sizeof(int16)); 195 header += LittleEndianDataShort(channel_count * sizeof(int16)); 196 header += LittleEndianDataShort(16); 197 header += "data"; 198 header += LittleEndianDataInt(samples.size() * sizeof(int16)); 199 CHECK_EQ(header.size(), 44); 200 return header; 201 } 202 203 // Creates the contents of a .wav file using pcm_s16le format (signed 16 bit 204 // little endian integers). 205 string BuildWavFile(int32 samples_per_second, int32 channel_count, 206 const std::vector<float>& samples) { 207 string data = WavHeader(samples_per_second, channel_count, samples); 208 data.reserve(data.size() + samples.size() * sizeof(int16)); 209 for (float value : samples) { 210 const int16 quantized = 211 static_cast<int16>(value * std::numeric_limits<int16>::max()); 212 char raw[2]; 213 ::memcpy(raw, &quantized, sizeof(int16)); 214 if (!port::kLittleEndian) { 215 std::swap(raw[0], raw[1]); 216 } 217 data.push_back(raw[0]); 218 data.push_back(raw[1]); 219 } 220 return data; 221 } 222 223 Status ReadInfoFile(const string& filename, uint32* width, uint32* height, 224 uint32* frames) { 225 string data; 226 TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data)) 227 << "Could not read FFmpeg file: " << filename; 228 bool in_output = false; 229 bool in_mapping = false; 230 uint32 frames_value = 0; 231 uint32 height_value = 0; 232 uint32 width_value = 0; 233 for (const string& line : str_util::Split(data, '\n')) { 234 // Output starts with the first line of `Output #..`. 235 // Further processing output region starts next line so we could continue 236 // the loop. 237 if (!in_output && line.find("Output #") == 0) { 238 in_output = true; 239 in_mapping = false; 240 continue; 241 } 242 // Stream mapping starts with the first line of `Stream mapping`, it also 243 // signals the end of Output section. 244 // Further processing of stream mapping region starts next line so we could 245 // continue the loop. 246 if (!in_mapping && line.find("Stream mapping:") == 0) { 247 in_output = false; 248 in_mapping = true; 249 continue; 250 } 251 if (in_output) { 252 // We only look for the first stream in output `Stream #0`. 253 // Once processed we will not further process output section. 254 if (line.find(" Stream #") == 0) { 255 size_t p = line.find(", rgb24, ", 24); 256 if (p != std::string::npos) { 257 string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); 258 rgb24 = rgb24.substr(0, rgb24.find(",")); 259 // Strip anything after " ", in case the format is 260 // `640x360 [SAR 1:1 DAR 16:9]` 261 rgb24 = rgb24.substr(0, rgb24.find(" ")); 262 string rgb24_width = rgb24.substr(0, rgb24.find("x")); 263 string rgb24_height = rgb24.substr(rgb24_width.length() + 1); 264 if (strings::safe_strtou32(rgb24_width, &width_value) && 265 strings::safe_strtou32(rgb24_height, &height_value)) { 266 in_output = false; 267 } 268 } 269 } 270 continue; 271 } 272 if (in_mapping) { 273 // We only look for the first stream mapping to have the number of the 274 // frames. 275 // Once processed we will not further process stream mapping section. 276 if (line.find("frame=") == 0) { 277 // The format might be `frame= 166 ` or `frame=12488 ` 278 string number = line.substr(6); 279 number = number.substr(number.find_first_not_of(" ")); 280 number = number.substr(0, number.find(" ")); 281 if (strings::safe_strtou32(number, &frames_value)) { 282 in_mapping = false; 283 } 284 } 285 continue; 286 } 287 } 288 if (frames_value == 0 || height_value == 0 || width_value == 0) { 289 return errors::Unknown("Not enough video info returned by FFmpeg [", 290 frames_value, ", ", height_value, ", ", width_value, 291 ", 3]"); 292 } 293 *width = width_value; 294 *height = height_value; 295 *frames = frames_value; 296 return Status::OK(); 297 } 298 299 } // namespace 300 301 FileDeleter::~FileDeleter() { 302 Env& env = *Env::Default(); 303 env.DeleteFile(filename_).IgnoreError(); 304 } 305 306 Status WriteFile(const string& filename, StringPiece contents) { 307 Env& env = *Env::Default(); 308 std::unique_ptr<WritableFile> file; 309 TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file)); 310 TF_RETURN_IF_ERROR(file->Append(contents)); 311 TF_RETURN_IF_ERROR(file->Close()); 312 return Status::OK(); 313 } 314 315 Status ReadAudioFile(const string& filename, const string& audio_format_id, 316 int32 samples_per_second, int32 channel_count, 317 const string& stream, std::vector<float>* output_samples) { 318 // Create an argument list. 319 string output_filename = io::GetTempFilename("raw"); 320 const std::vector<string> args = 321 FfmpegAudioCommandLine(filename, output_filename, audio_format_id, 322 samples_per_second, channel_count, stream); 323 // Unfortunately, it's impossible to differentiate an exec failure due to the 324 // binary being missing and an error from the binary's execution. Therefore, 325 // check to see if the binary *should* be available. If not, return an error 326 // that will be converted into a helpful error message by the TensorFlow op. 327 if (!IsBinaryInstalled(kFfmpegExecutable)) { 328 return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found.")); 329 } 330 331 // Execute ffmpeg and report errors. 332 pid_t child_pid = ::fork(); 333 if (child_pid < 0) { 334 return Status(error::Code::UNKNOWN, 335 StrCat("fork failed: ", strerror(errno))); 336 } 337 if (child_pid == 0) { 338 ExecuteFfmpeg(args); 339 } else { 340 int status_code; 341 ::waitpid(child_pid, &status_code, 0); 342 if (status_code) { 343 return Status(error::Code::UNKNOWN, 344 StrCat("FFmpeg execution failed: ", status_code)); 345 } 346 *output_samples = ReadPcmFile(output_filename); 347 TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename)) 348 << output_filename; 349 return Status::OK(); 350 } 351 } 352 353 Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, 354 int32 samples_per_second, int32 channel_count, 355 const std::vector<float>& samples, string* output_data) { 356 if (audio_format_id != "wav") { 357 return Status(error::Code::INVALID_ARGUMENT, 358 "CreateAudioFile only supports the 'wav' audio format."); 359 } 360 *output_data = BuildWavFile(samples_per_second, channel_count, samples); 361 return Status::OK(); 362 } 363 364 Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data, 365 uint32* width, uint32* height, uint32* frames) { 366 if (!IsBinaryInstalled(kFfmpegExecutable)) { 367 return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found.")); 368 } 369 370 string output_filename = io::GetTempFilename("raw"); 371 string stderr_filename = io::GetTempFilename("err"); 372 373 // Create an argument list. 374 const std::vector<string> args = 375 FfmpegVideoCommandLine(filename, output_filename); 376 // Execute ffmpeg and report errors. 377 pid_t child_pid = ::fork(); 378 if (child_pid < 0) { 379 return Status(error::Code::UNKNOWN, 380 StrCat("fork failed: ", strerror(errno))); 381 } 382 if (child_pid == 0) { 383 const int fd = 384 open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600); 385 if (fd < 0) { 386 const int error = errno; 387 LOG(ERROR) << "FFmpeg stderr file could not be created: " 388 << strerror(error); 389 ::_exit(error); 390 } 391 close(STDERR_FILENO); 392 dup2(fd, STDERR_FILENO); 393 ExecuteFfmpeg(args); 394 } else { 395 int status_code; 396 if (::waitpid(child_pid, &status_code, 0) < 0) { 397 return Status(error::Code::UNKNOWN, 398 StrCat("waitpid failed: ", strerror(errno))); 399 } 400 if (status_code) { 401 return Status(error::Code::UNKNOWN, 402 StrCat("FFmpeg execution failed: ", status_code)); 403 } 404 405 TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames)) 406 << "Could not read FFmpeg stderr file: " << stderr_filename; 407 408 string raw_data; 409 TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data)) 410 << "Could not read FFmpeg output file: " << output_filename; 411 output_data->resize(raw_data.size()); 412 std::copy_n(raw_data.data(), raw_data.size(), output_data->begin()); 413 414 TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename)) 415 << output_filename; 416 TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename)) 417 << stderr_filename; 418 return Status::OK(); 419 } 420 } 421 } // namespace ffmpeg 422 } // namespace tensorflow 423