1 // Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // ============================================================================= 15 16 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" 17 18 #include <errno.h> 19 #include <fcntl.h> 20 #include <stdlib.h> 21 #include <sys/stat.h> 22 #include <sys/types.h> 23 #include <sys/wait.h> 24 #include <unistd.h> 25 26 #include <vector> 27 28 #include "tensorflow/core/lib/io/path.h" 29 #include "tensorflow/core/lib/strings/numbers.h" 30 #include "tensorflow/core/lib/strings/str_util.h" 31 #include "tensorflow/core/platform/cpu_info.h" 32 #include "tensorflow/core/platform/env.h" 33 34 using tensorflow::strings::StrCat; 35 36 namespace tensorflow { 37 namespace ffmpeg { 38 namespace { 39 40 const char kFfmpegExecutable[] = "ffmpeg"; 41 const int32 kDefaultProbeSize = 5000000; // 5MB 42 43 std::vector<string> FfmpegAudioCommandLine(const string& input_filename, 44 const string& output_filename, 45 const string& input_format_id, 46 int32 samples_per_second, 47 int32 channel_count, 48 const string& stream) { 49 std::vector<string> command({ 50 "-nostats", // No additional progress display. 51 "-nostdin", // No interactive commands accepted. 52 "-f", input_format_id, // eg: "mp3" 53 "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename, 54 "-loglevel", "error", // Print errors only. 55 "-hide_banner", // Skip printing build options, version, etc. 56 "-map_metadata", "-1", // Copy global metadata from input to output. 57 "-vn", // No video recording. 58 "-ac:a:0", StrCat(channel_count), "-ar:a:0", StrCat(samples_per_second), 59 // Output set (in several ways) to signed 16-bit little-endian ints. 60 "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le", 61 "-sn", // No subtitle recording. 62 "-y" // Overwrite output file. 63 }); 64 if (!stream.empty()) { 65 command.emplace_back("-map"); 66 command.emplace_back(StrCat("0:", stream)); 67 } 68 command.emplace_back(StrCat(output_filename)); 69 70 return command; 71 } 72 73 std::vector<string> FfmpegVideoCommandLine(const string& input_filename, 74 const string& output_filename) { 75 return {"-nostats", // No additional progress display. 76 "-nostdin", // No interactive commands accepted. 77 "-i", input_filename, "-f", "image2pipe", "-probesize", 78 StrCat(kDefaultProbeSize), "-loglevel", 79 // Info is needed to get the information about stream, etc. 80 // It is generated to a separate file, not stdout/stderr. 81 "info", 82 "-hide_banner", // Skip printing build options, version, etc. 83 "-vcodec", "rawvideo", "-pix_fmt", "rgb24", 84 "-y", // Overwrite output file. 85 StrCat(output_filename)}; 86 } 87 88 // Is a named binary installed and executable by the current process? 89 // Note that this is harder than it seems like it should be... 90 bool IsBinaryInstalled(const string& binary_name) { 91 string path = ::getenv("PATH"); 92 for (const string& dir : str_util::Split(path, ':')) { 93 const string binary_path = io::JoinPath(dir, binary_name); 94 char absolute_path[PATH_MAX + 1]; 95 if (::realpath(binary_path.c_str(), absolute_path) == nullptr) { 96 continue; 97 } 98 struct stat statinfo; 99 int result = ::stat(absolute_path, &statinfo); 100 if (result < 0) { 101 continue; 102 } 103 if (!S_ISREG(statinfo.st_mode)) { 104 continue; 105 } 106 107 // Is the current user able to execute the file? 108 if (statinfo.st_uid == ::geteuid() && statinfo.st_mode & S_IXUSR) { 109 return true; 110 } 111 // Is the current group able to execute the file? 112 if (statinfo.st_uid == ::getegid() && statinfo.st_mode & S_IXGRP) { 113 return true; 114 } 115 // Is anyone able to execute the file? 116 if (statinfo.st_mode & S_IXOTH) { 117 return true; 118 } 119 } 120 return false; 121 } 122 123 [[noreturn]] int ExecuteFfmpeg(const std::vector<string>& args) { 124 std::vector<char*> args_chars; 125 std::transform(args.begin(), args.end(), std::back_inserter(args_chars), 126 [](const string& s) { return const_cast<char*>(s.c_str()); }); 127 args_chars.push_back(nullptr); 128 ::execvp(kFfmpegExecutable, args_chars.data()); 129 // exec only returns on error. 130 const int error = errno; 131 LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error); 132 ::_exit(error); 133 } 134 135 // Reads a PCM file using signed little endian 16-bit encoding (s16le). 136 std::vector<float> ReadPcmFile(const string& filename) { 137 string raw_data; 138 TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &raw_data)) 139 << "Could not read FFmpeg output file: " << filename; 140 141 std::vector<float> samples; 142 const int32 sample_count = raw_data.size() / sizeof(int16); 143 samples.reserve(sample_count); 144 145 for (int32 i = 0; i < sample_count; ++i) { 146 // Most of this is jumping through hoops in the standard to convert some 147 // bits into the right format. I hope that an optimizing compiler will 148 // remove almost all of this code. 149 char raw[2] = {raw_data[i * 2], raw_data[i * 2 + 1]}; 150 if (!port::kLittleEndian) { 151 std::swap(raw[0], raw[1]); 152 } 153 int16 host_order; 154 ::memcpy(&host_order, raw, sizeof(host_order)); 155 const double normalized = 156 static_cast<double>(host_order) / std::numeric_limits<int16>::max(); 157 samples.push_back(normalized); 158 } 159 return samples; 160 } 161 162 template <typename UInt> 163 string LittleEndianData(UInt data) { 164 static_assert(std::is_unsigned<UInt>::value, "UInt must be unsigned"); 165 string str; 166 for (size_t i = 0; i < sizeof(UInt); ++i) { 167 const unsigned char bits = static_cast<unsigned char>(data & 0xFFU); 168 char ch; 169 ::memcpy(&ch, &bits, sizeof(bits)); 170 str.push_back(ch); 171 data >>= 8; 172 } 173 return str; 174 } 175 176 string LittleEndianDataInt(uint32 data) { 177 return LittleEndianData<uint32>(data); 178 } 179 180 string LittleEndianDataShort(uint16 data) { 181 return LittleEndianData<uint16>(data); 182 } 183 184 string WavHeader(int32 samples_per_second, int32 channel_count, 185 const std::vector<float>& samples) { 186 string header = "RIFF"; 187 header += LittleEndianDataInt(36U + samples.size() * sizeof(int16)); 188 header += "WAVEfmt "; 189 header += LittleEndianDataInt(16); 190 header += LittleEndianDataShort(1); 191 header += LittleEndianDataShort(channel_count); 192 header += LittleEndianDataInt(samples_per_second); 193 header += 194 LittleEndianDataInt(samples_per_second * channel_count * sizeof(int16)); 195 header += LittleEndianDataShort(channel_count * sizeof(int16)); 196 header += LittleEndianDataShort(16); 197 header += "data"; 198 header += LittleEndianDataInt(samples.size() * sizeof(int16)); 199 CHECK_EQ(header.size(), 44); 200 return header; 201 } 202 203 // Creates the contents of a .wav file using pcm_s16le format (signed 16 bit 204 // little endian integers). 205 string BuildWavFile(int32 samples_per_second, int32 channel_count, 206 const std::vector<float>& samples) { 207 string data = WavHeader(samples_per_second, channel_count, samples); 208 data.reserve(data.size() + samples.size() * sizeof(int16)); 209 for (float value : samples) { 210 const int16 quantized = 211 static_cast<int16>(value * std::numeric_limits<int16>::max()); 212 char raw[2]; 213 ::memcpy(raw, &quantized, sizeof(int16)); 214 if (!port::kLittleEndian) { 215 std::swap(raw[0], raw[1]); 216 } 217 data.push_back(raw[0]); 218 data.push_back(raw[1]); 219 } 220 return data; 221 } 222 223 Status ReadInfoFile(const string& filename, uint32* width, uint32* height, 224 uint32* frames) { 225 string data; 226 TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data)) 227 << "Could not read FFmpeg file: " << filename; 228 bool in_output = false; 229 bool in_mapping = false; 230 uint32 frames_value = 0; 231 uint32 height_value = 0; 232 uint32 width_value = 0; 233 for (const string& line : str_util::Split(data, '\n')) { 234 // Output starts with the first line of `Output #..`. 235 // Further processing output region starts next line so we could continue 236 // the loop. 237 if (!in_output && line.find("Output #") == 0) { 238 in_output = true; 239 in_mapping = false; 240 continue; 241 } 242 // Stream mapping starts with the first line of `Stream mapping`, it also 243 // signals the end of Output section. 244 // Further processing of stream mapping region starts next line so we could 245 // continue the loop. 246 if (!in_mapping && line.find("Stream mapping:") == 0) { 247 in_output = false; 248 in_mapping = true; 249 continue; 250 } 251 if (in_output) { 252 // We only look for the first stream in output `Stream #0`. 253 // Once processed we will not further process output section. 254 if (line.find(" Stream #") == 0) { 255 size_t p = line.find(", rgb24, ", 24); 256 if (p != std::string::npos) { 257 string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); 258 rgb24 = rgb24.substr(0, rgb24.find(",")); 259 string rgb24_width = rgb24.substr(0, rgb24.find("x")); 260 string rgb24_height = rgb24.substr(rgb24_width.length() + 1); 261 if (strings::safe_strtou32(rgb24_width, &width_value) && 262 strings::safe_strtou32(rgb24_height, &height_value)) { 263 in_output = false; 264 } 265 } 266 } 267 continue; 268 } 269 if (in_mapping) { 270 // We only look for the first stream mapping to have the number of the 271 // frames. 272 // Once processed we will not further process stream mapping section. 273 if (line.find("frame= ") == 0) { 274 string number = line.substr(8, line.find(" ", 8)); 275 number = number.substr(0, number.find(" ")); 276 if (strings::safe_strtou32(number, &frames_value)) { 277 in_mapping = false; 278 } 279 } 280 continue; 281 } 282 } 283 if (frames_value == 0 || height_value == 0 || width_value == 0) { 284 return errors::Unknown("Not enough video info returned by FFmpeg [", 285 frames_value, ", ", height_value, ", ", width_value, 286 ", 3]"); 287 } 288 *width = width_value; 289 *height = height_value; 290 *frames = frames_value; 291 return Status::OK(); 292 } 293 294 } // namespace 295 296 FileDeleter::~FileDeleter() { 297 Env& env = *Env::Default(); 298 env.DeleteFile(filename_).IgnoreError(); 299 } 300 301 Status WriteFile(const string& filename, StringPiece contents) { 302 Env& env = *Env::Default(); 303 std::unique_ptr<WritableFile> file; 304 TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file)); 305 TF_RETURN_IF_ERROR(file->Append(contents)); 306 TF_RETURN_IF_ERROR(file->Close()); 307 return Status::OK(); 308 } 309 310 Status ReadAudioFile(const string& filename, const string& audio_format_id, 311 int32 samples_per_second, int32 channel_count, 312 const string& stream, std::vector<float>* output_samples) { 313 // Create an argument list. 314 string output_filename = io::GetTempFilename("raw"); 315 const std::vector<string> args = 316 FfmpegAudioCommandLine(filename, output_filename, audio_format_id, 317 samples_per_second, channel_count, stream); 318 // Unfortunately, it's impossible to differentiate an exec failure due to the 319 // binary being missing and an error from the binary's execution. Therefore, 320 // check to see if the binary *should* be available. If not, return an error 321 // that will be converted into a helpful error message by the TensorFlow op. 322 if (!IsBinaryInstalled(kFfmpegExecutable)) { 323 return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found.")); 324 } 325 326 // Execute ffmpeg and report errors. 327 pid_t child_pid = ::fork(); 328 if (child_pid < 0) { 329 return Status(error::Code::UNKNOWN, 330 StrCat("fork failed: ", strerror(errno))); 331 } 332 if (child_pid == 0) { 333 ExecuteFfmpeg(args); 334 } else { 335 int status_code; 336 ::waitpid(child_pid, &status_code, 0); 337 if (status_code) { 338 return Status(error::Code::UNKNOWN, 339 StrCat("FFmpeg execution failed: ", status_code)); 340 } 341 *output_samples = ReadPcmFile(output_filename); 342 TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename)) 343 << output_filename; 344 return Status::OK(); 345 } 346 } 347 348 Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, 349 int32 samples_per_second, int32 channel_count, 350 const std::vector<float>& samples, string* output_data) { 351 if (audio_format_id != "wav") { 352 return Status(error::Code::INVALID_ARGUMENT, 353 "CreateAudioFile only supports the 'wav' audio format."); 354 } 355 *output_data = BuildWavFile(samples_per_second, channel_count, samples); 356 return Status::OK(); 357 } 358 359 Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data, 360 uint32* width, uint32* height, uint32* frames) { 361 if (!IsBinaryInstalled(kFfmpegExecutable)) { 362 return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found.")); 363 } 364 365 string output_filename = io::GetTempFilename("raw"); 366 string stderr_filename = io::GetTempFilename("err"); 367 368 // Create an argument list. 369 const std::vector<string> args = 370 FfmpegVideoCommandLine(filename, output_filename); 371 // Execute ffmpeg and report errors. 372 pid_t child_pid = ::fork(); 373 if (child_pid < 0) { 374 return Status(error::Code::UNKNOWN, 375 StrCat("fork failed: ", strerror(errno))); 376 } 377 if (child_pid == 0) { 378 const int fd = 379 open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600); 380 if (fd < 0) { 381 const int error = errno; 382 LOG(ERROR) << "FFmpeg stderr file could not be created: " 383 << strerror(error); 384 ::_exit(error); 385 } 386 close(STDERR_FILENO); 387 dup2(fd, STDERR_FILENO); 388 ExecuteFfmpeg(args); 389 } else { 390 int status_code; 391 if (::waitpid(child_pid, &status_code, 0) < 0) { 392 return Status(error::Code::UNKNOWN, 393 StrCat("waitpid failed: ", strerror(errno))); 394 } 395 if (status_code) { 396 return Status(error::Code::UNKNOWN, 397 StrCat("FFmpeg execution failed: ", status_code)); 398 } 399 400 TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames)) 401 << "Could not read FFmpeg stderr file: " << stderr_filename; 402 403 string raw_data; 404 TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data)) 405 << "Could not read FFmpeg output file: " << output_filename; 406 output_data->resize(raw_data.size()); 407 std::copy_n(raw_data.data(), raw_data.size(), output_data->begin()); 408 409 TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename)) 410 << output_filename; 411 TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename)) 412 << stderr_filename; 413 return Status::OK(); 414 } 415 } 416 } // namespace ffmpeg 417 } // namespace tensorflow 418