Home | History | Annotate | Download | only in ffmpeg
      1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # =============================================================================
"""Encoding and decoding audio, and decoding video, using FFmpeg."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
     22 from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
     23 from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
     24 from tensorflow.contrib.util import loader
     25 from tensorflow.python.framework import ops
     26 from tensorflow.python.platform import resource_loader
     27 from tensorflow.python.util.deprecation import deprecated
     28 
# Load the compiled op library at import time. The path to 'ffmpeg.so' is
# resolved through resource_loader, and the returned handle is kept in a
# private module-level name.
# NOTE(review): presumably this must run before the generated op wrappers
# imported above are called -- confirm.
_ffmpeg_so = loader.load_op_library(
    resource_loader.get_path_to_datafile('ffmpeg.so'))
     31 
     32 
     33 @deprecated('2018-09-04', 'This will be deleted and should not be used.')
     34 def decode_audio(contents, file_format=None, samples_per_second=None,
     35                  channel_count=None, stream=None):
     36   """Create an op that decodes the contents of an audio file.
     37 
     38   Note that ffmpeg is free to select the "best" audio track from an mp4.
     39   https://trac.ffmpeg.org/wiki/Map
     40 
     41   Args:
     42     contents: The binary contents of the audio file to decode. This is a
     43         scalar.
     44     file_format: A string or scalar string tensor specifying which
     45         format the contents will conform to. This can be mp3, mp4, ogg,
     46         or wav.
     47     samples_per_second: The number of samples per second that is
     48         assumed, as an `int` or scalar `int32` tensor. In some cases,
     49         resampling will occur to generate the correct sample rate.
     50     channel_count: The number of channels that should be created from the
     51         audio contents, as an `int` or scalar `int32` tensor. If the
     52         `contents` have more than this number, then some channels will
     53         be merged or dropped. If `contents` has fewer than this, then
     54         additional channels will be created from the existing ones.
     55     stream: A string specifying which stream from the content file
     56         should be decoded, e.g., '0' means the 0-th stream.
     57         The default value is '' which leaves the decision to ffmpeg.
     58 
     59   Returns:
     60     A rank-2 tensor that has time along dimension 0 and channels along
     61     dimension 1. Dimension 0 will be `samples_per_second *
     62     length_in_seconds` wide, and dimension 1 will be `channel_count`
     63     wide. If ffmpeg fails to decode the audio then an empty tensor will
     64     be returned.
     65   """
     66   return gen_decode_audio_op_py.decode_audio_v2(
     67       contents, file_format=file_format, samples_per_second=samples_per_second,
     68       channel_count=channel_count, stream=stream)
     69 
     70 
# Declare the 'DecodeAudio' op type as having no gradient.
# NOTE(review): decode_audio() in this file calls decode_audio_v2 -- confirm
# whether the V2 op type needs its own registration.
ops.NotDifferentiable('DecodeAudio')
     72 
     73 
     74 @deprecated('2018-09-04', 'This will be deleted and should not be used.')
     75 def encode_audio(audio, file_format=None, samples_per_second=None):
     76   """Creates an op that encodes an audio file using sampled audio from a tensor.
     77 
     78   Args:
     79     audio: A rank-2 `Tensor` that has time along dimension 0 and
     80         channels along dimension 1. Dimension 0 is `samples_per_second *
     81         length_in_seconds` long.
     82     file_format: The type of file to encode, as a string or rank-0
     83         string tensor. "wav" is the only supported format.
     84     samples_per_second: The number of samples in the audio tensor per
     85         second of audio, as an `int` or rank-0 `int32` tensor.
     86 
     87   Returns:
     88     A scalar tensor that contains the encoded audio in the specified file
     89     format.
     90   """
     91   return gen_encode_audio_op_py.encode_audio_v2(
     92       audio,
     93       file_format=file_format,
     94       samples_per_second=samples_per_second,
     95       bits_per_second=192000)  # not used by WAV
     96 
     97 
# Declare the 'EncodeAudio' op type as having no gradient.
# NOTE(review): encode_audio() in this file calls encode_audio_v2 -- confirm
# whether the V2 op type needs its own registration.
ops.NotDifferentiable('EncodeAudio')
     99 
    100 
    101 @deprecated('2018-09-04', 'This will be deleted and should not be used.')
    102 def decode_video(contents):
    103   """Create an op that decodes the contents of a video file.
    104 
    105   Args:
    106     contents: The binary contents of the video file to decode. This is a
    107       scalar.
    108 
    109   Returns:
    110     A rank-4 `Tensor` that has `[frames, height, width, 3]` RGB as output.
    111   """
    112   return gen_decode_video_op_py.decode_video(contents)
    113 
    114 
# Declare the 'DecodeVideo' op type as having no gradient.
ops.NotDifferentiable('DecodeVideo')
    116