Home | History | Annotate | Download | only in Lib
      1 """Routines to help recognizing sound files.
      2 
      3 Function whathdr() recognizes various types of sound file headers.
      4 It understands almost all headers that SOX can decode.
      5 
      6 The return tuple contains the following items, in this order:
      7 - file type (as SOX understands it)
      8 - sampling rate (0 if unknown or hard to decode)
      9 - number of channels (0 if unknown or hard to decode)
     10 - number of frames in the file (-1 if unknown or hard to decode)
     11 - number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
     12 
     13 If the file doesn't have a recognizable type, it returns None.
     14 If the file can't be opened, OSError is raised.
     15 
     16 To compute the total time, divide the number of frames by the
     17 sampling rate (a frame contains a sample for each channel).
     18 
     19 Function what() calls whathdr().  (It used to also use some
     20 heuristics for raw data, but this doesn't work very well.)
     21 
     22 Finally, the function test() is a simple main program that calls
     23 what() for all files mentioned on the argument list.  For directory
     24 arguments it calls what() for all files in that directory.  Default
     25 argument is "." (testing all files in the current directory).  The
     26 option -r tells it to recurse down directories found inside
     27 explicitly given directories.
     28 """
     29 
     30 # The file structure is top-down except that the test program and its
     31 # subroutine come last.
     32 
# Public API: only the two recognizer entry points are exported by
# "from sndhdr import *".
__all__ = ['what', 'whathdr']
     34 
     35 from collections import namedtuple
     36 
     37 SndHeaders = namedtuple('SndHeaders',
     38                         'filetype framerate nchannels nframes sampwidth')
     39 
     40 SndHeaders.filetype.__doc__ = ("""The value for type indicates the data type
     41 and will be one of the strings 'aifc', 'aiff', 'au','hcom',
     42 'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'.""")
     43 SndHeaders.framerate.__doc__ = ("""The sampling_rate will be either the actual
     44 value or 0 if unknown or difficult to decode.""")
     45 SndHeaders.nchannels.__doc__ = ("""The number of channels or 0 if it cannot be
     46 determined or if the value is difficult to decode.""")
     47 SndHeaders.nframes.__doc__ = ("""The value for frames will be either the number
     48 of frames or -1.""")
     49 SndHeaders.sampwidth.__doc__ = ("""Either the sample size in bits or
     50 'A' for A-LAW or 'U' for u-LAW.""")
     51 
     52 def what(filename):
     53     """Guess the type of a sound file."""
     54     res = whathdr(filename)
     55     return res
     56 
     57 
     58 def whathdr(filename):
     59     """Recognize sound headers."""
     60     with open(filename, 'rb') as f:
     61         h = f.read(512)
     62         for tf in tests:
     63             res = tf(h, f)
     64             if res:
     65                 return SndHeaders(*res)
     66         return None
     67 
     68 
     69 #-----------------------------------#
     70 # Subroutines per sound header type #
     71 #-----------------------------------#
     72 
# Registry of header probes.  whathdr() calls each entry as tf(h, f), where
# h is the leading bytes of the file and f the open file object, in the
# order the probes were appended; the first truthy result wins.
tests = []
     74 
     75 def test_aifc(h, f):
     76     import aifc
     77     if not h.startswith(b'FORM'):
     78         return None
     79     if h[8:12] == b'AIFC':
     80         fmt = 'aifc'
     81     elif h[8:12] == b'AIFF':
     82         fmt = 'aiff'
     83     else:
     84         return None
     85     f.seek(0)
     86     try:
     87         a = aifc.open(f, 'r')
     88     except (EOFError, aifc.Error):
     89         return None
     90     return (fmt, a.getframerate(), a.getnchannels(),
     91             a.getnframes(), 8 * a.getsampwidth())
     92 
     93 tests.append(test_aifc)
     94 
     95 
     96 def test_au(h, f):
     97     if h.startswith(b'.snd'):
     98         func = get_long_be
     99     elif h[:4] in (b'\0ds.', b'dns.'):
    100         func = get_long_le
    101     else:
    102         return None
    103     filetype = 'au'
    104     hdr_size = func(h[4:8])
    105     data_size = func(h[8:12])
    106     encoding = func(h[12:16])
    107     rate = func(h[16:20])
    108     nchannels = func(h[20:24])
    109     sample_size = 1 # default
    110     if encoding == 1:
    111         sample_bits = 'U'
    112     elif encoding == 2:
    113         sample_bits = 8
    114     elif encoding == 3:
    115         sample_bits = 16
    116         sample_size = 2
    117     else:
    118         sample_bits = '?'
    119     frame_size = sample_size * nchannels
    120     if frame_size:
    121         nframe = data_size / frame_size
    122     else:
    123         nframe = -1
    124     return filetype, rate, nchannels, nframe, sample_bits
    125 
    126 tests.append(test_au)
    127 
    128 
    129 def test_hcom(h, f):
    130     if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
    131         return None
    132     divisor = get_long_be(h[144:148])
    133     if divisor:
    134         rate = 22050 / divisor
    135     else:
    136         rate = 0
    137     return 'hcom', rate, 1, -1, 8
    138 
    139 tests.append(test_hcom)
    140 
    141 
    142 def test_voc(h, f):
    143     if not h.startswith(b'Creative Voice File\032'):
    144         return None
    145     sbseek = get_short_le(h[20:22])
    146     rate = 0
    147     if 0 <= sbseek < 500 and h[sbseek] == 1:
    148         ratecode = 256 - h[sbseek+4]
    149         if ratecode:
    150             rate = int(1000000.0 / ratecode)
    151     return 'voc', rate, 1, -1, 8
    152 
    153 tests.append(test_voc)
    154 
    155 
    156 def test_wav(h, f):
    157     import wave
    158     # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    159     if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
    160         return None
    161     f.seek(0)
    162     try:
    163         w = wave.openfp(f, 'r')
    164     except (EOFError, wave.Error):
    165         return None
    166     return ('wav', w.getframerate(), w.getnchannels(),
    167                    w.getnframes(), 8*w.getsampwidth())
    168 
    169 tests.append(test_wav)
    170 
    171 
    172 def test_8svx(h, f):
    173     if not h.startswith(b'FORM') or h[8:12] != b'8SVX':
    174         return None
    175     # Should decode it to get #channels -- assume always 1
    176     return '8svx', 0, 1, 0, 8
    177 
    178 tests.append(test_8svx)
    179 
    180 
    181 def test_sndt(h, f):
    182     if h.startswith(b'SOUND'):
    183         nsamples = get_long_le(h[8:12])
    184         rate = get_short_le(h[20:22])
    185         return 'sndt', rate, 1, nsamples, 8
    186 
    187 tests.append(test_sndt)
    188 
    189 
    190 def test_sndr(h, f):
    191     if h.startswith(b'\0\0'):
    192         rate = get_short_le(h[2:4])
    193         if 4000 <= rate <= 25000:
    194             return 'sndr', rate, 1, -1, 8
    195 
    196 tests.append(test_sndr)
    197 
    198 
    199 #-------------------------------------------#
    200 # Subroutines to extract numbers from bytes #
    201 #-------------------------------------------#
    202 
    203 def get_long_be(b):
    204     return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]
    205 
    206 def get_long_le(b):
    207     return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0]
    208 
    209 def get_short_be(b):
    210     return (b[0] << 8) | b[1]
    211 
    212 def get_short_le(b):
    213     return (b[1] << 8) | b[0]
    214 
    215 
    216 #--------------------#
    217 # Small test program #
    218 #--------------------#
    219 
    220 def test():
    221     import sys
    222     recursive = 0
    223     if sys.argv[1:] and sys.argv[1] == '-r':
    224         del sys.argv[1:2]
    225         recursive = 1
    226     try:
    227         if sys.argv[1:]:
    228             testall(sys.argv[1:], recursive, 1)
    229         else:
    230             testall(['.'], recursive, 1)
    231     except KeyboardInterrupt:
    232         sys.stderr.write('\n[Interrupted]\n')
    233         sys.exit(1)
    234 
    235 def testall(list, recursive, toplevel):
    236     import sys
    237     import os
    238     for filename in list:
    239         if os.path.isdir(filename):
    240             print(filename + '/:', end=' ')
    241             if recursive or toplevel:
    242                 print('recursing down:')
    243                 import glob
    244                 names = glob.glob(os.path.join(filename, '*'))
    245                 testall(names, recursive, 0)
    246             else:
    247                 print('*** directory (use -r) ***')
    248         else:
    249             print(filename + ':', end=' ')
    250             sys.stdout.flush()
    251             try:
    252                 print(what(filename))
    253             except OSError:
    254                 print('*** not found ***')
    255 
    256 if __name__ == '__main__':
    257     test()
    258