Home | History | Annotate | Download | only in Lib
      1 """Routines to help recognize sound files.
      2 
      3 Function whathdr() recognizes various types of sound file headers.
      4 It understands almost all headers that SOX can decode.
      5 
      6 The return tuple contains the following items, in this order:
      7 - file type (as SOX understands it)
      8 - sampling rate (0 if unknown or hard to decode)
      9 - number of channels (0 if unknown or hard to decode)
     10 - number of frames in the file (-1 if unknown or hard to decode)
     11 - number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
     12 
     13 If the file doesn't have a recognizable type, it returns None.
     14 If the file can't be opened, IOError is raised.
     15 
     16 To compute the total time, divide the number of frames by the
     17 sampling rate (a frame contains a sample for each channel).
     18 
     19 Function what() calls whathdr().  (It used to also use some
     20 heuristics for raw data, but this doesn't work very well.)
     21 
     22 Finally, the function test() is a simple main program that calls
     23 what() for all files mentioned on the argument list.  For directory
     24 arguments it calls what() for all files in that directory.  Default
     25 argument is "." (testing all files in the current directory).  The
     26 option -r tells it to recurse down directories found inside
     27 explicitly given directories.
     28 """
     29 
     30 # The file structure is top-down except that the test program and its
     31 # subroutine come last.
     32 
     33 __all__ = ["what","whathdr"]
     34 
     35 def what(filename):
     36     """Guess the type of a sound file"""
     37     res = whathdr(filename)
     38     return res
     39 
     40 
     41 def whathdr(filename):
     42     """Recognize sound headers"""
     43     f = open(filename, 'rb')
     44     h = f.read(512)
     45     for tf in tests:
     46         res = tf(h, f)
     47         if res:
     48             return res
     49     return None
     50 
     51 
     52 #-----------------------------------#
     53 # Subroutines per sound header type #
     54 #-----------------------------------#
     55 
# Registry of header-recognition functions.  whathdr() calls each entry in
# order as tf(h, f) and returns the first true result; each test_*()
# function below is appended right after its definition, so definition
# order is also the order in which formats are tried.
tests = []
     57 
     58 def test_aifc(h, f):
     59     import aifc
     60     if h[:4] != 'FORM':
     61         return None
     62     if h[8:12] == 'AIFC':
     63         fmt = 'aifc'
     64     elif h[8:12] == 'AIFF':
     65         fmt = 'aiff'
     66     else:
     67         return None
     68     f.seek(0)
     69     try:
     70         a = aifc.openfp(f, 'r')
     71     except (EOFError, aifc.Error):
     72         return None
     73     return (fmt, a.getframerate(), a.getnchannels(), \
     74             a.getnframes(), 8*a.getsampwidth())
     75 
     76 tests.append(test_aifc)
     77 
     78 
     79 def test_au(h, f):
     80     if h[:4] == '.snd':
     81         f = get_long_be
     82     elif h[:4] in ('\0ds.', 'dns.'):
     83         f = get_long_le
     84     else:
     85         return None
     86     type = 'au'
     87     hdr_size = f(h[4:8])
     88     data_size = f(h[8:12])
     89     encoding = f(h[12:16])
     90     rate = f(h[16:20])
     91     nchannels = f(h[20:24])
     92     sample_size = 1 # default
     93     if encoding == 1:
     94         sample_bits = 'U'
     95     elif encoding == 2:
     96         sample_bits = 8
     97     elif encoding == 3:
     98         sample_bits = 16
     99         sample_size = 2
    100     else:
    101         sample_bits = '?'
    102     frame_size = sample_size * nchannels
    103     return type, rate, nchannels, data_size//frame_size, sample_bits
    104 
    105 tests.append(test_au)
    106 
    107 
    108 def test_hcom(h, f):
    109     if h[65:69] != 'FSSD' or h[128:132] != 'HCOM':
    110         return None
    111     divisor = get_long_be(h[128+16:128+20])
    112     return 'hcom', 22050//divisor, 1, -1, 8
    113 
    114 tests.append(test_hcom)
    115 
    116 
    117 def test_voc(h, f):
    118     if h[:20] != 'Creative Voice File\032':
    119         return None
    120     sbseek = get_short_le(h[20:22])
    121     rate = 0
    122     if 0 <= sbseek < 500 and h[sbseek] == '\1':
    123         ratecode = ord(h[sbseek+4])
    124         rate = int(1000000.0 / (256 - ratecode))
    125     return 'voc', rate, 1, -1, 8
    126 
    127 tests.append(test_voc)
    128 
    129 
    130 def test_wav(h, f):
    131     # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    132     if h[:4] != 'RIFF' or h[8:12] != 'WAVE' or h[12:16] != 'fmt ':
    133         return None
    134     style = get_short_le(h[20:22])
    135     nchannels = get_short_le(h[22:24])
    136     rate = get_long_le(h[24:28])
    137     sample_bits = get_short_le(h[34:36])
    138     return 'wav', rate, nchannels, -1, sample_bits
    139 
    140 tests.append(test_wav)
    141 
    142 
    143 def test_8svx(h, f):
    144     if h[:4] != 'FORM' or h[8:12] != '8SVX':
    145         return None
    146     # Should decode it to get #channels -- assume always 1
    147     return '8svx', 0, 1, 0, 8
    148 
    149 tests.append(test_8svx)
    150 
    151 
    152 def test_sndt(h, f):
    153     if h[:5] == 'SOUND':
    154         nsamples = get_long_le(h[8:12])
    155         rate = get_short_le(h[20:22])
    156         return 'sndt', rate, 1, nsamples, 8
    157 
    158 tests.append(test_sndt)
    159 
    160 
    161 def test_sndr(h, f):
    162     if h[:2] == '\0\0':
    163         rate = get_short_le(h[2:4])
    164         if 4000 <= rate <= 25000:
    165             return 'sndr', rate, 1, -1, 8
    166 
    167 tests.append(test_sndr)
    168 
    169 
    170 #---------------------------------------------#
    171 # Subroutines to extract numbers from strings #
    172 #---------------------------------------------#
    173 
    174 def get_long_be(s):
    175     return (ord(s[0])<<24) | (ord(s[1])<<16) | (ord(s[2])<<8) | ord(s[3])
    176 
    177 def get_long_le(s):
    178     return (ord(s[3])<<24) | (ord(s[2])<<16) | (ord(s[1])<<8) | ord(s[0])
    179 
    180 def get_short_be(s):
    181     return (ord(s[0])<<8) | ord(s[1])
    182 
    183 def get_short_le(s):
    184     return (ord(s[1])<<8) | ord(s[0])
    185 
    186 
    187 #--------------------#
    188 # Small test program #
    189 #--------------------#
    190 
    191 def test():
    192     import sys
    193     recursive = 0
    194     if sys.argv[1:] and sys.argv[1] == '-r':
    195         del sys.argv[1:2]
    196         recursive = 1
    197     try:
    198         if sys.argv[1:]:
    199             testall(sys.argv[1:], recursive, 1)
    200         else:
    201             testall(['.'], recursive, 1)
    202     except KeyboardInterrupt:
    203         sys.stderr.write('\n[Interrupted]\n')
    204         sys.exit(1)
    205 
    206 def testall(list, recursive, toplevel):
    207     import sys
    208     import os
    209     for filename in list:
    210         if os.path.isdir(filename):
    211             print filename + '/:',
    212             if recursive or toplevel:
    213                 print 'recursing down:'
    214                 import glob
    215                 names = glob.glob(os.path.join(filename, '*'))
    216                 testall(names, recursive, 0)
    217             else:
    218                 print '*** directory (use -r) ***'
    219         else:
    220             print filename + ':',
    221             sys.stdout.flush()
    222             try:
    223                 print what(filename)
    224             except IOError:
    225                 print '*** not found ***'
    226 
    227 if __name__ == '__main__':
    228     test()
    229