"""Routines to help recognize sound files.

Function whathdr() recognizes various types of sound file headers.
It understands almost all headers that SOX can decode.

The return tuple contains the following items, in this order:
- file type (as SOX understands it)
- sampling rate (0 if unknown or hard to decode)
- number of channels (0 if unknown or hard to decode)
- number of frames in the file (-1 if unknown or hard to decode)
- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW

If the file doesn't have a recognizable type, it returns None.
If the file can't be opened, IOError is raised.

To compute the total time, divide the number of frames by the
sampling rate (a frame contains a sample for each channel).

Function what() calls whathdr().  (It used to also use some
heuristics for raw data, but this doesn't work very well.)

Finally, the function test() is a simple main program that calls
what() for all files mentioned on the argument list.  For directory
arguments it calls what() for all files in that directory.  Default
argument is "." (testing all files in the current directory).  The
option -r tells it to recurse down directories found inside
explicitly given directories.
"""

# The file structure is top-down except that the test program and its
# subroutine come last.

__all__ = ["what", "whathdr"]


def what(filename):
    """Guess the type of a sound file.

    Thin wrapper around whathdr(); returns exactly what whathdr() returns.
    """
    return whathdr(filename)


def whathdr(filename):
    """Recognize sound headers.

    Reads the first 512 bytes of *filename* and passes them, together with
    the open file object, to every function in the module-level ``tests``
    list, in order.  The first truthy result is returned.

    Returns a tuple ``(type, rate, nchannels, nframes, sample_bits)`` or
    None when no test recognizes the header.  Errors from open() (IOError)
    propagate to the caller.
    """
    f = open(filename, 'rb')
    try:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
        return None
    finally:
        # Always release the file handle, whichever way we leave.
        f.close()


#-----------------------------------#
# Subroutines per sound header type #
#-----------------------------------#

# Each entry is a callable taking (h, f): h is the header read by
# whathdr() (at most 512 bytes) and f is the open file object.  A test
# returns the result tuple, or None if the header is not its format.
tests = []


def test_aifc(h, f):
    """Recognize AIFF / AIFF-C files ('FORM' container, 'AIFF' or 'AIFC')."""
    if h[:4] != b'FORM':
        return None
    if h[8:12] == b'AIFC':
        fmt = 'aifc'
    elif h[8:12] == b'AIFF':
        fmt = 'aiff'
    else:
        return None
    # Imported only once the magic matched, so that recognizing the other
    # formats does not depend on (or pay for) importing aifc.
    import aifc
    f.seek(0)
    try:
        # aifc.open() accepts an already-open file object.
        a = aifc.open(f, 'r')
    except (EOFError, aifc.Error):
        return None
    return (fmt, a.getframerate(), a.getnchannels(),
            a.getnframes(), 8 * a.getsampwidth())

tests.append(test_aifc)


def test_au(h, f):
    """Recognize Sun/NeXT '.snd' audio headers in either byte order.

    Returns None for a non-matching or truncated (< 24 byte) header.  The
    frame count is -1 when the frame size works out to zero (a header that
    declares 0 channels).
    """
    magic = h[:4]
    if magic == b'.snd':
        get_long = get_long_be
    elif magic in (b'\0ds.', b'dns.'):
        get_long = get_long_le
    else:
        return None
    if len(h) < 24:
        # Not enough bytes for the fixed-size fields read below.
        return None
    data_size = get_long(h[8:12])
    encoding = get_long(h[12:16])
    rate = get_long(h[16:20])
    nchannels = get_long(h[20:24])
    sample_size = 1  # default
    if encoding == 1:
        sample_bits = 'U'
    elif encoding == 2:
        sample_bits = 8
    elif encoding == 3:
        sample_bits = 16
        sample_size = 2
    else:
        sample_bits = '?'
    frame_size = sample_size * nchannels
    if frame_size:
        nframes = data_size // frame_size
    else:
        nframes = -1  # cannot divide by a zero-sized frame
    return 'au', rate, nchannels, nframes, sample_bits

tests.append(test_au)


def test_hcom(h, f):
    """Recognize HCOM headers ('FSSD' at offset 65, 'HCOM' at offset 128).

    The rate is reported as 0 when the divisor field is missing (truncated
    header) or zero.
    """
    if h[65:69] != b'FSSD' or h[128:132] != b'HCOM':
        return None
    rate = 0
    if len(h) >= 128 + 20:
        divisor = get_long_be(h[128 + 16:128 + 20])
        if divisor:
            rate = 22050 // divisor
    return 'hcom', rate, 1, -1, 8

tests.append(test_hcom)


def test_voc(h, f):
    """Recognize Creative Voice files.

    The rate is reported as 0 when it cannot be decoded from the part of
    the file held in *h*.
    """
    if h[:20] != b'Creative Voice File\032':
        return None
    rate = 0
    if len(h) >= 22:
        sbseek = get_short_le(h[20:22])
        # sbseek + 4 must lie inside h before the rate code can be read.
        if (0 <= sbseek < 500 and sbseek + 4 < len(h)
                and h[sbseek:sbseek + 1] == b'\1'):
            # The byte is at most 255, so the denominator is at least 1.
            ratecode = 256 - _octet(h, sbseek + 4)
            rate = int(1000000.0 / ratecode)
    return 'voc', rate, 1, -1, 8

tests.append(test_voc)


def test_wav(h, f):
    """Recognize RIFF/WAVE files whose first chunk is 'fmt '.

    Returns None for a non-matching or truncated (< 36 byte) header.
    """
    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
    if h[:4] != b'RIFF' or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
        return None
    if len(h) < 36:
        return None
    nchannels = get_short_le(h[22:24])
    rate = get_long_le(h[24:28])
    sample_bits = get_short_le(h[34:36])
    return 'wav', rate, nchannels, -1, sample_bits

tests.append(test_wav)


def test_8svx(h, f):
    """Recognize 8SVX files ('FORM' container with form type '8SVX')."""
    if h[:4] != b'FORM' or h[8:12] != b'8SVX':
        return None
    # Should decode it to get #channels -- assume always 1
    return '8svx', 0, 1, 0, 8

tests.append(test_8svx)


def test_sndt(h, f):
    """Recognize headers starting with 'SOUND'.

    Returns None for a non-matching or truncated (< 22 byte) header.
    """
    if h[:5] != b'SOUND' or len(h) < 22:
        return None
    nsamples = get_long_le(h[8:12])
    rate = get_short_le(h[20:22])
    return 'sndt', rate, 1, nsamples, 8

tests.append(test_sndt)


def test_sndr(h, f):
    """Recognize headers of two zero bytes followed by a plausible rate.

    The little-endian 16-bit value at offset 2 must lie in 4000..25000;
    otherwise (or if the header is shorter than 4 bytes) None is returned.
    """
    if h[:2] != b'\0\0' or len(h) < 4:
        return None
    rate = get_short_le(h[2:4])
    if 4000 <= rate <= 25000:
        return 'sndr', rate, 1, -1, 8
    return None

tests.append(test_sndr)


#---------------------------------------------#
# Subroutines to extract numbers from strings #
#---------------------------------------------#

def _octet(s, i):
    """Return the integer value of the byte at index *i* of *s*.

    Indexing a bytes object may yield an int or a 1-character string
    depending on the Python version; both are handled.  Raises IndexError
    if *s* is too short.
    """
    c = s[i]
    if isinstance(c, int):
        return c
    return ord(c)


def get_long_be(s):
    """Decode the first 4 bytes of *s* as a big-endian unsigned integer."""
    return ((_octet(s, 0) << 24) | (_octet(s, 1) << 16) |
            (_octet(s, 2) << 8) | _octet(s, 3))


def get_long_le(s):
    """Decode the first 4 bytes of *s* as a little-endian unsigned integer."""
    return ((_octet(s, 3) << 24) | (_octet(s, 2) << 16) |
            (_octet(s, 1) << 8) | _octet(s, 0))


def get_short_be(s):
    """Decode the first 2 bytes of *s* as a big-endian unsigned integer."""
    return (_octet(s, 0) << 8) | _octet(s, 1)


def get_short_le(s):
    """Decode the first 2 bytes of *s* as a little-endian unsigned integer."""
    return (_octet(s, 1) << 8) | _octet(s, 0)


#--------------------#
# Small test program #
#--------------------#

def test():
    """Command-line driver: run what() on every argument.

    An initial ``-r`` argument enables recursion into sub-directories.
    With no file arguments the current directory is tested.  Exits with
    status 1 on KeyboardInterrupt.
    """
    import sys
    recursive = 0
    if sys.argv[1:] and sys.argv[1] == '-r':
        del sys.argv[1:2]
        recursive = 1
    try:
        if sys.argv[1:]:
            testall(sys.argv[1:], recursive, 1)
        else:
            testall(['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)


def testall(list, recursive, toplevel):
    """Print the what() result for each name in *list*.

    Directories are expanded when *recursive* or *toplevel* is true;
    otherwise a hint to use -r is printed.  (The parameter name ``list``
    shadows the builtin but is kept for interface compatibility.)
    """
    import glob
    import os
    import sys
    write = sys.stdout.write
    for filename in list:
        if os.path.isdir(filename):
            if recursive or toplevel:
                write(filename + '/: recursing down:\n')
                names = glob.glob(os.path.join(filename, '*'))
                testall(names, recursive, 0)
            else:
                write(filename + '/: *** directory (use -r) ***\n')
        else:
            write(filename + ': ')
            # Show the file name before probing the file.
            sys.stdout.flush()
            try:
                write(str(what(filename)) + '\n')
            except IOError:
                write('*** not found ***\n')


if __name__ == '__main__':
    test()