"""Macintosh binhex compression/decompression.

easy interface:
binhex(inputfilename, outputfilename)
hexbin(inputfilename, outputfilename)
"""

#
# Jack Jansen, CWI, August 1995.
#
# The module is supposed to be as compatible as possible. Especially the
# easy interface should work "as expected" on any platform.
# XXXX Note: currently, textfiles appear in mac-form on all platforms.
# We seem to lack a simple character-translate in python.
# (we should probably use ISO-Latin-1 on all but the mac platform).
# XXXX The simple routines are too simple: they expect to hold the complete
# files in-core. Should be fixed.
# XXXX It would be nice to handle AppleDouble format on unix
# (for servers serving macs).
# XXXX I don't understand what happens when you get 0x90 times the same byte on
# input. The resulting code (xx 90 90) would appear to be interpreted as an
# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
#
import sys
import os
import struct
import binascii

__all__ = ["binhex","hexbin","Error"]


class Error(Exception):
    """Raised for malformed binhex input and for misuse of the coder objects."""
    pass


# States (what have we written)
_DID_HEADER = 0
_DID_DATA = 1

# Various constants
REASONABLY_LARGE=32768  # Minimal amount we pass the rle-coder
LINELEN=64
RUNCHAR=chr(0x90)   # run-length introducer

#
# This code is no longer byte-order dependent

#
# Workarounds for non-mac machines.
try:
    from Carbon.File import FSSpec, FInfo
    from MacOS import openrf

    def getfileinfo(name):
        """Return (basename, finfo, data_fork_size, resource_fork_size)."""
        finfo = FSSpec(name).FSpGetFInfo()
        file = os.path.split(name)[1]
        # XXX Get resource/data sizes
        fp = open(name, 'rb')
        try:
            fp.seek(0, 2)
            dlen = fp.tell()
        finally:
            fp.close()
        fp = openrf(name, '*rb')
        try:
            fp.seek(0, 2)
            rlen = fp.tell()
        finally:
            fp.close()
        return file, finfo, dlen, rlen

    def openrsrc(name, *mode):
        """Open the resource fork of *name*; the mode defaults to 'rb'."""
        if not mode:
            mode = '*rb'
        else:
            mode = '*' + mode[0]
        return openrf(name, mode)

except ImportError:
    #
    # Glue code for non-macintosh usage
    #

    class FInfo:
        """Minimal stand-in carrying the Type, Creator and Flags fields."""

        def __init__(self):
            self.Type = '????'
            self.Creator = '????'
            self.Flags = 0

    def getfileinfo(name):
        """Return (basename, finfo, data_size, 0) for the file *name*.

        finfo.Type is set to 'TEXT' when the first 256 bytes contain only
        whitespace and characters in the range 0x20..0x7f.  The resource
        size is always 0 here.  The first ':' in the returned name is
        replaced by '-'.
        """
        finfo = FInfo()
        # Binary mode, so that the size reported here is the number of bytes
        # binhex() will later read through its own 'rb' handle.
        fp = open(name, 'rb')
        try:
            # Quick check for textfile
            data = fp.read(256)
            for i in range(len(data)):
                c = data[i:i+1]
                code = ord(c)
                if not c.isspace() and (code < 0x20 or code > 0x7f):
                    break
            else:
                finfo.Type = 'TEXT'
            fp.seek(0, 2)
            dsize = fp.tell()
        finally:
            fp.close()
        file = os.path.split(name)[1]
        file = file.replace(':', '-', 1)
        return file, finfo, dsize, 0

    class openrsrc:
        """Dummy resource fork: reads return '' and writes are discarded."""

        def __init__(self, *args):
            pass

        def read(self, *args):
            return ''

        def write(self, *args):
            pass

        def close(self):
            pass


class _Hqxcoderengine:
    """Write data to the coder in 3-byte chunks"""

    def __init__(self, ofp):
        self.ofp = ofp
        self.data = ''      # input not yet encoded (a partial 3-byte group)
        self.hqxdata = ''   # encoded text not yet written as a full line
        # The first line is one character shorter: BinHex.__init__ has
        # already written the leading ':' on it.
        self.linelen = LINELEN-1

    def write(self, data):
        """Encode as many whole 3-byte groups as are available."""
        self.data = self.data + data
        datalen = len(self.data)
        todo = (datalen//3)*3
        data = self.data[:todo]
        self.data = self.data[todo:]
        if not data:
            return
        self.hqxdata = self.hqxdata + binascii.b2a_hqx(data)
        self._flush(0)

    def _flush(self, force):
        """Write out all complete lines; if *force*, also the tail plus ':'."""
        first = 0
        while first <= len(self.hqxdata)-self.linelen:
            last = first + self.linelen
            self.ofp.write(self.hqxdata[first:last]+'\n')
            self.linelen = LINELEN
            first = last
        self.hqxdata = self.hqxdata[first:]
        if force:
            self.ofp.write(self.hqxdata + ':\n')

    def close(self):
        """Encode any leftover bytes, write the terminator and close ofp."""
        if self.data:
            self.hqxdata = \
                 self.hqxdata + binascii.b2a_hqx(self.data)
        self._flush(1)
        self.ofp.close()
        del self.ofp


class _Rlecoderengine:
    """Write data to the RLE-coder in suitably large chunks"""

    def __init__(self, ofp):
        self.ofp = ofp
        self.data = ''

    def write(self, data):
        """Buffer *data*; RLE-code and forward once REASONABLY_LARGE is hit."""
        self.data = self.data + data
        if len(self.data) < REASONABLY_LARGE:
            return
        rledata = binascii.rlecode_hqx(self.data)
        self.ofp.write(rledata)
        self.data = ''

    def close(self):
        """RLE-code and forward whatever is buffered, then close ofp."""
        if self.data:
            rledata = binascii.rlecode_hqx(self.data)
            self.ofp.write(rledata)
        self.ofp.close()
        del self.ofp


class BinHex:
    """Encoder: header, then data fork, then resource fork, each with a CRC.

    *name_finfo_dlen_rlen* is the 4-tuple (name, finfo, dlen, rlen), the same
    shape getfileinfo() returns.  *ofp* is a file name or an object with
    write() and close().
    """

    def __init__(self, name_finfo_dlen_rlen, ofp):
        name, finfo, dlen, rlen = name_finfo_dlen_rlen
        opened_here = False
        if isinstance(ofp, str):
            ofp = open(ofp, 'w')
            opened_here = True
        ready = False
        try:
            ofp.write('(This file must be converted with BinHex 4.0)\n\n:')
            hqxer = _Hqxcoderengine(ofp)
            self.ofp = _Rlecoderengine(hqxer)
            self.crc = 0
            if finfo is None:
                finfo = FInfo()
            self.dlen = dlen
            self.rlen = rlen
            self._writeinfo(name, finfo)
            self.state = _DID_HEADER
            ready = True
        finally:
            # Only close what we opened ourselves, and only on failure.
            if opened_here and not ready:
                ofp.close()

    def _writeinfo(self, name, finfo):
        """Write the header (name, type, creator, flags, sizes) and its CRC."""
        nl = len(name)
        if nl > 63:
            raise Error('Filename too long')
        d = chr(nl) + name + '\0'
        d2 = finfo.Type + finfo.Creator

        # Force all structs to be packed with big-endian
        d3 = struct.pack('>h', finfo.Flags)
        d4 = struct.pack('>ii', self.dlen, self.rlen)
        info = d + d2 + d3 + d4
        self._write(info)
        self._writecrc()

    def _write(self, data):
        """Send *data* to the coder chain and fold it into the running CRC."""
        self.crc = binascii.crc_hqx(data, self.crc)
        self.ofp.write(data)

    def _writecrc(self):
        """Emit the running CRC as two big-endian bytes and reset it."""
        # XXXX Should this be here??
        # self.crc = binascii.crc_hqx('\0\0', self.crc)
        if self.crc < 0:
            fmt = '>h'
        else:
            fmt = '>H'
        self.ofp.write(struct.pack(fmt, self.crc))
        self.crc = 0

    def write(self, data):
        """Write data-fork bytes; only allowed before close_data()."""
        if self.state != _DID_HEADER:
            raise Error('Writing data at the wrong time')
        self.dlen = self.dlen - len(data)
        self._write(data)

    def close_data(self):
        """Finish the data fork; raise Error unless exactly dlen bytes came."""
        if self.dlen != 0:
            # Report the data-fork discrepancy (this used to print rlen).
            raise Error('Incorrect data size, diff=%r' % (self.dlen,))
        self._writecrc()
        self.state = _DID_DATA

    def write_rsrc(self, data):
        """Write resource-fork bytes, closing the data fork first if needed."""
        if self.state < _DID_DATA:
            self.close_data()
        if self.state != _DID_DATA:
            raise Error('Writing resource data at the wrong time')
        self.rlen = self.rlen - len(data)
        self._write(data)

    def close(self):
        """Finish the resource fork and close the output.

        A second call is a no-op.  The underlying coder chain is closed even
        when a size check fails.
        """
        if self.state is None:
            return
        try:
            if self.state < _DID_DATA:
                self.close_data()
            if self.state != _DID_DATA:
                raise Error('Close at the wrong time')
            if self.rlen != 0:
                raise Error(
                    "Incorrect resource-datasize, diff=%r" % (self.rlen,))
            self._writecrc()
        finally:
            self.state = None
            ofp = self.ofp
            del self.ofp
            ofp.close()


def binhex(inp, out):
    """(infilename, outfilename) - Create binhex-encoded copy of a file"""
    finfo = getfileinfo(inp)
    ofp = BinHex(finfo, out)

    ifp = open(inp, 'rb')
    try:
        # XXXX Do textfile translation on non-mac systems
        while 1:
            d = ifp.read(128000)
            if not d: break
            ofp.write(d)
        ofp.close_data()
    finally:
        ifp.close()

    ifp = openrsrc(inp, 'rb')
    try:
        while 1:
            d = ifp.read(128000)
            if not d: break
            ofp.write_rsrc(d)
        ofp.close()
    finally:
        ifp.close()


class _Hqxdecoderengine:
    """Read data via the decoder in 4-byte chunks"""

    def __init__(self, ifp):
        self.ifp = ifp
        self.eof = 0

    def read(self, totalwtd):
        """Read at least wtd bytes (or until EOF)"""
        decdata = ''
        wtd = totalwtd
        #
        # The loop here is convoluted, since we don't really know how
        # much to decode: there may be newlines in the incoming data.
        while wtd > 0:
            if self.eof: return decdata
            wtd = ((wtd+2)//3)*4
            data = self.ifp.read(wtd)
            #
            # Next problem: there may not be a complete number of
            # bytes in what we pass to a2b. Solve by yet another
            # loop.
            #
            while 1:
                try:
                    decdatacur, self.eof = \
                              binascii.a2b_hqx(data)
                    break
                except binascii.Incomplete:
                    pass
                newdata = self.ifp.read(1)
                if not newdata:
                    raise Error('Premature EOF on binhex file')
                data = data + newdata
            decdata = decdata + decdatacur
            wtd = totalwtd - len(decdata)
            if not decdata and not self.eof:
                raise Error('Premature EOF on binhex file')
        return decdata

    def close(self):
        self.ifp.close()


class _Rledecoderengine:
    """Read data via the RLE-coder"""

    def __init__(self, ifp):
        self.ifp = ifp
        self.pre_buffer = ''    # still RLE-coded
        self.post_buffer = ''   # decoded, waiting to be handed out
        self.eof = 0

    def read(self, wtd):
        """Return up to *wtd* decoded bytes (fewer only at end of input)."""
        if wtd > len(self.post_buffer):
            self._fill(wtd-len(self.post_buffer))
        rv = self.post_buffer[:wtd]
        self.post_buffer = self.post_buffer[wtd:]
        return rv

    def _fill(self, wtd):
        """Pull more coded data and decode as much of it as is safe."""
        self.pre_buffer = self.pre_buffer + self.ifp.read(wtd+4)
        if self.ifp.eof:
            self.post_buffer = self.post_buffer + \
                binascii.rledecode_hqx(self.pre_buffer)
            self.pre_buffer = ''
            return

        #
        # Obfuscated code ahead. We have to take care that we don't
        # end up with an orphaned RUNCHAR later on. So, we keep a couple
        # of bytes in the buffer, depending on what the end of
        # the buffer looks like:
        # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0)
        # '?\220' - Keep 2 bytes: repeated something-else
        # '\220\0' - Escaped \220: Keep 2 bytes.
        # '?\220?' - Complete repeat sequence: decode all
        # otherwise: keep 1 byte.
        #
        # Slices instead of single indexes, so a very short buffer cannot
        # raise IndexError.
        mark = len(self.pre_buffer)
        if self.pre_buffer[-3:] == RUNCHAR + '\0' + RUNCHAR:
            mark = mark - 3
        elif self.pre_buffer[-1:] == RUNCHAR:
            mark = mark - 2
        elif self.pre_buffer[-2:] == RUNCHAR + '\0':
            mark = mark - 2
        elif self.pre_buffer[-2:-1] == RUNCHAR:
            pass # Decode all
        else:
            mark = mark - 1
        if mark < 0:
            mark = 0

        self.post_buffer = self.post_buffer + \
            binascii.rledecode_hqx(self.pre_buffer[:mark])
        self.pre_buffer = self.pre_buffer[mark:]

    def close(self):
        self.ifp.close()


class HexBin:
    """Decoder: parses the header on construction, then serves both forks.

    *ifp* is a file name or an object with read(), readline() and close().
    After construction FName holds the stored file name, FInfo an FInfo with
    Type, Creator and Flags filled in, and dlen/rlen the fork sizes.
    """

    def __init__(self, ifp):
        opened_here = False
        if isinstance(ifp, str):
            ifp = open(ifp)
            opened_here = True
        ready = False
        try:
            #
            # Find initial colon.
            #
            while 1:
                ch = ifp.read(1)
                if not ch:
                    raise Error("No binhex data found")
                # Cater for \r\n terminated lines (which show up as \n\r,
                # hence all lines start with \r)
                if ch == '\r':
                    continue
                if ch == ':':
                    break
                if ch != '\n':
                    ifp.readline()

            hqxifp = _Hqxdecoderengine(ifp)
            self.ifp = _Rledecoderengine(hqxifp)
            self.crc = 0
            self._readheader()
            ready = True
        finally:
            # Only close what we opened ourselves, and only on failure.
            if opened_here and not ready:
                ifp.close()

    def _read(self, len):
        """Read up to *len* decoded bytes and fold them into the CRC."""
        data = self.ifp.read(len)
        self.crc = binascii.crc_hqx(data, self.crc)
        return data

    def _checkcrc(self):
        """Read the stored 2-byte CRC and compare it with the running one."""
        raw = self.ifp.read(2)
        if len(raw) != 2:
            raise Error('Premature EOF on binhex file')
        filecrc = struct.unpack('>h', raw)[0] & 0xffff
        #self.crc = binascii.crc_hqx('\0\0', self.crc)
        # XXXX Is this needed??
        self.crc = self.crc & 0xffff
        if filecrc != self.crc:
            raise Error('CRC error, computed %x, read %x'
                        % (self.crc, filecrc))
        self.crc = 0

    def _readheader(self):
        """Parse name, type, creator, flags and fork sizes; verify the CRC."""
        namelen = self._read(1)
        if not namelen:
            raise Error('Premature EOF on binhex file')
        fname = self._read(ord(namelen))
        # 1 byte (skipped below) + 4 type + 4 creator + 2 flags + 4 dlen
        # + 4 rlen
        rest = self._read(1+4+4+2+4+4)
        self._checkcrc()

        ftype = rest[1:5]
        creator = rest[5:9]
        flags = struct.unpack('>h', rest[9:11])[0]
        self.dlen = struct.unpack('>l', rest[11:15])[0]
        self.rlen = struct.unpack('>l', rest[15:19])[0]

        self.FName = fname
        self.FInfo = FInfo()
        self.FInfo.Creator = creator
        self.FInfo.Type = ftype
        self.FInfo.Flags = flags

        self.state = _DID_HEADER

    def read(self, *n):
        """Read up to n[0] bytes of the data fork (all that is left if omitted).

        Raises Error if the input ends before the announced number of bytes
        has been delivered.
        """
        if self.state != _DID_HEADER:
            raise Error('Read data at wrong time')
        if n:
            n = n[0]
            n = min(n, self.dlen)
        else:
            n = self.dlen
        rv = ''
        while len(rv) < n:
            chunk = self._read(n-len(rv))
            if not chunk:
                # Without this check a truncated file makes the loop spin
                # forever, because _read keeps returning an empty string.
                raise Error('Premature EOF on binhex file')
            rv = rv + chunk
        self.dlen = self.dlen - n
        return rv

    def close_data(self):
        """Skip whatever is left of the data fork and verify its CRC."""
        if self.state != _DID_HEADER:
            raise Error('close_data at wrong time')
        if self.dlen:
            self._read(self.dlen)
        self._checkcrc()
        self.state = _DID_DATA

    def read_rsrc(self, *n):
        """Read up to n[0] bytes of the resource fork (all left if omitted)."""
        if self.state == _DID_HEADER:
            self.close_data()
        if self.state != _DID_DATA:
            raise Error('Read resource data at wrong time')
        if n:
            n = n[0]
            n = min(n, self.rlen)
        else:
            n = self.rlen
        self.rlen = self.rlen - n
        return self._read(n)

    def close(self):
        """Consume the rest of the stream, verify the CRCs and close.

        A second call is a no-op.
        """
        if self.state is None:
            return
        try:
            # The data fork and its CRC precede the resource fork, so they
            # must be consumed first even when there is no resource data.
            if self.state == _DID_HEADER:
                self.close_data()
            if self.rlen:
                self.read_rsrc(self.rlen)
            self._checkcrc()
        finally:
            self.state = None
            self.ifp.close()


def hexbin(inp, out):
    """(infilename, outfilename) - Decode binhexed file"""
    ifp = HexBin(inp)
    if not out:
        # NOTE(review): FName is taken verbatim from the encoded header;
        # pass an explicit *out* when decoding files from untrusted sources.
        out = ifp.FName

    ofp = open(out, 'wb')
    try:
        # XXXX Do translation on non-mac systems
        while 1:
            d = ifp.read(128000)
            if not d: break
            ofp.write(d)
    finally:
        ofp.close()
    ifp.close_data()

    d = ifp.read_rsrc(128000)
    if d:
        ofp = openrsrc(out, 'wb')
        try:
            ofp.write(d)
            while 1:
                d = ifp.read_rsrc(128000)
                if not d: break
                ofp.write(d)
        finally:
            ofp.close()

    ifp.close()


def _test():
    fname = sys.argv[1]
    binhex(fname, fname+'.hqx')
    hexbin(fname+'.hqx', fname+'.viahqx')
    #hexbin(fname, fname+'.unpacked')
    # NOTE(review): exits with status 1 even after a successful round trip;
    # kept exactly as in the original.
    sys.exit(1)


if __name__ == '__main__':
    _test()