Home | History | Annotate | Download | only in python2.7
      1 #! /usr/bin/env python
      2 
      3 """Mimification and unmimification of mail messages.
      4 
      5 Decode quoted-printable parts of a mail message or encode using
      6 quoted-printable.
      7 
      8 Usage:
      9         mimify(input, output)
     10         unmimify(input, output, decode_base64 = 0)
     11 to encode and decode respectively.  Input and output may be the name
     12 of a file or an open file object.  Only a readline() method is used
     13 on the input file, only a write() method is used on the output file.
     14 When using file names, the input and output file names may be the
     15 same.
     16 
     17 Interactive usage:
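        mimify.py [-l len] -e [infile [outfile]]
        mimify.py [-b] -d [infile [outfile]]
to encode and decode respectively.  Infile defaults to standard
input and outfile to standard output.  The -l option sets the line
length above which a message body is encoded as quoted-printable
(default 200); -b, which is only accepted together with -d, also
decodes base64 encoded parts.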
     22 """
     23 
# Configure: module-level settings read by the functions below.
MAXLEN = 200    # a body line longer than this forces quoted-printable encoding
CHARSET = 'ISO-8859-1'  # charset name written for mail with non-US-ASCII characters
QUOTE = '> '            # prefix that marks quoted reply text
# End configure
     29 
     30 import re
     31 
     32 import warnings
     33 warnings.warn("the mimify module is deprecated; use the email package instead",
     34                 DeprecationWarning, 2)
     35 
# Names exported by "from mimify import *".
__all__ = ["mimify","unmimify","mime_encode_header","mime_decode_header"]

# Header announcing a quoted-printable encoded body.
qp = re.compile('^content-transfer-encoding:\\s*quoted-printable', re.I)
# Header announcing a base64 encoded body.
base64_re = re.compile('^content-transfer-encoding:\\s*base64', re.I)
# Multipart content-type header; group 1 is the boundary string.
mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)', re.I|re.S)
# Content-type header with a double-quoted us-ascii or iso-8859-N charset:
# group 1 is the text up to and including the opening quote, group 2 the
# charset name, group 3 the closing quote and the rest of the header.
chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)', re.I|re.S)
# A line made up of dashes only (possibly none, i.e. an empty line); this is
# what the header readers below treat as the end of the header.
he = re.compile('^-*\n')
# One quoted-printable escape "=XX"; group 1 is the pair of hex digits.
mime_code = re.compile('=([0-9a-f][0-9a-f])', re.I)
# An iso-8859-1 "Q" encoded word (=?iso-8859-1?q?...?=); group 1 is the
# encoded text between the markers.
mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.I)
# Subject header of a reply ("Subject: Re: ...").
repl = re.compile('^subject:\\s+re: ', re.I)
     46 
     47 class File:
     48     """A simple fake file object that knows about limited read-ahead and
     49     boundaries.  The only supported method is readline()."""
     50 
     51     def __init__(self, file, boundary):
     52         self.file = file
     53         self.boundary = boundary
     54         self.peek = None
     55 
     56     def readline(self):
     57         if self.peek is not None:
     58             return ''
     59         line = self.file.readline()
     60         if not line:
     61             return line
     62         if self.boundary:
     63             if line == self.boundary + '\n':
     64                 self.peek = line
     65                 return ''
     66             if line == self.boundary + '--\n':
     67                 self.peek = line
     68                 return ''
     69         return line
     70 
     71 class HeaderFile:
     72     def __init__(self, file):
     73         self.file = file
     74         self.peek = None
     75 
     76     def readline(self):
     77         if self.peek is not None:
     78             line = self.peek
     79             self.peek = None
     80         else:
     81             line = self.file.readline()
     82         if not line:
     83             return line
     84         if he.match(line):
     85             return line
     86         while 1:
     87             self.peek = self.file.readline()
     88             if len(self.peek) == 0 or \
     89                (self.peek[0] != ' ' and self.peek[0] != '\t'):
     90                 return line
     91             line = line + self.peek
     92             self.peek = None
     93 
     94 def mime_decode(line):
     95     """Decode a single line of quoted-printable text to 8bit."""
     96     newline = ''
     97     pos = 0
     98     while 1:
     99         res = mime_code.search(line, pos)
    100         if res is None:
    101             break
    102         newline = newline + line[pos:res.start(0)] + \
    103                   chr(int(res.group(1), 16))
    104         pos = res.end(0)
    105     return newline + line[pos:]
    106 
    107 def mime_decode_header(line):
    108     """Decode a header line to 8bit."""
    109     newline = ''
    110     pos = 0
    111     while 1:
    112         res = mime_head.search(line, pos)
    113         if res is None:
    114             break
    115         match = res.group(1)
    116         # convert underscores to spaces (before =XX conversion!)
    117         match = ' '.join(match.split('_'))
    118         newline = newline + line[pos:res.start(0)] + mime_decode(match)
    119         pos = res.end(0)
    120     return newline + line[pos:]
    121 
    122 def unmimify_part(ifile, ofile, decode_base64 = 0):
    123     """Convert a quoted-printable part of a MIME mail message to 8bit."""
    124     multipart = None
    125     quoted_printable = 0
    126     is_base64 = 0
    127     is_repl = 0
    128     if ifile.boundary and ifile.boundary[:2] == QUOTE:
    129         prefix = QUOTE
    130     else:
    131         prefix = ''
    132 
    133     # read header
    134     hfile = HeaderFile(ifile)
    135     while 1:
    136         line = hfile.readline()
    137         if not line:
    138             return
    139         if prefix and line[:len(prefix)] == prefix:
    140             line = line[len(prefix):]
    141             pref = prefix
    142         else:
    143             pref = ''
    144         line = mime_decode_header(line)
    145         if qp.match(line):
    146             quoted_printable = 1
    147             continue        # skip this header
    148         if decode_base64 and base64_re.match(line):
    149             is_base64 = 1
    150             continue
    151         ofile.write(pref + line)
    152         if not prefix and repl.match(line):
    153             # we're dealing with a reply message
    154             is_repl = 1
    155         mp_res = mp.match(line)
    156         if mp_res:
    157             multipart = '--' + mp_res.group(1)
    158         if he.match(line):
    159             break
    160     if is_repl and (quoted_printable or multipart):
    161         is_repl = 0
    162 
    163     # read body
    164     while 1:
    165         line = ifile.readline()
    166         if not line:
    167             return
    168         line = re.sub(mime_head, '\\1', line)
    169         if prefix and line[:len(prefix)] == prefix:
    170             line = line[len(prefix):]
    171             pref = prefix
    172         else:
    173             pref = ''
    174 ##              if is_repl and len(line) >= 4 and line[:4] == QUOTE+'--' and line[-3:] != '--\n':
    175 ##                      multipart = line[:-1]
    176         while multipart:
    177             if line == multipart + '--\n':
    178                 ofile.write(pref + line)
    179                 multipart = None
    180                 line = None
    181                 break
    182             if line == multipart + '\n':
    183                 ofile.write(pref + line)
    184                 nifile = File(ifile, multipart)
    185                 unmimify_part(nifile, ofile, decode_base64)
    186                 line = nifile.peek
    187                 if not line:
    188                     # premature end of file
    189                     break
    190                 continue
    191             # not a boundary between parts
    192             break
    193         if line and quoted_printable:
    194             while line[-2:] == '=\n':
    195                 line = line[:-2]
    196                 newline = ifile.readline()
    197                 if newline[:len(QUOTE)] == QUOTE:
    198                     newline = newline[len(QUOTE):]
    199                 line = line + newline
    200             line = mime_decode(line)
    201         if line and is_base64 and not pref:
    202             import base64
    203             line = base64.decodestring(line)
    204         if line:
    205             ofile.write(pref + line)
    206 
    207 def unmimify(infile, outfile, decode_base64 = 0):
    208     """Convert quoted-printable parts of a MIME mail message to 8bit."""
    209     if type(infile) == type(''):
    210         ifile = open(infile)
    211         if type(outfile) == type('') and infile == outfile:
    212             import os
    213             d, f = os.path.split(infile)
    214             os.rename(infile, os.path.join(d, ',' + f))
    215     else:
    216         ifile = infile
    217     if type(outfile) == type(''):
    218         ofile = open(outfile, 'w')
    219     else:
    220         ofile = outfile
    221     nifile = File(ifile, None)
    222     unmimify_part(nifile, ofile, decode_base64)
    223     ofile.flush()
    224 
    225 mime_char = re.compile('[=\177-\377]') # quote these chars in body
    226 mime_header_char = re.compile('[=?\177-\377]') # quote these in header
    227 
    228 def mime_encode(line, header):
    229     """Code a single line as quoted-printable.
    230     If header is set, quote some extra characters."""
    231     if header:
    232         reg = mime_header_char
    233     else:
    234         reg = mime_char
    235     newline = ''
    236     pos = 0
    237     if len(line) >= 5 and line[:5] == 'From ':
    238         # quote 'From ' at the start of a line for stupid mailers
    239         newline = ('=%02x' % ord('F')).upper()
    240         pos = 1
    241     while 1:
    242         res = reg.search(line, pos)
    243         if res is None:
    244             break
    245         newline = newline + line[pos:res.start(0)] + \
    246                   ('=%02x' % ord(res.group(0))).upper()
    247         pos = res.end(0)
    248     line = newline + line[pos:]
    249 
    250     newline = ''
    251     while len(line) >= 75:
    252         i = 73
    253         while line[i] == '=' or line[i-1] == '=':
    254             i = i - 1
    255         i = i + 1
    256         newline = newline + line[:i] + '=\n'
    257         line = line[i:]
    258     return newline + line
    259 
    260 mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=[ \t)]|\n)')
    261 
    262 def mime_encode_header(line):
    263     """Code a single header line as quoted-printable."""
    264     newline = ''
    265     pos = 0
    266     while 1:
    267         res = mime_header.search(line, pos)
    268         if res is None:
    269             break
    270         newline = '%s%s%s=?%s?Q?%s?=' % \
    271                   (newline, line[pos:res.start(0)], res.group(1),
    272                    CHARSET, mime_encode(res.group(2), 1))
    273         pos = res.end(0)
    274     return newline + line[pos:]
    275 
# A Mime-Version header line.
mv = re.compile('^mime-version:', re.I)
# Any Content-Transfer-Encoding header line, whatever the encoding.
cte = re.compile('^content-transfer-encoding:', re.I)
# A single character in the range \177-\377.
iso_char = re.compile('[\177-\377]')
    279 
def mimify_part(ifile, ofile, is_mime):
    """Convert an 8bit part of a MIME mail message to quoted-printable.

    ifile is read with readline() only; ofile only needs a write() method.
    is_mime tells whether a Mime-Version header is already known to
    apply; it is also set when such a header is found in this part.

    The header and the body of the part are read completely before
    anything is written, because the header that is written depends on
    what the body contains.  The parts of a multipart message are handled
    by calling this function recursively.

    Returns None; all output goes to ofile.
    """
    has_cte = is_qp = is_base64 = 0
    multipart = None
    must_quote_body = must_quote_header = has_iso_chars = 0

    header = []         # header lines, without the line that ends the header
    header_end = ''     # the line that ended the header ('' at end of file)
    message = []        # body lines, already decoded if they were quoted-printable
    message_end = ''    # the boundary line that ended the body, if any
    # read header
    hfile = HeaderFile(ifile)
    while 1:
        line = hfile.readline()
        if not line:
            break
        if not must_quote_header and iso_char.search(line):
            must_quote_header = 1
        if mv.match(line):
            is_mime = 1
        if cte.match(line):
            has_cte = 1
            if qp.match(line):
                is_qp = 1
            elif base64_re.match(line):
                is_base64 = 1
        mp_res = mp.match(line)
        if mp_res:
            multipart = '--' + mp_res.group(1)
        if he.match(line):
            header_end = line
            break
        header.append(line)

    # read body
    while 1:
        line = ifile.readline()
        if not line:
            break
        if multipart:
            # a boundary line (closing or not) ends the body of this part
            if line == multipart + '--\n':
                message_end = line
                break
            if line == multipart + '\n':
                message_end = line
                break
        if is_base64:
            # base64 lines are stored untouched and never inspected
            message.append(line)
            continue
        if is_qp:
            # join soft line breaks, then decode, so that the tests below
            # look at the decoded text
            while line[-2:] == '=\n':
                line = line[:-2]
                newline = ifile.readline()
                if newline[:len(QUOTE)] == QUOTE:
                    newline = newline[len(QUOTE):]
                line = line + newline
            line = mime_decode(line)
        message.append(line)
        if not has_iso_chars:
            if iso_char.search(line):
                has_iso_chars = must_quote_body = 1
        if not must_quote_body:
            if len(line) > MAXLEN:
                must_quote_body = 1

    # convert and output header and body
    for line in header:
        if must_quote_header:
            line = mime_encode_header(line)
        chrset_res = chrset.match(line)
        if chrset_res:
            if has_iso_chars:
                # change us-ascii into iso-8859-1
                if chrset_res.group(2).lower() == 'us-ascii':
                    line = '%s%s%s' % (chrset_res.group(1),
                                       CHARSET,
                                       chrset_res.group(3))
            else:
                # change iso-8859-* into us-ascii
                line = '%sus-ascii%s' % chrset_res.group(1, 3)
        if has_cte and cte.match(line):
            # rewrite the header to name the encoding the body is written in
            line = 'Content-Transfer-Encoding: '
            if is_base64:
                line = line + 'base64\n'
            elif must_quote_body:
                line = line + 'quoted-printable\n'
            else:
                line = line + '7bit\n'
        ofile.write(line)
    # add MIME headers when something gets encoded and no Mime-Version
    # header is known to apply
    if (must_quote_header or must_quote_body) and not is_mime:
        ofile.write('Mime-Version: 1.0\n')
        ofile.write('Content-Type: text/plain; ')
        if has_iso_chars:
            ofile.write('charset="%s"\n' % CHARSET)
        else:
            ofile.write('charset="us-ascii"\n')
    if must_quote_body and not has_cte:
        ofile.write('Content-Transfer-Encoding: quoted-printable\n')
    ofile.write(header_end)

    for line in message:
        if must_quote_body:
            line = mime_encode(line, 0)
        ofile.write(line)
    ofile.write(message_end)

    # handle what follows the boundary line that ended the body
    line = message_end
    while multipart:
        if line == multipart + '--\n':
            # read bit after the end of the last part
            while 1:
                line = ifile.readline()
                if not line:
                    return
                if must_quote_body:
                    line = mime_encode(line, 0)
                ofile.write(line)
        if line == multipart + '\n':
            nifile = File(ifile, multipart)
            mimify_part(nifile, ofile, 1)
            # the boundary line that ended the nested part, if any
            line = nifile.peek
            if not line:
                # premature end of file
                break
            ofile.write(line)
            continue
        # unexpectedly no multipart separator--copy rest of file
        while 1:
            line = ifile.readline()
            if not line:
                return
            if must_quote_body:
                line = mime_encode(line, 0)
            ofile.write(line)
    414 
    415 def mimify(infile, outfile):
    416     """Convert 8bit parts of a MIME mail message to quoted-printable."""
    417     if type(infile) == type(''):
    418         ifile = open(infile)
    419         if type(outfile) == type('') and infile == outfile:
    420             import os
    421             d, f = os.path.split(infile)
    422             os.rename(infile, os.path.join(d, ',' + f))
    423     else:
    424         ifile = infile
    425     if type(outfile) == type(''):
    426         ofile = open(outfile, 'w')
    427     else:
    428         ofile = outfile
    429     nifile = File(ifile, None)
    430     mimify_part(nifile, ofile, 0)
    431     ofile.flush()
    432 
    433 import sys
    434 if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
    435     import getopt
    436     usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'
    437 
    438     decode_base64 = 0
    439     opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
    440     if len(args) not in (0, 1, 2):
    441         print usage
    442         sys.exit(1)
    443     if (('-e', '') in opts) == (('-d', '') in opts) or \
    444        ((('-b', '') in opts) and (('-d', '') not in opts)):
    445         print usage
    446         sys.exit(1)
    447     for o, a in opts:
    448         if o == '-e':
    449             encode = mimify
    450         elif o == '-d':
    451             encode = unmimify
    452         elif o == '-l':
    453             try:
    454                 MAXLEN = int(a)
    455             except (ValueError, OverflowError):
    456                 print usage
    457                 sys.exit(1)
    458         elif o == '-b':
    459             decode_base64 = 1
    460     if len(args) == 0:
    461         encode_args = (sys.stdin, sys.stdout)
    462     elif len(args) == 1:
    463         encode_args = (args[0], sys.stdout)
    464     else:
    465         encode_args = (args[0], args[1])
    466     if decode_base64:
    467         encode_args = encode_args + (decode_base64,)
    468     encode(*encode_args)
    469