Home | History | Annotate | Download | only in email
      1 # Copyright (C) 2002-2006 Python Software Foundation
      2 # Author: Ben Gertzfield
      3 # Contact: email-sig@python.org
      4 
      5 """Base64 content transfer encoding per RFCs 2045-2047.
      6 
      7 This module handles the content transfer encoding method defined in RFC 2045
      8 to encode arbitrary 8-bit data by packing every three 8-bit bytes into four
      9 7-bit characters, an encoding known as Base64.
     10 
     11 It is used in the MIME standards for email to attach images, audio, and text
     12 using some 8-bit character sets to messages.
     13 
     14 This module provides an interface to encode and decode both headers and bodies
     15 with Base64 encoding.
     16 
     17 RFC 2047 defines a method for including character set information in an
     18 `encoded-word' in a header.  This method is commonly used for 8-bit real names
     19 in To:, From:, Cc:, etc. fields, as well as Subject: lines.
     20 
     21 This module does not do the line wrapping or end-of-line character conversion
     22 necessary for proper internationalized headers; it only does dumb encoding and
     23 decoding.  To deal with the various line wrapping issues, use the email.header
     24 module.
     25 """
     26 
# Public names of this module (what ``import *`` exports).  body_encode and
# encodestring are aliases of encode; body_decode and decodestring are
# aliases of decode.
__all__ = [
    'base64_len',
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    ]
     37 
     38 
     39 from binascii import b2a_base64, a2b_base64
     40 from email.utils import fix_eols
     41 
     42 CRLF = '\r\n'
     43 NL = '\n'
     44 EMPTYSTRING = ''
     45 
     46 # See also Charset.py
     47 MISC_LEN = 7
     48 
     49 
     50 
     52 # Helpers
     53 def base64_len(s):
     54     """Return the length of s when it is encoded with base64."""
     55     groups_of_3, leftover = divmod(len(s), 3)
     56     # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
     57     # Thanks, Tim!
     58     n = groups_of_3 * 4
     59     if leftover:
     60         n += 4
     61     return n
     62 
     63 
     64 
     66 def header_encode(header, charset='iso-8859-1', keep_eols=False,
     67                   maxlinelen=76, eol=NL):
     68     """Encode a single header line with Base64 encoding in a given charset.
     69 
     70     Defined in RFC 2045, this Base64 encoding is identical to normal Base64
     71     encoding, except that each line must be intelligently wrapped (respecting
     72     the Base64 encoding), and subsequent lines must start with a space.
     73 
     74     charset names the character set to use to encode the header.  It defaults
     75     to iso-8859-1.
     76 
     77     End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
     78     to the canonical email line separator \\r\\n unless the keep_eols
     79     parameter is True (the default is False).
     80 
     81     Each line of the header will be terminated in the value of eol, which
     82     defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
     83     this function directly in email.
     84 
     85     The resulting string will be in the form:
     86 
     87     "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
     88       =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="
     89 
     90     with each line wrapped at, at most, maxlinelen characters (defaults to 76
     91     characters).
     92     """
     93     # Return empty headers unchanged
     94     if not header:
     95         return header
     96 
     97     if not keep_eols:
     98         header = fix_eols(header)
     99 
    100     # Base64 encode each line, in encoded chunks no greater than maxlinelen in
    101     # length, after the RFC chrome is added in.
    102     base64ed = []
    103     max_encoded = maxlinelen - len(charset) - MISC_LEN
    104     max_unencoded = max_encoded * 3 // 4
    105 
    106     for i in range(0, len(header), max_unencoded):
    107         base64ed.append(b2a_base64(header[i:i+max_unencoded]))
    108 
    109     # Now add the RFC chrome to each encoded chunk
    110     lines = []
    111     for line in base64ed:
    112         # Ignore the last character of each line if it is a newline
    113         if line.endswith(NL):
    114             line = line[:-1]
    115         # Add the chrome
    116         lines.append('=?%s?b?%s?=' % (charset, line))
    117     # Glue the lines together and return it.  BAW: should we be able to
    118     # specify the leading whitespace in the joiner?
    119     joiner = eol + ' '
    120     return joiner.join(lines)
    121 
    122 
    123 
    125 def encode(s, binary=True, maxlinelen=76, eol=NL):
    126     """Encode a string with base64.
    127 
    128     Each line will be wrapped at, at most, maxlinelen characters (defaults to
    129     76 characters).
    130 
    131     If binary is False, end-of-line characters will be converted to the
    132     canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    133     verbatim (this is the default).
    134 
    135     Each line of encoded text will end with eol, which defaults to "\\n".  Set
    136     this to "\\r\\n" if you will be using the result of this function directly
    137     in an email.
    138     """
    139     if not s:
    140         return s
    141 
    142     if not binary:
    143         s = fix_eols(s)
    144 
    145     encvec = []
    146     max_unencoded = maxlinelen * 3 // 4
    147     for i in range(0, len(s), max_unencoded):
    148         # BAW: should encode() inherit b2a_base64()'s dubious behavior in
    149         # adding a newline to the encoded string?
    150         enc = b2a_base64(s[i:i + max_unencoded])
    151         if enc.endswith(NL) and eol != NL:
    152             enc = enc[:-1] + eol
    153         encvec.append(enc)
    154     return EMPTYSTRING.join(encvec)
    155 
    156 
    157 # For convenience and backwards compatibility w/ standard base64 module
    158 body_encode = encode
    159 encodestring = encode
    160 
    161 
    162 
    164 def decode(s, convert_eols=None):
    165     """Decode a raw base64 string.
    166 
    167     If convert_eols is set to a string value, all canonical email linefeeds,
    168     e.g. "\\r\\n", in the decoded text will be converted to the value of
    169     convert_eols.  os.linesep is a good choice for convert_eols if you are
    170     decoding a text attachment.
    171 
    172     This function does not parse a full MIME header value encoded with
    173     base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
    174     level email.header class for that functionality.
    175     """
    176     if not s:
    177         return s
    178 
    179     dec = a2b_base64(s)
    180     if convert_eols:
    181         return dec.replace(CRLF, convert_eols)
    182     return dec
    183 
    184 
    185 # For convenience and backwards compatibility w/ standard base64 module
    186 body_decode = decode
    187 decodestring = decode
    188