Home | History | Annotate | Download | only in email
      1 # Copyright (C) 2002-2007 Python Software Foundation
      2 # Author: Ben Gertzfield
# Contact: email-sig@python.org
      4 
      5 """Base64 content transfer encoding per RFCs 2045-2047.
      6 
      7 This module handles the content transfer encoding method defined in RFC 2045
      8 to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
      9 characters encoding known as Base64.
     10 
     11 It is used in the MIME standards for email to attach images, audio, and text
     12 using some 8-bit character sets to messages.
     13 
     14 This module provides an interface to encode and decode both headers and bodies
     15 with Base64 encoding.
     16 
RFC 2047 defines a method for including character set information in an
     18 `encoded-word' in a header.  This method is commonly used for 8-bit real names
     19 in To:, From:, Cc:, etc. fields, as well as Subject: lines.
     20 
     21 This module does not do the line wrapping or end-of-line character conversion
     22 necessary for proper internationalized headers; it only does dumb encoding and
     23 decoding.  To deal with the various line wrapping issues, use the email.header
     24 module.
     25 """
     26 
# Public names of this module; this list is what "import *" exports.
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]
     35 
     36 
     37 from base64 import b64encode
     38 from binascii import b2a_base64, a2b_base64
     39 
# Line terminators, plus the empty string that body_encode() uses as its
# join separator.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# NOTE(review): MISC_LEN is not referenced anywhere in this module;
# presumably it is kept in sync with a constant in Charset.py -- confirm.
MISC_LEN = 7
     46 
     47 
     48 
     50 # Helpers
     51 def header_length(bytearray):
     52     """Return the length of s when it is encoded with base64."""
     53     groups_of_3, leftover = divmod(len(bytearray), 3)
     54     # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
     55     n = groups_of_3 * 4
     56     if leftover:
     57         n += 4
     58     return n
     59 
     60 
     61 
     63 def header_encode(header_bytes, charset='iso-8859-1'):
     64     """Encode a single header line with Base64 encoding in a given charset.
     65 
     66     charset names the character set to use to encode the header.  It defaults
     67     to iso-8859-1.  Base64 encoding is defined in RFC 2045.
     68     """
     69     if not header_bytes:
     70         return ""
     71     if isinstance(header_bytes, str):
     72         header_bytes = header_bytes.encode(charset)
     73     encoded = b64encode(header_bytes).decode("ascii")
     74     return '=?%s?b?%s?=' % (charset, encoded)
     75 
     76 
     77 
     79 def body_encode(s, maxlinelen=76, eol=NL):
     80     r"""Encode a string with base64.
     81 
     82     Each line will be wrapped at, at most, maxlinelen characters (defaults to
     83     76 characters).
     84 
     85     Each line of encoded text will end with eol, which defaults to "\n".  Set
     86     this to "\r\n" if you will be using the result of this function directly
     87     in an email.
     88     """
     89     if not s:
     90         return s
     91 
     92     encvec = []
     93     max_unencoded = maxlinelen * 3 // 4
     94     for i in range(0, len(s), max_unencoded):
     95         # BAW: should encode() inherit b2a_base64()'s dubious behavior in
     96         # adding a newline to the encoded string?
     97         enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
     98         if enc.endswith(NL) and eol != NL:
     99             enc = enc[:-1] + eol
    100         encvec.append(enc)
    101     return EMPTYSTRING.join(encvec)
    102 
    103 
    104 
    106 def decode(string):
    107     """Decode a raw base64 string, returning a bytes object.
    108 
    109     This function does not parse a full MIME header value encoded with
    110     base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    111     level email.header class for that functionality.
    112     """
    113     if not string:
    114         return bytes()
    115     elif isinstance(string, str):
    116         return a2b_base64(string.encode('raw-unicode-escape'))
    117     else:
    118         return a2b_base64(string)
    119 
    120 
# For convenience and backwards compatibility w/ standard base64 module
# Both names are plain aliases bound to the decode() function itself.
body_decode = decode
decodestring = decode
    124