# Copyright (C) 2002-2006 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

__all__ = [
    'base64_len',
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    ]


from binascii import b2a_base64, a2b_base64

try:
    from email.utils import fix_eols
except ImportError:
    # Not every Python release ships email.utils.fix_eols; fall back to an
    # equivalent local helper so that this module stays importable.
    def fix_eols(s):
        """Replace every line ending (\\r, \\n or \\r\\n) in s with \\r\\n."""
        if isinstance(s, (bytes, bytearray)):
            cr, lf = b'\r', b'\n'
        else:
            cr, lf = '\r', '\n'
        crlf = cr + lf
        # Collapse all three spellings to a bare LF first, then expand each
        # LF to CRLF, so an existing CRLF pair is never doubled.
        return s.replace(crlf, lf).replace(cr, lf).replace(lf, crlf)

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# Length of the encoded-word chrome around the charset name and the payload:
# '=?' + '?b?' + '?=' is 2 + 3 + 2 characters.
MISC_LEN = 7


# Helpers
def base64_len(s):
    """Return the length of s when it is encoded with base64."""
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    return 4 * ((len(s) + 2) // 3)


def _chunk_size(max_encoded):
    """Return how many raw bytes fit into max_encoded base64 characters.

    The result is always a positive multiple of 3.  Base64 turns each group
    of 3 bytes into 4 characters, so a chunk that is not a multiple of 3
    would be padded with '=' and would encode to more characters than
    max_encoded allows.  A single 3-byte group is the smallest usable chunk,
    hence the floor of 3 even when max_encoded is tiny or negative.
    """
    return max(3, max_encoded // 4 * 3)


def _b64_text(chunk):
    """Base64-encode chunk and return it as a str without a trailing newline."""
    encoded = b2a_base64(chunk)
    if not isinstance(encoded, str):
        # binascii handed back bytes; base64 output is plain ASCII.
        encoded = encoded.decode('ascii')
    # b2a_base64() appends a newline of its own; callers add their own eol.
    if encoded.endswith(NL):
        encoded = encoded[:-1]
    return encoded


def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2045, this Base64 encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    header is the raw data to encode; it is handed to binascii.b2a_base64()
    in slices, so it must be of a type that function accepts.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each encoded word being at most maxlinelen characters long (defaults
    to 76 characters).  The single space that starts every continuation line
    is not counted against maxlinelen.  If maxlinelen leaves no room for even
    one 4-character base64 group after the chrome, one group per encoded word
    is emitted anyway.

    An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
    # length, after the RFC chrome is added in.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    max_unencoded = _chunk_size(max_encoded)

    # Encode each chunk and wrap it in the RFC chrome.
    lines = [
        '=?%s?b?%s?=' % (charset, _b64_text(header[i:i + max_unencoded]))
        for i in range(0, len(header), max_unencoded)
    ]
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)


def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    s is handed to binascii.b2a_base64() in slices, so it must be of a type
    that function accepts.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Every line but the last carries a whole number of
    4-character base64 groups, so padding can only appear at the very end.
    If maxlinelen is smaller than 4, lines of one group (4 characters) are
    produced.

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty s is returned unchanged.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    max_unencoded = _chunk_size(maxlinelen)
    # b2a_base64() always terminates its output with a newline; that newline
    # is dropped by _b64_text() and replaced with the requested eol.
    encvec = [
        _b64_text(s[i:i + max_unencoded]) + eol
        for i in range(0, len(s), max_unencoded)
    ]
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode


def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    The return value is whatever binascii.a2b_base64() produces for s.  An
    empty s is returned unchanged.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not s:
        return s

    dec = a2b_base64(s)
    if not convert_eols:
        return dec

    crlf = CRLF
    if not isinstance(dec, str):
        # binascii handed back bytes, so both operands of replace() have to
        # be bytes as well; line endings are plain ASCII.
        crlf = CRLF.encode('ascii')
        if isinstance(convert_eols, str):
            convert_eols = convert_eols.encode('ascii')
    return dec.replace(crlf, convert_eols)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode