1 #! /usr/bin/env python 2 3 """RFC 3548: Base16, Base32, Base64 Data Encodings""" 4 5 # Modified 04-Oct-1995 by Jack Jansen to use binascii module 6 # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support 7 8 import re 9 import struct 10 import string 11 import binascii 12 13 14 __all__ = [ 15 # Legacy interface exports traditional RFC 1521 Base64 encodings 16 'encode', 'decode', 'encodestring', 'decodestring', 17 # Generalized interface for other encodings 18 'b64encode', 'b64decode', 'b32encode', 'b32decode', 19 'b16encode', 'b16decode', 20 # Standard Base64 encoding 21 'standard_b64encode', 'standard_b64decode', 22 # Some common Base64 alternatives. As referenced by RFC 3458, see thread 23 # starting at: 24 # 25 # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html 26 'urlsafe_b64encode', 'urlsafe_b64decode', 27 ] 28 29 _translation = [chr(_x) for _x in range(256)] 30 EMPTYSTRING = '' 31 32 33 def _translate(s, altchars): 34 translation = _translation[:] 35 for k, v in altchars.items(): 36 translation[ord(k)] = v 37 return s.translate(''.join(translation)) 38 39 40 42 # Base64 encoding/decoding uses binascii 43 44 def b64encode(s, altchars=None): 45 """Encode a string using Base64. 46 47 s is the string to encode. Optional altchars must be a string of at least 48 length 2 (additional characters are ignored) which specifies an 49 alternative alphabet for the '+' and '/' characters. This allows an 50 application to e.g. generate url or filesystem safe Base64 strings. 51 52 The encoded string is returned. 53 """ 54 # Strip off the trailing newline 55 encoded = binascii.b2a_base64(s)[:-1] 56 if altchars is not None: 57 return encoded.translate(string.maketrans(b'+/', altchars[:2])) 58 return encoded 59 60 61 def b64decode(s, altchars=None): 62 """Decode a Base64 encoded string. 63 64 s is the string to decode. 
Optional altchars must be a string of at least 65 length 2 (additional characters are ignored) which specifies the 66 alternative alphabet used instead of the '+' and '/' characters. 67 68 The decoded string is returned. A TypeError is raised if s is 69 incorrectly padded. Characters that are neither in the normal base-64 70 alphabet nor the alternative alphabet are discarded prior to the padding 71 check. 72 """ 73 if altchars is not None: 74 s = s.translate(string.maketrans(altchars[:2], '+/')) 75 try: 76 return binascii.a2b_base64(s) 77 except binascii.Error, msg: 78 # Transform this exception for consistency 79 raise TypeError(msg) 80 81 82 def standard_b64encode(s): 83 """Encode a string using the standard Base64 alphabet. 84 85 s is the string to encode. The encoded string is returned. 86 """ 87 return b64encode(s) 88 89 def standard_b64decode(s): 90 """Decode a string encoded with the standard Base64 alphabet. 91 92 Argument s is the string to decode. The decoded string is returned. A 93 TypeError is raised if the string is incorrectly padded. Characters that 94 are not in the standard alphabet are discarded prior to the padding 95 check. 96 """ 97 return b64decode(s) 98 99 _urlsafe_encode_translation = string.maketrans(b'+/', b'-_') 100 _urlsafe_decode_translation = string.maketrans(b'-_', b'+/') 101 102 def urlsafe_b64encode(s): 103 """Encode a string using the URL- and filesystem-safe Base64 alphabet. 104 105 Argument s is the string to encode. The encoded string is returned. The 106 alphabet uses '-' instead of '+' and '_' instead of '/'. 107 """ 108 return b64encode(s).translate(_urlsafe_encode_translation) 109 110 def urlsafe_b64decode(s): 111 """Decode a string using the URL- and filesystem-safe Base64 alphabet. 112 113 Argument s is the string to decode. The decoded string is returned. A 114 TypeError is raised if the string is incorrectly padded. 
Characters that 115 are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash 116 '/', are discarded prior to the padding check. 117 118 The alphabet uses '-' instead of '+' and '_' instead of '/'. 119 """ 120 return b64decode(s.translate(_urlsafe_decode_translation)) 121 122 123 125 # Base32 encoding/decoding must be done in Python 126 _b32alphabet = { 127 0: 'A', 9: 'J', 18: 'S', 27: '3', 128 1: 'B', 10: 'K', 19: 'T', 28: '4', 129 2: 'C', 11: 'L', 20: 'U', 29: '5', 130 3: 'D', 12: 'M', 21: 'V', 30: '6', 131 4: 'E', 13: 'N', 22: 'W', 31: '7', 132 5: 'F', 14: 'O', 23: 'X', 133 6: 'G', 15: 'P', 24: 'Y', 134 7: 'H', 16: 'Q', 25: 'Z', 135 8: 'I', 17: 'R', 26: '2', 136 } 137 138 _b32tab = _b32alphabet.items() 139 _b32tab.sort() 140 _b32tab = [v for k, v in _b32tab] 141 _b32rev = dict([(v, long(k)) for k, v in _b32alphabet.items()]) 142 143 144 def b32encode(s): 145 """Encode a string using Base32. 146 147 s is the string to encode. The encoded string is returned. 148 """ 149 parts = [] 150 quanta, leftover = divmod(len(s), 5) 151 # Pad the last quantum with zero bits if necessary 152 if leftover: 153 s += ('\0' * (5 - leftover)) 154 quanta += 1 155 for i in range(quanta): 156 # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this 157 # code is to process the 40 bits in units of 5 bits. So we take the 1 158 # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover 159 # bits of c2 and tack them onto c3. The shifts and masks are intended 160 # to give us values of exactly 5 bits in width. 
161 c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5]) 162 c2 += (c1 & 1) << 16 # 17 bits wide 163 c3 += (c2 & 3) << 8 # 10 bits wide 164 parts.extend([_b32tab[c1 >> 11], # bits 1 - 5 165 _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10 166 _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15 167 _b32tab[c2 >> 12], # bits 16 - 20 (1 - 5) 168 _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10) 169 _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15) 170 _b32tab[c3 >> 5], # bits 31 - 35 (1 - 5) 171 _b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5) 172 ]) 173 encoded = EMPTYSTRING.join(parts) 174 # Adjust for any leftover partial quanta 175 if leftover == 1: 176 return encoded[:-6] + '======' 177 elif leftover == 2: 178 return encoded[:-4] + '====' 179 elif leftover == 3: 180 return encoded[:-3] + '===' 181 elif leftover == 4: 182 return encoded[:-1] + '=' 183 return encoded 184 185 186 def b32decode(s, casefold=False, map01=None): 187 """Decode a Base32 encoded string. 188 189 s is the string to decode. Optional casefold is a flag specifying whether 190 a lowercase alphabet is acceptable as input. For security purposes, the 191 default is False. 192 193 RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O 194 (oh), and for optional mapping of the digit 1 (one) to either the letter I 195 (eye) or letter L (el). The optional argument map01 when not None, 196 specifies which letter the digit 1 should be mapped to (when map01 is not 197 None, the digit 0 is always mapped to the letter O). For security 198 purposes the default is None, so that 0 and 1 are not allowed in the 199 input. 200 201 The decoded string is returned. A TypeError is raised if s were 202 incorrectly padded or if there are non-alphabet characters present in the 203 string. 204 """ 205 quanta, leftover = divmod(len(s), 8) 206 if leftover: 207 raise TypeError('Incorrect padding') 208 # Handle section 2.4 zero and one mapping. 
The flag map01 will be either 209 # False, or the character to map the digit 1 (one) to. It should be 210 # either L (el) or I (eye). 211 if map01: 212 s = s.translate(string.maketrans(b'01', b'O' + map01)) 213 if casefold: 214 s = s.upper() 215 # Strip off pad characters from the right. We need to count the pad 216 # characters because this will tell us how many null bytes to remove from 217 # the end of the decoded string. 218 padchars = 0 219 mo = re.search('(?P<pad>[=]*)$', s) 220 if mo: 221 padchars = len(mo.group('pad')) 222 if padchars > 0: 223 s = s[:-padchars] 224 # Now decode the full quanta 225 parts = [] 226 acc = 0 227 shift = 35 228 for c in s: 229 val = _b32rev.get(c) 230 if val is None: 231 raise TypeError('Non-base32 digit found') 232 acc += _b32rev[c] << shift 233 shift -= 5 234 if shift < 0: 235 parts.append(binascii.unhexlify('%010x' % acc)) 236 acc = 0 237 shift = 35 238 # Process the last, partial quanta 239 last = binascii.unhexlify('%010x' % acc) 240 if padchars == 0: 241 last = '' # No characters 242 elif padchars == 1: 243 last = last[:-1] 244 elif padchars == 3: 245 last = last[:-2] 246 elif padchars == 4: 247 last = last[:-3] 248 elif padchars == 6: 249 last = last[:-4] 250 else: 251 raise TypeError('Incorrect padding') 252 parts.append(last) 253 return EMPTYSTRING.join(parts) 254 255 256 258 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns 259 # lowercase. The RFC also recommends against accepting input case 260 # insensitively. 261 def b16encode(s): 262 """Encode a string using Base16. 263 264 s is the string to encode. The encoded string is returned. 265 """ 266 return binascii.hexlify(s).upper() 267 268 269 def b16decode(s, casefold=False): 270 """Decode a Base16 encoded string. 271 272 s is the string to decode. Optional casefold is a flag specifying whether 273 a lowercase alphabet is acceptable as input. For security purposes, the 274 default is False. 275 276 The decoded string is returned. 
A TypeError is raised if s is 277 incorrectly padded or if there are non-alphabet characters present in the 278 string. 279 """ 280 if casefold: 281 s = s.upper() 282 if re.search('[^0-9A-F]', s): 283 raise TypeError('Non-base16 digit found') 284 return binascii.unhexlify(s) 285 286 287 289 # Legacy interface. This code could be cleaned up since I don't believe 290 # binascii has any line length limitations. It just doesn't seem worth it 291 # though. 292 293 MAXLINESIZE = 76 # Excluding the CRLF 294 MAXBINSIZE = (MAXLINESIZE//4)*3 295 296 def encode(input, output): 297 """Encode a file.""" 298 while True: 299 s = input.read(MAXBINSIZE) 300 if not s: 301 break 302 while len(s) < MAXBINSIZE: 303 ns = input.read(MAXBINSIZE-len(s)) 304 if not ns: 305 break 306 s += ns 307 line = binascii.b2a_base64(s) 308 output.write(line) 309 310 311 def decode(input, output): 312 """Decode a file.""" 313 while True: 314 line = input.readline() 315 if not line: 316 break 317 s = binascii.a2b_base64(line) 318 output.write(s) 319 320 321 def encodestring(s): 322 """Encode a string into multiple lines of base-64 data.""" 323 pieces = [] 324 for i in range(0, len(s), MAXBINSIZE): 325 chunk = s[i : i + MAXBINSIZE] 326 pieces.append(binascii.b2a_base64(chunk)) 327 return "".join(pieces) 328 329 330 def decodestring(s): 331 """Decode a string.""" 332 return binascii.a2b_base64(s) 333 334 335 337 # Useable as a script... 
def test():
    """Small test program: Base64-encode or -decode a file from the command line.

    Options are read from sys.argv: -e encodes (the default), -d and -u
    decode, and -t runs test1() and returns.  Input is the named file, or
    stdin when no file or '-' is given; the result is written to stdout.
    An unrecognized option writes a usage message to stderr and exits with
    status 2.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write to stderr directly instead of rebinding sys.stdout for the
        # whole process.
        sys.stderr.write("%s\n" % msg)
        sys.stderr.write("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'\n""" % sys.argv[0])
        sys.exit(2)
    func = encode
    for opt, _ in opts:
        if opt == '-t':
            test1()
            return
        if opt == '-e':
            func = encode
        elif opt in ('-d', '-u'):
            func = decode
    # encode()/decode() move bytes; on Python 3 the byte-oriented streams
    # live on the .buffer attribute, while on Python 2 the standard streams
    # have no such attribute and are used as they are.
    stdin = getattr(sys.stdin, 'buffer', sys.stdin)
    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, stdout)
    else:
        func(stdin, stdout)


def test1():
    """Round-trip a sample string through encodestring() and decodestring().

    Writes the original, the repr of the encoded form, and the decoded
    result to stdout on one line.
    """
    import sys
    s0 = b"Aladdin:open sesame"
    s1 = encodestring(s0)
    s2 = decodestring(s1)
    sys.stdout.write("%s %s %s\n" % (s0, repr(s1), s2))


if __name__ == '__main__':
    test()