Home | History | Annotate | Download | only in Lib
      1 #! /usr/bin/env python
      2 
      3 """RFC 3548: Base16, Base32, Base64 Data Encodings"""
      4 
      5 # Modified 04-Oct-1995 by Jack Jansen to use binascii module
      6 # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
      7 
      8 import re
      9 import struct
     10 import string
     11 import binascii
     12 
     13 
     14 __all__ = [
     15     # Legacy interface exports traditional RFC 1521 Base64 encodings
     16     'encode', 'decode', 'encodestring', 'decodestring',
     17     # Generalized interface for other encodings
     18     'b64encode', 'b64decode', 'b32encode', 'b32decode',
     19     'b16encode', 'b16decode',
     20     # Standard Base64 encoding
     21     'standard_b64encode', 'standard_b64decode',
     22     # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
     23     # starting at:
     24     #
     25     # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
     26     'urlsafe_b64encode', 'urlsafe_b64decode',
     27     ]
     28 
     29 _translation = [chr(_x) for _x in range(256)]
     30 EMPTYSTRING = ''
     31 
     32 
     33 def _translate(s, altchars):
     34     translation = _translation[:]
     35     for k, v in altchars.items():
     36         translation[ord(k)] = v
     37     return s.translate(''.join(translation))
     38 
     39 
     40 
     42 # Base64 encoding/decoding uses binascii
     43 
     44 def b64encode(s, altchars=None):
     45     """Encode a string using Base64.
     46 
     47     s is the string to encode.  Optional altchars must be a string of at least
     48     length 2 (additional characters are ignored) which specifies an
     49     alternative alphabet for the '+' and '/' characters.  This allows an
     50     application to e.g. generate url or filesystem safe Base64 strings.
     51 
     52     The encoded string is returned.
     53     """
     54     # Strip off the trailing newline
     55     encoded = binascii.b2a_base64(s)[:-1]
     56     if altchars is not None:
     57         return encoded.translate(string.maketrans(b'+/', altchars[:2]))
     58     return encoded
     59 
     60 
     61 def b64decode(s, altchars=None):
     62     """Decode a Base64 encoded string.
     63 
     64     s is the string to decode.  Optional altchars must be a string of at least
     65     length 2 (additional characters are ignored) which specifies the
     66     alternative alphabet used instead of the '+' and '/' characters.
     67 
     68     The decoded string is returned.  A TypeError is raised if s is
     69     incorrectly padded.  Characters that are neither in the normal base-64
     70     alphabet nor the alternative alphabet are discarded prior to the padding
     71     check.
     72     """
     73     if altchars is not None:
     74         s = s.translate(string.maketrans(altchars[:2], '+/'))
     75     try:
     76         return binascii.a2b_base64(s)
     77     except binascii.Error, msg:
     78         # Transform this exception for consistency
     79         raise TypeError(msg)
     80 
     81 
     82 def standard_b64encode(s):
     83     """Encode a string using the standard Base64 alphabet.
     84 
     85     s is the string to encode.  The encoded string is returned.
     86     """
     87     return b64encode(s)
     88 
     89 def standard_b64decode(s):
     90     """Decode a string encoded with the standard Base64 alphabet.
     91 
     92     Argument s is the string to decode.  The decoded string is returned.  A
     93     TypeError is raised if the string is incorrectly padded.  Characters that
     94     are not in the standard alphabet are discarded prior to the padding
     95     check.
     96     """
     97     return b64decode(s)
     98 
     99 _urlsafe_encode_translation = string.maketrans(b'+/', b'-_')
    100 _urlsafe_decode_translation = string.maketrans(b'-_', b'+/')
    101 
    102 def urlsafe_b64encode(s):
    103     """Encode a string using the URL- and filesystem-safe Base64 alphabet.
    104 
    105     Argument s is the string to encode.  The encoded string is returned.  The
    106     alphabet uses '-' instead of '+' and '_' instead of '/'.
    107     """
    108     return b64encode(s).translate(_urlsafe_encode_translation)
    109 
    110 def urlsafe_b64decode(s):
    111     """Decode a string using the URL- and filesystem-safe Base64 alphabet.
    112 
    113     Argument s is the string to decode.  The decoded string is returned.  A
    114     TypeError is raised if the string is incorrectly padded.  Characters that
    115     are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash
    116     '/', are discarded prior to the padding check.
    117 
    118     The alphabet uses '-' instead of '+' and '_' instead of '/'.
    119     """
    120     return b64decode(s.translate(_urlsafe_decode_translation))
    121 
    122 
    123 
    125 # Base32 encoding/decoding must be done in Python
    126 _b32alphabet = {
    127     0: 'A',  9: 'J', 18: 'S', 27: '3',
    128     1: 'B', 10: 'K', 19: 'T', 28: '4',
    129     2: 'C', 11: 'L', 20: 'U', 29: '5',
    130     3: 'D', 12: 'M', 21: 'V', 30: '6',
    131     4: 'E', 13: 'N', 22: 'W', 31: '7',
    132     5: 'F', 14: 'O', 23: 'X',
    133     6: 'G', 15: 'P', 24: 'Y',
    134     7: 'H', 16: 'Q', 25: 'Z',
    135     8: 'I', 17: 'R', 26: '2',
    136     }
    137 
    138 _b32tab = _b32alphabet.items()
    139 _b32tab.sort()
    140 _b32tab = [v for k, v in _b32tab]
    141 _b32rev = dict([(v, long(k)) for k, v in _b32alphabet.items()])
    142 
    143 
    144 def b32encode(s):
    145     """Encode a string using Base32.
    146 
    147     s is the string to encode.  The encoded string is returned.
    148     """
    149     parts = []
    150     quanta, leftover = divmod(len(s), 5)
    151     # Pad the last quantum with zero bits if necessary
    152     if leftover:
    153         s += ('\0' * (5 - leftover))
    154         quanta += 1
    155     for i in range(quanta):
    156         # c1 and c2 are 16 bits wide, c3 is 8 bits wide.  The intent of this
    157         # code is to process the 40 bits in units of 5 bits.  So we take the 1
    158         # leftover bit of c1 and tack it onto c2.  Then we take the 2 leftover
    159         # bits of c2 and tack them onto c3.  The shifts and masks are intended
    160         # to give us values of exactly 5 bits in width.
    161         c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
    162         c2 += (c1 & 1) << 16 # 17 bits wide
    163         c3 += (c2 & 3) << 8  # 10 bits wide
    164         parts.extend([_b32tab[c1 >> 11],         # bits 1 - 5
    165                       _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
    166                       _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
    167                       _b32tab[c2 >> 12],         # bits 16 - 20 (1 - 5)
    168                       _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
    169                       _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
    170                       _b32tab[c3 >> 5],          # bits 31 - 35 (1 - 5)
    171                       _b32tab[c3 & 0x1f],        # bits 36 - 40 (1 - 5)
    172                       ])
    173     encoded = EMPTYSTRING.join(parts)
    174     # Adjust for any leftover partial quanta
    175     if leftover == 1:
    176         return encoded[:-6] + '======'
    177     elif leftover == 2:
    178         return encoded[:-4] + '===='
    179     elif leftover == 3:
    180         return encoded[:-3] + '==='
    181     elif leftover == 4:
    182         return encoded[:-1] + '='
    183     return encoded
    184 
    185 
    186 def b32decode(s, casefold=False, map01=None):
    187     """Decode a Base32 encoded string.
    188 
    189     s is the string to decode.  Optional casefold is a flag specifying whether
    190     a lowercase alphabet is acceptable as input.  For security purposes, the
    191     default is False.
    192 
    193     RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O
    194     (oh), and for optional mapping of the digit 1 (one) to either the letter I
    195     (eye) or letter L (el).  The optional argument map01 when not None,
    196     specifies which letter the digit 1 should be mapped to (when map01 is not
    197     None, the digit 0 is always mapped to the letter O).  For security
    198     purposes the default is None, so that 0 and 1 are not allowed in the
    199     input.
    200 
    201     The decoded string is returned.  A TypeError is raised if s were
    202     incorrectly padded or if there are non-alphabet characters present in the
    203     string.
    204     """
    205     quanta, leftover = divmod(len(s), 8)
    206     if leftover:
    207         raise TypeError('Incorrect padding')
    208     # Handle section 2.4 zero and one mapping.  The flag map01 will be either
    209     # False, or the character to map the digit 1 (one) to.  It should be
    210     # either L (el) or I (eye).
    211     if map01:
    212         s = s.translate(string.maketrans(b'01', b'O' + map01))
    213     if casefold:
    214         s = s.upper()
    215     # Strip off pad characters from the right.  We need to count the pad
    216     # characters because this will tell us how many null bytes to remove from
    217     # the end of the decoded string.
    218     padchars = 0
    219     mo = re.search('(?P<pad>[=]*)$', s)
    220     if mo:
    221         padchars = len(mo.group('pad'))
    222         if padchars > 0:
    223             s = s[:-padchars]
    224     # Now decode the full quanta
    225     parts = []
    226     acc = 0
    227     shift = 35
    228     for c in s:
    229         val = _b32rev.get(c)
    230         if val is None:
    231             raise TypeError('Non-base32 digit found')
    232         acc += _b32rev[c] << shift
    233         shift -= 5
    234         if shift < 0:
    235             parts.append(binascii.unhexlify('%010x' % acc))
    236             acc = 0
    237             shift = 35
    238     # Process the last, partial quanta
    239     last = binascii.unhexlify('%010x' % acc)
    240     if padchars == 0:
    241         last = ''                       # No characters
    242     elif padchars == 1:
    243         last = last[:-1]
    244     elif padchars == 3:
    245         last = last[:-2]
    246     elif padchars == 4:
    247         last = last[:-3]
    248     elif padchars == 6:
    249         last = last[:-4]
    250     else:
    251         raise TypeError('Incorrect padding')
    252     parts.append(last)
    253     return EMPTYSTRING.join(parts)
    254 
    255 
    256 
    258 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
    259 # lowercase.  The RFC also recommends against accepting input case
    260 # insensitively.
    261 def b16encode(s):
    262     """Encode a string using Base16.
    263 
    264     s is the string to encode.  The encoded string is returned.
    265     """
    266     return binascii.hexlify(s).upper()
    267 
    268 
    269 def b16decode(s, casefold=False):
    270     """Decode a Base16 encoded string.
    271 
    272     s is the string to decode.  Optional casefold is a flag specifying whether
    273     a lowercase alphabet is acceptable as input.  For security purposes, the
    274     default is False.
    275 
    276     The decoded string is returned.  A TypeError is raised if s is
    277     incorrectly padded or if there are non-alphabet characters present in the
    278     string.
    279     """
    280     if casefold:
    281         s = s.upper()
    282     if re.search('[^0-9A-F]', s):
    283         raise TypeError('Non-base16 digit found')
    284     return binascii.unhexlify(s)
    285 
    286 
    287 
    289 # Legacy interface.  This code could be cleaned up since I don't believe
    290 # binascii has any line length limitations.  It just doesn't seem worth it
    291 # though.
    292 
    293 MAXLINESIZE = 76 # Excluding the CRLF
    294 MAXBINSIZE = (MAXLINESIZE//4)*3
    295 
    296 def encode(input, output):
    297     """Encode a file."""
    298     while True:
    299         s = input.read(MAXBINSIZE)
    300         if not s:
    301             break
    302         while len(s) < MAXBINSIZE:
    303             ns = input.read(MAXBINSIZE-len(s))
    304             if not ns:
    305                 break
    306             s += ns
    307         line = binascii.b2a_base64(s)
    308         output.write(line)
    309 
    310 
    311 def decode(input, output):
    312     """Decode a file."""
    313     while True:
    314         line = input.readline()
    315         if not line:
    316             break
    317         s = binascii.a2b_base64(line)
    318         output.write(s)
    319 
    320 
    321 def encodestring(s):
    322     """Encode a string into multiple lines of base-64 data."""
    323     pieces = []
    324     for i in range(0, len(s), MAXBINSIZE):
    325         chunk = s[i : i + MAXBINSIZE]
    326         pieces.append(binascii.b2a_base64(chunk))
    327     return "".join(pieces)
    328 
    329 
    330 def decodestring(s):
    331     """Decode a string."""
    332     return binascii.a2b_base64(s)
    333 
    334 
    335 
    337 # Useable as a script...
    338 def test():
    339     """Small test program"""
    340     import sys, getopt
    341     try:
    342         opts, args = getopt.getopt(sys.argv[1:], 'deut')
    343     except getopt.error, msg:
    344         sys.stdout = sys.stderr
    345         print msg
    346         print """usage: %s [-d|-e|-u|-t] [file|-]
    347         -d, -u: decode
    348         -e: encode (default)
    349         -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
    350         sys.exit(2)
    351     func = encode
    352     for o, a in opts:
    353         if o == '-e': func = encode
    354         if o == '-d': func = decode
    355         if o == '-u': func = decode
    356         if o == '-t': test1(); return
    357     if args and args[0] != '-':
    358         with open(args[0], 'rb') as f:
    359             func(f, sys.stdout)
    360     else:
    361         func(sys.stdin, sys.stdout)
    362 
    363 
    364 def test1():
    365     s0 = "Aladdin:open sesame"
    366     s1 = encodestring(s0)
    367     s2 = decodestring(s1)
    368     print s0, repr(s1), s2
    369 
    370 
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()
    373