Home | History | Annotate | Download | only in Lib
      1 #.  Copyright (C) 2005-2010   Gregory P. Smith (greg (at] krypto.org)
      2 #  Licensed to PSF under a Contributor Agreement.
      3 #
      4 
      5 __doc__ = """hashlib module - A common interface to many hash functions.
      6 
      7 new(name, data=b'', **kwargs) - returns a new hash object implementing the
      8                                 given hash function; initializing the hash
      9                                 using the given binary data.
     10 
Named constructor functions are also available; these are faster
than using new(name):

md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(),
sha3_224(), sha3_256(), sha3_384(), sha3_512(), shake_128(), and shake_256().
     16 
     17 More algorithms may be available on your platform but the above are guaranteed
     18 to exist.  See the algorithms_guaranteed and algorithms_available attributes
     19 to find out what algorithm names can be passed to new().
     20 
     21 NOTE: If you want the adler32 or crc32 hash functions they are available in
     22 the zlib module.
     23 
     24 Choose your hash function wisely.  Some have known collision weaknesses.
     25 sha384 and sha512 will be slow on 32 bit platforms.
     26 
     27 Hash objects have these methods:
     28  - update(data): Update the hash object with the bytes in data. Repeated calls
     29                  are equivalent to a single call with the concatenation of all
     30                  the arguments.
     31  - digest():     Return the digest of the bytes passed to the update() method
     32                  so far as a bytes object.
     33  - hexdigest():  Like digest() except the digest is returned as a string
     34                  of double length, containing only hexadecimal digits.
 - copy():       Return a copy (clone) of the hash object. This can be used to
                 efficiently compute the digests of inputs that share a common
                 initial substring.
     38 
     39 For example, to obtain the digest of the byte string 'Nobody inspects the
     40 spammish repetition':
     41 
     42     >>> import hashlib
     43     >>> m = hashlib.md5()
     44     >>> m.update(b"Nobody inspects")
     45     >>> m.update(b" the spammish repetition")
     46     >>> m.digest()
     47     b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9'
     48 
     49 More condensed:
     50 
     51     >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest()
     52     'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2'
     53 
     54 """
     55 
     56 # This tuple and __get_builtin_constructor() must be modified if a new
     57 # always available algorithm is added.
     58 __always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
     59                       'blake2b', 'blake2s',
     60                       'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
     61                       'shake_128', 'shake_256')
     62 
     63 
     64 algorithms_guaranteed = set(__always_supported)
     65 algorithms_available = set(__always_supported)
     66 
     67 __all__ = __always_supported + ('new', 'algorithms_guaranteed',
     68                                 'algorithms_available', 'pbkdf2_hmac')
     69 
     70 
     71 __builtin_constructor_cache = {}
     72 
     73 def __get_builtin_constructor(name):
     74     cache = __builtin_constructor_cache
     75     constructor = cache.get(name)
     76     if constructor is not None:
     77         return constructor
     78     try:
     79         if name in ('SHA1', 'sha1'):
     80             import _sha1
     81             cache['SHA1'] = cache['sha1'] = _sha1.sha1
     82         elif name in ('MD5', 'md5'):
     83             import _md5
     84             cache['MD5'] = cache['md5'] = _md5.md5
     85         elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'):
     86             import _sha256
     87             cache['SHA224'] = cache['sha224'] = _sha256.sha224
     88             cache['SHA256'] = cache['sha256'] = _sha256.sha256
     89         elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'):
     90             import _sha512
     91             cache['SHA384'] = cache['sha384'] = _sha512.sha384
     92             cache['SHA512'] = cache['sha512'] = _sha512.sha512
     93         elif name in ('blake2b', 'blake2s'):
     94             import _blake2
     95             cache['blake2b'] = _blake2.blake2b
     96             cache['blake2s'] = _blake2.blake2s
     97         elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
     98                       'shake_128', 'shake_256'}:
     99             import _sha3
    100             cache['sha3_224'] = _sha3.sha3_224
    101             cache['sha3_256'] = _sha3.sha3_256
    102             cache['sha3_384'] = _sha3.sha3_384
    103             cache['sha3_512'] = _sha3.sha3_512
    104             cache['shake_128'] = _sha3.shake_128
    105             cache['shake_256'] = _sha3.shake_256
    106     except ImportError:
    107         pass  # no extension module, this hash is unsupported.
    108 
    109     constructor = cache.get(name)
    110     if constructor is not None:
    111         return constructor
    112 
    113     raise ValueError('unsupported hash type ' + name)
    114 
    115 
    116 def __get_openssl_constructor(name):
    117     if name in {'blake2b', 'blake2s'}:
    118         # Prefer our blake2 implementation.
    119         return __get_builtin_constructor(name)
    120     try:
    121         f = getattr(_hashlib, 'openssl_' + name)
    122         # Allow the C module to raise ValueError.  The function will be
    123         # defined but the hash not actually available thanks to OpenSSL.
    124         f()
    125         # Use the C function directly (very fast)
    126         return f
    127     except (AttributeError, ValueError):
    128         return __get_builtin_constructor(name)
    129 
    130 
    131 def __py_new(name, data=b'', **kwargs):
    132     """new(name, data=b'', **kwargs) - Return a new hashing object using the
    133     named algorithm; optionally initialized with data (which must be
    134     a bytes-like object).
    135     """
    136     return __get_builtin_constructor(name)(data, **kwargs)
    137 
    138 
    139 def __hash_new(name, data=b'', **kwargs):
    140     """new(name, data=b'') - Return a new hashing object using the named algorithm;
    141     optionally initialized with data (which must be a bytes-like object).
    142     """
    143     if name in {'blake2b', 'blake2s'}:
    144         # Prefer our blake2 implementation.
    145         # OpenSSL 1.1.0 comes with a limited implementation of blake2b/s.
    146         # It does neither support keyed blake2 nor advanced features like
    147         # salt, personal, tree hashing or SSE.
    148         return __get_builtin_constructor(name)(data, **kwargs)
    149     try:
    150         return _hashlib.new(name, data)
    151     except ValueError:
    152         # If the _hashlib module (OpenSSL) doesn't support the named
    153         # hash, try using our builtin implementations.
    154         # This allows for SHA224/256 and SHA384/512 support even though
    155         # the OpenSSL library prior to 0.9.8 doesn't provide them.
    156         return __get_builtin_constructor(name)(data)
    157 
    158 
    159 try:
    160     import _hashlib
    161     new = __hash_new
    162     __get_hash = __get_openssl_constructor
    163     algorithms_available = algorithms_available.union(
    164             _hashlib.openssl_md_meth_names)
    165 except ImportError:
    166     new = __py_new
    167     __get_hash = __get_builtin_constructor
    168 
    169 try:
    170     # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
    171     from _hashlib import pbkdf2_hmac
    172 except ImportError:
    173     _trans_5C = bytes((x ^ 0x5C) for x in range(256))
    174     _trans_36 = bytes((x ^ 0x36) for x in range(256))
    175 
    176     def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None):
    177         """Password based key derivation function 2 (PKCS #5 v2.0)
    178 
    179         This Python implementations based on the hmac module about as fast
    180         as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster
    181         for long passwords.
    182         """
    183         if not isinstance(hash_name, str):
    184             raise TypeError(hash_name)
    185 
    186         if not isinstance(password, (bytes, bytearray)):
    187             password = bytes(memoryview(password))
    188         if not isinstance(salt, (bytes, bytearray)):
    189             salt = bytes(memoryview(salt))
    190 
    191         # Fast inline HMAC implementation
    192         inner = new(hash_name)
    193         outer = new(hash_name)
    194         blocksize = getattr(inner, 'block_size', 64)
    195         if len(password) > blocksize:
    196             password = new(hash_name, password).digest()
    197         password = password + b'\x00' * (blocksize - len(password))
    198         inner.update(password.translate(_trans_36))
    199         outer.update(password.translate(_trans_5C))
    200 
    201         def prf(msg, inner=inner, outer=outer):
    202             # PBKDF2_HMAC uses the password as key. We can re-use the same
    203             # digest objects and just update copies to skip initialization.
    204             icpy = inner.copy()
    205             ocpy = outer.copy()
    206             icpy.update(msg)
    207             ocpy.update(icpy.digest())
    208             return ocpy.digest()
    209 
    210         if iterations < 1:
    211             raise ValueError(iterations)
    212         if dklen is None:
    213             dklen = outer.digest_size
    214         if dklen < 1:
    215             raise ValueError(dklen)
    216 
    217         dkey = b''
    218         loop = 1
    219         from_bytes = int.from_bytes
    220         while len(dkey) < dklen:
    221             prev = prf(salt + loop.to_bytes(4, 'big'))
    222             # endianness doesn't matter here as long to / from use the same
    223             rkey = int.from_bytes(prev, 'big')
    224             for i in range(iterations - 1):
    225                 prev = prf(prev)
    226                 # rkey = rkey ^ prev
    227                 rkey ^= from_bytes(prev, 'big')
    228             loop += 1
    229             dkey += rkey.to_bytes(inner.digest_size, 'big')
    230 
    231         return dkey[:dklen]
    232 
    233 try:
    234     # OpenSSL's scrypt requires OpenSSL 1.1+
    235     from _hashlib import scrypt
    236 except ImportError:
    237     pass
    238 
    239 
    240 for __func_name in __always_supported:
    241     # try them all, some may not work due to the OpenSSL
    242     # version not supporting that algorithm.
    243     try:
    244         globals()[__func_name] = __get_hash(__func_name)
    245     except ValueError:
    246         import logging
    247         logging.exception('code for hash %s was not found.', __func_name)
    248 
    249 
    250 # Cleanup locals()
    251 del __always_supported, __func_name, __get_hash
    252 del __py_new, __hash_new, __get_openssl_constructor
    253