Home | History | Annotate | Download | only in Lib
#.  Copyright (C) 2005-2010   Gregory P. Smith (greg@krypto.org)
      2 #  Licensed to PSF under a Contributor Agreement.
      3 #
      4 
      5 __doc__ = """hashlib module - A common interface to many hash functions.
      6 
      7 new(name, data=b'', **kwargs) - returns a new hash object implementing the
      8                                 given hash function; initializing the hash
      9                                 using the given binary data.
     10 
     11 Named constructor functions are also available, these are faster
     12 than using new(name):
     13 
     14 md5(), sha1(), sha224(), sha256(), sha384(), sha512(), blake2b(), blake2s(),
     15 sha3_224, sha3_256, sha3_384, sha3_512, shake_128, and shake_256.
     16 
     17 More algorithms may be available on your platform but the above are guaranteed
     18 to exist.  See the algorithms_guaranteed and algorithms_available attributes
     19 to find out what algorithm names can be passed to new().
     20 
     21 NOTE: If you want the adler32 or crc32 hash functions they are available in
     22 the zlib module.
     23 
     24 Choose your hash function wisely.  Some have known collision weaknesses.
     25 sha384 and sha512 will be slow on 32 bit platforms.
     26 
     27 Hash objects have these methods:
     28  - update(arg): Update the hash object with the bytes in arg. Repeated calls
     29                 are equivalent to a single call with the concatenation of all
     30                 the arguments.
     31  - digest():    Return the digest of the bytes passed to the update() method
     32                 so far.
     33  - hexdigest(): Like digest() except the digest is returned as a unicode
     34                 object of double length, containing only hexadecimal digits.
     35  - copy():      Return a copy (clone) of the hash object. This can be used to
     36                 efficiently compute the digests of strings that share a common
     37                 initial substring.
     38 
     39 For example, to obtain the digest of the string 'Nobody inspects the
     40 spammish repetition':
     41 
     42     >>> import hashlib
     43     >>> m = hashlib.md5()
     44     >>> m.update(b"Nobody inspects")
     45     >>> m.update(b" the spammish repetition")
     46     >>> m.digest()
     47     b'\\xbbd\\x9c\\x83\\xdd\\x1e\\xa5\\xc9\\xd9\\xde\\xc9\\xa1\\x8d\\xf0\\xff\\xe9'
     48 
     49 More condensed:
     50 
     51     >>> hashlib.sha224(b"Nobody inspects the spammish repetition").hexdigest()
     52     'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2'
     53 
     54 """
     55 
# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
# It also drives the loop near the end of the module that binds one
# module-level constructor per name, and it is deleted during cleanup.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
                      'blake2b', 'blake2s',
                      'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
                      'shake_128', 'shake_256')


# Two separate set objects seeded from the same tuple.
# algorithms_available is rebound further down to also include the OpenSSL
# digest names when the _hashlib extension can be imported.
algorithms_guaranteed = set(__always_supported)
algorithms_available = set(__always_supported)

# Public names: one constructor per guaranteed algorithm plus the helpers
# listed here.  The constructors themselves are bound later in the module.
__all__ = __always_supported + ('new', 'algorithms_guaranteed',
                                'algorithms_available', 'pbkdf2_hmac')


# Maps an algorithm name to its builtin constructor.  Starts empty and is
# filled on demand by __get_builtin_constructor().
__builtin_constructor_cache = {}
     72 
     73 def __get_builtin_constructor(name):
     74     cache = __builtin_constructor_cache
     75     constructor = cache.get(name)
     76     if constructor is not None:
     77         return constructor
     78     try:
     79         if name in ('SHA1', 'sha1'):
     80             import _sha1
     81             cache['SHA1'] = cache['sha1'] = _sha1.sha1
     82         elif name in ('MD5', 'md5'):
     83             import _md5
     84             cache['MD5'] = cache['md5'] = _md5.md5
     85         elif name in ('SHA256', 'sha256', 'SHA224', 'sha224'):
     86             import _sha256
     87             cache['SHA224'] = cache['sha224'] = _sha256.sha224
     88             cache['SHA256'] = cache['sha256'] = _sha256.sha256
     89         elif name in ('SHA512', 'sha512', 'SHA384', 'sha384'):
     90             import _sha512
     91             cache['SHA384'] = cache['sha384'] = _sha512.sha384
     92             cache['SHA512'] = cache['sha512'] = _sha512.sha512
     93         elif name in ('blake2b', 'blake2s'):
     94             import _blake2
     95             cache['blake2b'] = _blake2.blake2b
     96             cache['blake2s'] = _blake2.blake2s
     97         elif name in {'sha3_224', 'sha3_256', 'sha3_384', 'sha3_512',
     98                       'shake_128', 'shake_256'}:
     99             import _sha3
    100             cache['sha3_224'] = _sha3.sha3_224
    101             cache['sha3_256'] = _sha3.sha3_256
    102             cache['sha3_384'] = _sha3.sha3_384
    103             cache['sha3_512'] = _sha3.sha3_512
    104             cache['shake_128'] = _sha3.shake_128
    105             cache['shake_256'] = _sha3.shake_256
    106     except ImportError:
    107         pass  # no extension module, this hash is unsupported.
    108 
    109     constructor = cache.get(name)
    110     if constructor is not None:
    111         return constructor
    112 
    113     raise ValueError('unsupported hash type ' + name)
    114 
    115 
    116 def __get_openssl_constructor(name):
    117     if name in {'blake2b', 'blake2s'}:
    118         # Prefer our blake2 implementation.
    119         return __get_builtin_constructor(name)
    120     try:
    121         f = getattr(_hashlib, 'openssl_' + name)
    122         # Allow the C module to raise ValueError.  The function will be
    123         # defined but the hash not actually available thanks to OpenSSL.
    124         f()
    125         # Use the C function directly (very fast)
    126         return f
    127     except (AttributeError, ValueError):
    128         return __get_builtin_constructor(name)
    129 
    130 
    131 def __py_new(name, data=b'', **kwargs):
    132     """new(name, data=b'', **kwargs) - Return a new hashing object using the
    133     named algorithm; optionally initialized with data (which must be bytes).
    134     """
    135     return __get_builtin_constructor(name)(data, **kwargs)
    136 
    137 
    138 def __hash_new(name, data=b'', **kwargs):
    139     """new(name, data=b'') - Return a new hashing object using the named algorithm;
    140     optionally initialized with data (which must be bytes).
    141     """
    142     if name in {'blake2b', 'blake2s'}:
    143         # Prefer our blake2 implementation.
    144         # OpenSSL 1.1.0 comes with a limited implementation of blake2b/s.
    145         # It does neither support keyed blake2 nor advanced features like
    146         # salt, personal, tree hashing or SSE.
    147         return __get_builtin_constructor(name)(data, **kwargs)
    148     try:
    149         return _hashlib.new(name, data)
    150     except ValueError:
    151         # If the _hashlib module (OpenSSL) doesn't support the named
    152         # hash, try using our builtin implementations.
    153         # This allows for SHA224/256 and SHA384/512 support even though
    154         # the OpenSSL library prior to 0.9.8 doesn't provide them.
    155         return __get_builtin_constructor(name)(data)
    156 
    157 
# Select the backend at import time.  When the _hashlib extension imports,
# new() and the per-algorithm lookup used further down go through the
# OpenSSL-aware helpers; otherwise both use the builtin extension modules.
try:
    import _hashlib
    new = __hash_new
    __get_hash = __get_openssl_constructor
    # union() builds a new set, so the name is rebound rather than the
    # existing set being mutated in place.
    algorithms_available = algorithms_available.union(
            _hashlib.openssl_md_meth_names)
except ImportError:
    new = __py_new
    __get_hash = __get_builtin_constructor
    167 
    168 try:
    169     # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
    170     from _hashlib import pbkdf2_hmac
    171 except ImportError:
    172     _trans_5C = bytes((x ^ 0x5C) for x in range(256))
    173     _trans_36 = bytes((x ^ 0x36) for x in range(256))
    174 
    175     def pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None):
    176         """Password based key derivation function 2 (PKCS #5 v2.0)
    177 
    178         This Python implementations based on the hmac module about as fast
    179         as OpenSSL's PKCS5_PBKDF2_HMAC for short passwords and much faster
    180         for long passwords.
    181         """
    182         if not isinstance(hash_name, str):
    183             raise TypeError(hash_name)
    184 
    185         if not isinstance(password, (bytes, bytearray)):
    186             password = bytes(memoryview(password))
    187         if not isinstance(salt, (bytes, bytearray)):
    188             salt = bytes(memoryview(salt))
    189 
    190         # Fast inline HMAC implementation
    191         inner = new(hash_name)
    192         outer = new(hash_name)
    193         blocksize = getattr(inner, 'block_size', 64)
    194         if len(password) > blocksize:
    195             password = new(hash_name, password).digest()
    196         password = password + b'\x00' * (blocksize - len(password))
    197         inner.update(password.translate(_trans_36))
    198         outer.update(password.translate(_trans_5C))
    199 
    200         def prf(msg, inner=inner, outer=outer):
    201             # PBKDF2_HMAC uses the password as key. We can re-use the same
    202             # digest objects and just update copies to skip initialization.
    203             icpy = inner.copy()
    204             ocpy = outer.copy()
    205             icpy.update(msg)
    206             ocpy.update(icpy.digest())
    207             return ocpy.digest()
    208 
    209         if iterations < 1:
    210             raise ValueError(iterations)
    211         if dklen is None:
    212             dklen = outer.digest_size
    213         if dklen < 1:
    214             raise ValueError(dklen)
    215 
    216         dkey = b''
    217         loop = 1
    218         from_bytes = int.from_bytes
    219         while len(dkey) < dklen:
    220             prev = prf(salt + loop.to_bytes(4, 'big'))
    221             # endianess doesn't matter here as long to / from use the same
    222             rkey = int.from_bytes(prev, 'big')
    223             for i in range(iterations - 1):
    224                 prev = prf(prev)
    225                 # rkey = rkey ^ prev
    226                 rkey ^= from_bytes(prev, 'big')
    227             loop += 1
    228             dkey += rkey.to_bytes(inner.digest_size, 'big')
    229 
    230         return dkey[:dklen]
    231 
# scrypt is optional: the name is bound only when _hashlib provides it.
# On ImportError nothing is defined in its place.
try:
    # OpenSSL's scrypt requires OpenSSL 1.1+
    from _hashlib import scrypt
except ImportError:
    pass
    237 
    238 
# Bind a module-level constructor (md5, sha1, ...) for every guaranteed
# algorithm, using whichever lookup function was selected as __get_hash.
for __func_name in __always_supported:
    # try them all, some may not work due to the OpenSSL
    # version not supporting that algorithm.
    try:
        globals()[__func_name] = __get_hash(__func_name)
    except ValueError:
        # The failure is logged instead of raised, so importing the module
        # still succeeds; that constructor name is just left unbound.
        # logging is imported only on this failure path.
        import logging
        logging.exception('code for hash %s was not found.', __func_name)


# Cleanup locals()
# Removes the import-time helper names from the module namespace.
# __get_builtin_constructor is not deleted: the function bound to new()
# still calls it by its global name.
del __always_supported, __func_name, __get_hash
del __py_new, __hash_new, __get_openssl_constructor
    252