Home | History | Annotate | Download | only in exceptions
      1 # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
      2 # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
      3 
      4 """
Creates a human-readable identifier, using letters and digits,
avoiding ambiguous digits and letters.  hash_identifier can be used
to create compact representations that are unique for a certain string
(or concatenation of strings).
      9 """
     10 
     11 try:
     12     from hashlib import md5
     13 except ImportError:
     14     from md5 import md5
     15 
     16 import six
     17 
     18 good_characters = "23456789abcdefghjkmnpqrtuvwxyz"
     19 
     20 base = len(good_characters)
     21 
     22 def make_identifier(number):
     23     """
     24     Encodes a number as an identifier.
     25     """
     26     if not isinstance(number, six.integer_types):
     27         raise ValueError(
     28             "You can only make identifiers out of integers (not %r)"
     29             % number)
     30     if number < 0:
     31         raise ValueError(
     32             "You cannot make identifiers out of negative numbers: %r"
     33             % number)
     34     result = []
     35     while number:
     36         next = number % base
     37         result.append(good_characters[next])
     38         # Note, this depends on integer rounding of results:
     39         number = number // base
     40     return ''.join(result)
     41 
     42 def hash_identifier(s, length, pad=True, hasher=md5, prefix='',
     43                     group=None, upper=False):
     44     """
     45     Hashes the string (with the given hashing module), then turns that
     46     hash into an identifier of the given length (using modulo to
     47     reduce the length of the identifier).  If ``pad`` is False, then
     48     the minimum-length identifier will be used; otherwise the
     49     identifier will be padded with 0's as necessary.
     50 
     51     ``prefix`` will be added last, and does not count towards the
     52     target length.  ``group`` will group the characters with ``-`` in
     53     the given lengths, and also does not count towards the target
     54     length.  E.g., ``group=4`` will cause a identifier like
     55     ``a5f3-hgk3-asdf``.  Grouping occurs before the prefix.
     56     """
     57     if not callable(hasher):
     58         # Accept sha/md5 modules as well as callables
     59         hasher = hasher.new
     60     if length > 26 and hasher is md5:
     61         raise ValueError(
     62             "md5 cannot create hashes longer than 26 characters in "
     63             "length (you gave %s)" % length)
     64     if isinstance(s, six.text_type):
     65         s = s.encode('utf-8')
     66     elif not isinstance(s, six.binary_type):
     67         s = str(s)
     68         if six.PY3:
     69             s = s.encode('utf-8')
     70     h = hasher(s)
     71     bin_hash = h.digest()
     72     modulo = base ** length
     73     number = 0
     74     for c in list(bin_hash):
     75         number = (number * 256 + six.byte2int([c])) % modulo
     76     ident = make_identifier(number)
     77     if pad:
     78         ident = good_characters[0]*(length-len(ident)) + ident
     79     if group:
     80         parts = []
     81         while ident:
     82             parts.insert(0, ident[-group:])
     83             ident = ident[:-group]
     84         ident = '-'.join(parts)
     85     if upper:
     86         ident = ident.upper()
     87     return prefix + ident
     88 
# Doctests for the functions in this module.  ``doctest.testmod()`` picks
# these strings up through the module-level ``__test__`` mapping; the
# expected outputs pin the exact identifiers produced for known inputs.
__test__ = {
    'make_identifier': """
    >>> make_identifier(0)
    ''
    >>> make_identifier(1000)
    'c53'
    >>> make_identifier(-100)
    Traceback (most recent call last):
        ...
    ValueError: You cannot make identifiers out of negative numbers: -100
    >>> make_identifier('test')
    Traceback (most recent call last):
        ...
    ValueError: You can only make identifiers out of integers (not 'test')
    >>> make_identifier(1000000000000)
    'c53x9rqh3'
    """,
    'hash_identifier': """
    >>> hash_identifier(0, 5)
    'cy2dr'
    >>> hash_identifier(0, 10)
    'cy2dr6rg46'
    >>> hash_identifier('this is a test of a long string', 5)
    'awatu'
    >>> hash_identifier(0, 26)
    'cy2dr6rg46cx8t4w2f3nfexzk4'
    >>> hash_identifier(0, 30)
    Traceback (most recent call last):
        ...
    ValueError: md5 cannot create hashes longer than 26 characters in length (you gave 30)
    >>> hash_identifier(0, 10, group=4)
    'cy-2dr6-rg46'
    >>> hash_identifier(0, 10, group=4, upper=True, prefix='M-')
    'M-CY-2DR6-RG46'
    """}
    125 
    126 if __name__ == '__main__':
    127     import doctest
    128     doctest.testmod()
    129 
    130