Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/python
      2 # Copyright (C) 2013 Google, Inc.
      3 #
      4 # This library is free software; you can redistribute it and/or
      5 # modify it under the terms of the GNU Library General Public
      6 # License as published by the Free Software Foundation; either
      7 # version 2 of the License, or (at your option) any later version.
      8 #
      9 # This library is distributed in the hope that it will be useful,
     10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12 # Library General Public License for more details.
     13 #
     14 # You should have received a copy of the GNU Library General Public License
     15 # along with this library; see the file COPYING.LIB.  If not, write to
     16 # the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     17 # Boston, MA 02110-1301, USA.
     18 
     19 # This implementaiton of SuperFastHash is based on the Python implementation
     20 # by Victor Perron at <https://github.com/vperron/python-superfasthash>.
     21 # We've modified Victor's version to output hash values that match WTFString,
     22 # which involves using a specific seed and some different constants.
     23 
     24 class uint32_t(long):
     25     def __rshift__(self, other):
     26         return uint32_t(long.__rshift__(self, other) & ((1L << 32) - 1))
     27 
     28     def __lshift__(self, other):
     29         return uint32_t(long.__lshift__(self, other) & ((1L << 32) - 1))
     30 
     31     def __add__(self, other):
     32         return uint32_t(long.__add__(self, other) & ((1L << 32) - 1))
     33 
     34     def __xor__(self, other):
     35         return uint32_t(long.__xor__(self, other) & ((1L << 32) - 1))
     36 
     37 
     38 def hash(string):
     39     """
     40     Stream-adapted SuperFastHash algorithm from Paul Hsieh,
     41     http://www.azillionmonkeys.com/qed/hash.html
     42     LGPLv2.1
     43     Python version with no dependencies.
     44     Victor Perron <victor (at] iso3103.net>
     45     """
     46 
     47     if not string:
     48         return 0
     49 
     50     result = uint32_t(0x9E3779B9L)
     51     length = len(string)
     52     remainder = length & 1
     53     length >>= 1
     54 
     55     i = 0
     56     while length > 0:
     57         length -= 1
     58         result += ord(string[i])
     59         temp = (ord(string[i + 1]) << 11) ^ result
     60         result = (result << 16) ^ temp
     61         i += 2
     62         result += (result >> 11)
     63 
     64     if remainder == 1:
     65         result += ord(string[i])
     66         result ^= (result << 11)
     67         result += (result >> 17)
     68 
     69     # Force "avalanching" of final 127 bits
     70     result ^= (result << 3)
     71     result += (result >> 5)
     72     result ^= (result << 2)
     73     result += (result >> 15)
     74     result ^= (result << 10)
     75 
     76     # Save 8 bits for StringImpl to use as flags.
     77     result &= 0xffffff
     78 
     79     # This avoids ever returning a hash code of 0, since that is used to
     80     # signal "hash not computed yet".
     81     assert result != 0
     82 
     83     return result
     84