1 /* 2 * Copyright (C) 2005, 2006, 2008, 2010 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com> 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 #ifndef WTF_StringHasher_h 22 #define WTF_StringHasher_h 23 24 #include <wtf/unicode/Unicode.h> 25 26 namespace WTF { 27 28 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's 29 static const unsigned stringHashingStartValue = 0x9e3779b9U; 30 31 // Paul Hsieh's SuperFastHash 32 // http://www.azillionmonkeys.com/qed/hash.html 33 // char* data is interpreted as latin-encoded (zero extended to 16 bits). 34 class StringHasher { 35 public: 36 inline StringHasher() 37 : m_hash(stringHashingStartValue) 38 , m_hasPendingCharacter(false) 39 , m_pendingCharacter(0) 40 { 41 } 42 43 inline void addCharacters(UChar a, UChar b) 44 { 45 ASSERT(!m_hasPendingCharacter); 46 addCharactersToHash(a, b); 47 } 48 49 inline void addCharacter(UChar ch) 50 { 51 if (m_hasPendingCharacter) { 52 addCharactersToHash(m_pendingCharacter, ch); 53 m_hasPendingCharacter = false; 54 return; 55 } 56 57 m_pendingCharacter = ch; 58 m_hasPendingCharacter = true; 59 } 60 61 inline unsigned hash() const 62 { 63 unsigned result = m_hash; 64 65 // Handle end case. 66 if (m_hasPendingCharacter) { 67 result += m_pendingCharacter; 68 result ^= result << 11; 69 result += result >> 17; 70 } 71 72 // Force "avalanching" of final 31 bits. 73 result ^= result << 3; 74 result += result >> 5; 75 result ^= result << 2; 76 result += result >> 15; 77 result ^= result << 10; 78 79 // First bit is used in UStringImpl for m_isIdentifier. 80 result &= 0x7fffffff; 81 82 // This avoids ever returning a hash code of 0, since that is used to 83 // signal "hash not computed yet", using a value that is likely to be 84 // effectively the same as 0 when the low bits are masked. 85 if (!result) 86 return 0x40000000; 87 88 return result; 89 } 90 91 template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data, unsigned length) 92 { 93 StringHasher hasher; 94 bool rem = length & 1; 95 length >>= 1; 96 97 while (length--) { 98 hasher.addCharacters(Converter(data[0]), Converter(data[1])); 99 data += 2; 100 } 101 102 if (rem) 103 hasher.addCharacter(Converter(*data)); 104 105 return hasher.hash(); 106 } 107 108 template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data) 109 { 110 StringHasher hasher; 111 112 while (true) { 113 UChar b0 = Converter(*data++); 114 if (!b0) 115 break; 116 UChar b1 = Converter(*data++); 117 if (!b1) { 118 hasher.addCharacter(b0); 119 break; 120 } 121 122 hasher.addCharacters(b0, b1); 123 } 124 125 return hasher.hash(); 126 } 127 128 template<typename T> static inline unsigned computeHash(const T* data, unsigned length) 129 { 130 return computeHash<T, defaultCoverter>(data, length); 131 } 132 133 template<typename T> static inline unsigned computeHash(const T* data) 134 { 135 return computeHash<T, defaultCoverter>(data); 136 } 137 138 template<size_t length> static inline unsigned hashMemory(const void* data) 139 { 140 COMPILE_ASSERT(!(length % 4), length_must_be_a_multible_of_four); 141 return computeHash<UChar>(static_cast<const UChar*>(data), length / sizeof(UChar)); 142 } 143 144 static inline unsigned hashMemory(const void* data, unsigned size) 145 { 146 ASSERT(!(size % 2)); 147 return computeHash<UChar>(static_cast<const UChar*>(data), size / sizeof(UChar)); 148 } 149 150 private: 151 static inline UChar defaultCoverter(UChar ch) 152 { 153 return ch; 154 } 155 156 static inline UChar defaultCoverter(char ch) 157 { 158 return static_cast<unsigned char>(ch); 159 } 160 161 inline void addCharactersToHash(UChar a, UChar b) 162 { 163 m_hash += a; 164 unsigned tmp = (b << 11) ^ m_hash; 165 m_hash = (m_hash << 16) ^ tmp; 166 m_hash += m_hash >> 11; 167 } 168 169 unsigned m_hash; 170 bool m_hasPendingCharacter; 171 UChar m_pendingCharacter; 172 }; 173 174 } // namespace WTF 175 176 using WTF::StringHasher; 177 178 #endif // WTF_StringHasher_h 179