1 /* 2 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved 3 * Copyright (C) Research In Motion Limited 2009. All rights reserved. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22 #ifndef StringHash_h 23 #define StringHash_h 24 25 #include "AtomicString.h" 26 #include "WTFString.h" 27 #include <wtf/Forward.h> 28 #include <wtf/HashTraits.h> 29 #include <wtf/StringHasher.h> 30 #include <wtf/unicode/Unicode.h> 31 32 namespace WTF { 33 34 // The hash() functions on StringHash and CaseFoldingHash do not support 35 // null strings. get(), contains(), and add() on HashMap<String,..., StringHash> 36 // cause a null-pointer dereference when passed null strings. 37 38 // FIXME: We should really figure out a way to put the computeHash function that's 39 // currently a member function of StringImpl into this file so we can be a little 40 // closer to having all the nearly-identical hash functions in one place. 41 42 struct StringHash { 43 static unsigned hash(StringImpl* key) { return key->hash(); } 44 static bool equal(const StringImpl* a, const StringImpl* b) 45 { 46 if (a == b) 47 return true; 48 if (!a || !b) 49 return false; 50 51 unsigned aLength = a->length(); 52 unsigned bLength = b->length(); 53 if (aLength != bLength) 54 return false; 55 56 // FIXME: perhaps we should have a more abstract macro that indicates when 57 // going 4 bytes at a time is unsafe 58 #if CPU(ARM) || CPU(SH4) || CPU(MIPS) 59 const UChar* aChars = a->characters(); 60 const UChar* bChars = b->characters(); 61 for (unsigned i = 0; i != aLength; ++i) { 62 if (*aChars++ != *bChars++) 63 return false; 64 } 65 return true; 66 #else 67 /* Do it 4-bytes-at-a-time on architectures where it's safe */ 68 const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters()); 69 const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters()); 70 71 unsigned halfLength = aLength >> 1; 72 for (unsigned i = 0; i != halfLength; ++i) 73 if (*aChars++ != *bChars++) 74 return false; 75 76 if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars)) 77 return false; 78 79 return true; 80 #endif 81 } 82 83 static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); } 84 static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) 85 { 86 return equal(a.get(), b.get()); 87 } 88 89 static unsigned hash(const String& key) { return key.impl()->hash(); } 90 static bool equal(const String& a, const String& b) 91 { 92 return equal(a.impl(), b.impl()); 93 } 94 95 static const bool safeToCompareToEmptyOrDeleted = false; 96 }; 97 98 class CaseFoldingHash { 99 public: 100 template<typename T> static inline UChar foldCase(T ch) 101 { 102 return WTF::Unicode::foldCase(ch); 103 } 104 105 static unsigned hash(const UChar* data, unsigned length) 106 { 107 return StringHasher::computeHash<UChar, foldCase<UChar> >(data, length); 108 } 109 110 static unsigned hash(StringImpl* str) 111 { 112 return hash(str->characters(), str->length()); 113 } 114 115 static unsigned hash(const char* data, unsigned length) 116 { 117 return StringHasher::computeHash<char, foldCase<char> >(data, length); 118 } 119 120 static bool equal(const StringImpl* a, const StringImpl* b) 121 { 122 if (a == b) 123 return true; 124 if (!a || !b) 125 return false; 126 unsigned length = a->length(); 127 if (length != b->length()) 128 return false; 129 return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0; 130 } 131 132 static unsigned hash(const RefPtr<StringImpl>& key) 133 { 134 return hash(key.get()); 135 } 136 137 static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) 138 { 139 return equal(a.get(), b.get()); 140 } 141 142 static unsigned hash(const String& key) 143 { 144 return hash(key.impl()); 145 } 146 static unsigned hash(const AtomicString& key) 147 { 148 return hash(key.impl()); 149 } 150 static bool equal(const String& a, const String& b) 151 { 152 return equal(a.impl(), b.impl()); 153 } 154 static bool equal(const AtomicString& a, const AtomicString& b) 155 { 156 return (a == b) || equal(a.impl(), b.impl()); 157 } 158 159 static const bool safeToCompareToEmptyOrDeleted = false; 160 }; 161 162 // This hash can be used in cases where the key is a hash of a string, but we don't 163 // want to store the string. It's not really specific to string hashing, but all our 164 // current uses of it are for strings. 165 struct AlreadyHashed : IntHash<unsigned> { 166 static unsigned hash(unsigned key) { return key; } 167 168 // To use a hash value as a key for a hash table, we need to eliminate the 169 // "deleted" value, which is negative one. That could be done by changing 170 // the string hash function to never generate negative one, but this works 171 // and is still relatively efficient. 172 static unsigned avoidDeletedValue(unsigned hash) 173 { 174 ASSERT(hash); 175 unsigned newHash = hash | (!(hash + 1) << 31); 176 ASSERT(newHash); 177 ASSERT(newHash != 0xFFFFFFFF); 178 return newHash; 179 } 180 }; 181 182 template<> struct HashTraits<String> : SimpleClassHashTraits<String> { }; 183 184 } 185 186 using WTF::StringHash; 187 using WTF::CaseFoldingHash; 188 using WTF::AlreadyHashed; 189 190 #endif 191