1 /* 2 * Copyright (C) 2014, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_TRIE_MAP_H 18 #define LATINIME_TRIE_MAP_H 19 20 #include <climits> 21 #include <cstdint> 22 #include <cstdio> 23 #include <vector> 24 25 #include "defines.h" 26 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" 27 #include "utils/byte_array_view.h" 28 29 namespace latinime { 30 31 /** 32 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie. 33 * key is int and value is uint64_t. 34 * This supports multiple level map. Terminal entries can have a bitmap for the next level map. 35 * This doesn't support root map resizing. 36 */ 37 class TrieMap { 38 public: 39 struct Result { 40 const uint64_t mValue; 41 const bool mIsValid; 42 const int mNextLevelBitmapEntryIndex; 43 44 Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex) 45 : mValue(value), mIsValid(isValid), 46 mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {} 47 }; 48 49 /** 50 * Struct to record iteration state in a table. 51 */ 52 struct TableIterationState { 53 int mTableSize; 54 int mTableIndex; 55 int mCurrentIndex; 56 57 TableIterationState(const int tableSize, const int tableIndex) 58 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {} 59 }; 60 61 class TrieMapRange; 62 class TrieMapIterator { 63 public: 64 class IterationResult { 65 public: 66 IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value, 67 const int nextLeveBitmapEntryIndex) 68 : mTrieMap(trieMap), mKey(key), mValue(value), 69 mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {} 70 71 const TrieMapRange getEntriesInNextLevel() const { 72 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex); 73 } 74 75 bool hasNextLevelMap() const { 76 return mNextLevelBitmapEntryIndex != INVALID_INDEX; 77 } 78 79 AK_FORCE_INLINE int key() const { 80 return mKey; 81 } 82 83 AK_FORCE_INLINE uint64_t value() const { 84 return mValue; 85 } 86 87 private: 88 const TrieMap *const mTrieMap; 89 const int mKey; 90 const uint64_t mValue; 91 const int mNextLevelBitmapEntryIndex; 92 }; 93 94 TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex) 95 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex), 96 mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) { 97 if (!trieMap) { 98 return; 99 } 100 const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex); 101 mStateStack.emplace_back( 102 mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex()); 103 this->operator++(); 104 } 105 106 const IterationResult operator*() const { 107 return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex); 108 } 109 110 bool operator!=(const TrieMapIterator &other) const { 111 // Caveat: This works only for for loops. 112 return mIsValid || other.mIsValid; 113 } 114 115 const TrieMapIterator &operator++() { 116 const Result result = mTrieMap->iterateNext(&mStateStack, &mKey); 117 mValue = result.mValue; 118 mIsValid = result.mIsValid; 119 mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex; 120 return *this; 121 } 122 123 private: 124 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator); 125 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator); 126 127 const TrieMap *const mTrieMap; 128 std::vector<TrieMap::TableIterationState> mStateStack; 129 const int mBaseBitmapEntryIndex; 130 int mKey; 131 uint64_t mValue; 132 bool mIsValid; 133 int mNextLevelBitmapEntryIndex; 134 }; 135 136 /** 137 * Class to support iterating entries in TrieMap by range base for loops. 138 */ 139 class TrieMapRange { 140 public: 141 TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex) 142 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {}; 143 144 TrieMapIterator begin() const { 145 return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex); 146 } 147 148 const TrieMapIterator end() const { 149 return TrieMapIterator(nullptr, INVALID_INDEX); 150 } 151 152 private: 153 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange); 154 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange); 155 156 const TrieMap *const mTrieMap; 157 const int mBaseBitmapEntryIndex; 158 }; 159 160 static const int INVALID_INDEX; 161 static const uint64_t MAX_VALUE; 162 163 TrieMap(); 164 // Construct TrieMap using existing data in the memory region written by save(). 165 TrieMap(const ReadWriteByteArrayView buffer); 166 void dump(const int from = 0, const int to = 0) const; 167 168 bool isNearSizeLimit() const { 169 return mBuffer.isNearSizeLimit(); 170 } 171 172 int getRootBitmapEntryIndex() const { 173 return ROOT_BITMAP_ENTRY_INDEX; 174 } 175 176 // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. 177 int getNextLevelBitmapEntryIndex(const int key) { 178 return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); 179 } 180 181 int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex); 182 183 const Result getRoot(const int key) const { 184 return get(key, ROOT_BITMAP_ENTRY_INDEX); 185 } 186 187 const Result get(const int key, const int bitmapEntryIndex) const; 188 189 bool putRoot(const int key, const uint64_t value) { 190 return put(key, value, ROOT_BITMAP_ENTRY_INDEX); 191 } 192 193 bool put(const int key, const uint64_t value, const int bitmapEntryIndex); 194 195 const TrieMapRange getEntriesInRootLevel() const { 196 return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX); 197 } 198 199 const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const { 200 return TrieMapRange(this, bitmapEntryIndex); 201 } 202 203 bool save(FILE *const file) const; 204 205 private: 206 DISALLOW_COPY_AND_ASSIGN(TrieMap); 207 208 /** 209 * Struct represents an entry. 210 * 211 * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and 212 * FIELD_1. 213 * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table. 214 * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE) 215 * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal 216 * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map 217 * for the key. 218 * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK)) 219 * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and 220 * lower order bytes are stored in FIELD_1. 221 * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes)) 222 */ 223 struct Entry { 224 const uint32_t mData0; 225 const uint32_t mData1; 226 227 Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {} 228 229 AK_FORCE_INLINE bool isBitmapEntry() const { 230 return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0; 231 } 232 233 AK_FORCE_INLINE bool hasTerminalLink() const { 234 return (mData1 & TERMINAL_LINK_FLAG) != 0; 235 } 236 237 // For terminal entry. 238 AK_FORCE_INLINE uint32_t getKey() const { 239 return mData0; 240 } 241 242 // For terminal entry. 243 AK_FORCE_INLINE uint32_t getValue() const { 244 return mData1 & VALUE_MASK; 245 } 246 247 // For terminal entry. 248 AK_FORCE_INLINE uint32_t getValueEntryIndex() const { 249 return mData1 & TERMINAL_LINK_MASK; 250 } 251 252 // For bitmap entry. 253 AK_FORCE_INLINE uint32_t getBitmap() const { 254 return mData0; 255 } 256 257 // For bitmap entry. 258 AK_FORCE_INLINE int getTableIndex() const { 259 return static_cast<int>(mData1); 260 } 261 262 // For value entry. 263 AK_FORCE_INLINE uint64_t getValueOfValueEntry() const { 264 return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1); 265 } 266 }; 267 268 BufferWithExtendableBuffer mBuffer; 269 270 static const int FIELD0_SIZE; 271 static const int FIELD1_SIZE; 272 static const int ENTRY_SIZE; 273 static const uint32_t VALUE_FLAG; 274 static const uint32_t VALUE_MASK; 275 static const uint32_t TERMINAL_LINK_FLAG; 276 static const uint32_t TERMINAL_LINK_MASK; 277 static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; 278 static const uint32_t LABEL_MASK; 279 static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; 280 static const int ROOT_BITMAP_ENTRY_INDEX; 281 static const int ROOT_BITMAP_ENTRY_POS; 282 static const Entry EMPTY_BITMAP_ENTRY; 283 static const int MAX_BUFFER_SIZE; 284 285 uint32_t getBitShuffledKey(const uint32_t key) const; 286 bool writeValue(const uint64_t value, const int terminalEntryIndex); 287 bool updateValue(const Entry &terminalEntry, const uint64_t value, 288 const int terminalEntryIndex); 289 bool freeTable(const int tableIndex, const int entryCount); 290 int allocateTable(const int entryCount); 291 int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, 292 const Entry &bitmapEntry, const int level) const; 293 const Result getInternal(const uint32_t key, const uint32_t hashedKey, 294 const int bitmapEntryIndex, const int level) const; 295 bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, 296 const int bitmapEntryIndex, const Entry &bitmapEntry, const int level); 297 bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, 298 const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, 299 const int level); 300 bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, 301 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, 302 const int label); 303 const Result iterateNext(std::vector<TableIterationState> *const iterationState, 304 int *const outKey) const; 305 306 AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const { 307 return Entry(readField0(entryIndex), readField1(entryIndex)); 308 } 309 310 // Returns whether an entry for the index is existing by testing if the index-th bit in the 311 // bitmap is set or not. 312 AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const { 313 return (bitmap & (1 << index)) != 0; 314 } 315 316 // Set index-th bit in the bitmap. 317 AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const { 318 return bitmap | (1 << index); 319 } 320 321 // Count set bits before index in the bitmap. 322 AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const { 323 return popCount(bitmap & ((1 << index) - 1)); 324 } 325 326 // Count set bits in the bitmap. 327 AK_FORCE_INLINE int popCount(const uint32_t bitmap) const { 328 return __builtin_popcount(bitmap); 329 // int v = bitmap - ((bitmap >> 1) & 0x55555555); 330 // v = (v & 0x33333333) + ((v >> 2) & 0x33333333); 331 // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; 332 } 333 334 AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const { 335 return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK; 336 } 337 338 AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const { 339 return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); 340 } 341 342 AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const { 343 return mBuffer.readUint(FIELD1_SIZE, 344 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); 345 } 346 347 AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const { 348 return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); 349 } 350 351 AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) { 352 return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); 353 } 354 355 AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) { 356 return mBuffer.writeUint(data, FIELD0_SIZE, 357 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); 358 } 359 360 AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) { 361 return mBuffer.writeUint(data, FIELD1_SIZE, 362 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); 363 } 364 365 AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) { 366 return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex); 367 } 368 369 AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value, 370 const int entryIndex) { 371 return writeField0(key, entryIndex) && writeValue(value, entryIndex); 372 } 373 374 AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) { 375 return writeEntry(readEntry(originalEntryIndex), newEntryIndex); 376 } 377 378 AK_FORCE_INLINE int getTailEntryIndex() const { 379 return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; 380 } 381 }; 382 383 } // namespace latinime 384 #endif /* LATINIME_TRIE_MAP_H */ 385