1 /* 2 * Copyright (C) 2014, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "suggest/policyimpl/dictionary/utils/trie_map.h" 18 19 #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" 20 21 namespace latinime { 22 23 const int TrieMap::INVALID_INDEX = -1; 24 const int TrieMap::FIELD0_SIZE = 4; 25 const int TrieMap::FIELD1_SIZE = 3; 26 const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE; 27 const uint32_t TrieMap::VALUE_FLAG = 0x400000; 28 const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF; 29 const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000; 30 const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF; 31 const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5; 32 const uint32_t TrieMap::LABEL_MASK = 0x1F; 33 const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL; 34 const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0; 35 const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE; 36 const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0); 37 const uint64_t TrieMap::MAX_VALUE = 38 (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1; 39 const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE; 40 41 TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) { 42 mBuffer.extend(ROOT_BITMAP_ENTRY_POS); 43 writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX); 44 } 45 46 TrieMap::TrieMap(const ReadWriteByteArrayView buffer) 47 : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {} 48 49 void TrieMap::dump(const int from, const int to) const { 50 AKLOGI("BufSize: %d", mBuffer.getTailPosition()); 51 for (int i = from; i < to; ++i) { 52 AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i)); 53 } 54 int unusedRegionSize = 0; 55 for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) { 56 int index = readEmptyTableLink(i); 57 while (index != ROOT_BITMAP_ENTRY_INDEX) { 58 index = readField0(index); 59 unusedRegionSize += i; 60 } 61 } 62 AKLOGI("Unused Size: %d", unusedRegionSize); 63 } 64 65 int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) { 66 const Entry bitmapEntry = readEntry(bitmapEntryIndex); 67 const uint32_t unsignedKey = static_cast<uint32_t>(key); 68 const int terminalEntryIndex = getTerminalEntryIndex( 69 unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */); 70 if (terminalEntryIndex == INVALID_INDEX) { 71 // Not found. 72 return INVALID_INDEX; 73 } 74 const Entry terminalEntry = readEntry(terminalEntryIndex); 75 if (terminalEntry.hasTerminalLink()) { 76 return terminalEntry.getValueEntryIndex() + 1; 77 } 78 // Create a value entry and a bitmap entry. 79 const int valueEntryIndex = allocateTable(2 /* entryCount */); 80 if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) { 81 return INVALID_INDEX; 82 } 83 if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) { 84 return INVALID_INDEX; 85 } 86 if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) { 87 return INVALID_INDEX; 88 } 89 return valueEntryIndex + 1; 90 } 91 92 const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const { 93 const uint32_t unsignedKey = static_cast<uint32_t>(key); 94 return getInternal(unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntryIndex, 95 0 /* level */); 96 } 97 98 bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) { 99 if (value > MAX_VALUE) { 100 return false; 101 } 102 const uint32_t unsignedKey = static_cast<uint32_t>(key); 103 return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex, 104 readEntry(bitmapEntryIndex), 0 /* level */); 105 } 106 107 bool TrieMap::save(FILE *const file) const { 108 return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer); 109 } 110 111 /** 112 * Iterate next entry in a certain level. 113 * 114 * @param iterationState the iteration state that will be read and updated in this method. 115 * @param outKey the output key 116 * @return Result instance. mIsValid is false when all entries are iterated. 117 */ 118 const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *const iterationState, 119 int *const outKey) const { 120 while (!iterationState->empty()) { 121 TableIterationState &state = iterationState->back(); 122 if (state.mTableSize <= state.mCurrentIndex) { 123 // Move to parent. 124 iterationState->pop_back(); 125 } else { 126 const int entryIndex = state.mTableIndex + state.mCurrentIndex; 127 state.mCurrentIndex += 1; 128 const Entry entry = readEntry(entryIndex); 129 if (entry.isBitmapEntry()) { 130 // Move to child. 131 iterationState->emplace_back(popCount(entry.getBitmap()), entry.getTableIndex()); 132 } else { 133 if (outKey) { 134 *outKey = entry.getKey(); 135 } 136 if (!entry.hasTerminalLink()) { 137 return Result(entry.getValue(), true, INVALID_INDEX); 138 } 139 const int valueEntryIndex = entry.getValueEntryIndex(); 140 const Entry valueEntry = readEntry(valueEntryIndex); 141 return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1); 142 } 143 } 144 } 145 // Visited all entries. 146 return Result(0, false, INVALID_INDEX); 147 } 148 149 /** 150 * Shuffle bits of the key in the fixed order. 151 * 152 * This method is used as a hash function. This returns different values for different inputs. 153 */ 154 uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const { 155 uint32_t shuffledKey = 0; 156 for (int i = 0; i < 4; ++i) { 157 const uint32_t keyPiece = (key >> (i * 8)) & 0xFF; 158 shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21)) 159 & 0x11111111) << i; 160 } 161 return shuffledKey; 162 } 163 164 bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) { 165 if (value <= VALUE_MASK) { 166 // Write value into the terminal entry. 167 return writeField1(value | VALUE_FLAG, terminalEntryIndex); 168 } 169 // Create value entry and write value. 170 const int valueEntryIndex = allocateTable(2 /* entryCount */); 171 if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) { 172 return false; 173 } 174 if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) { 175 return false; 176 } 177 return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex); 178 } 179 180 bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value, 181 const int terminalEntryIndex) { 182 if (!terminalEntry.hasTerminalLink()) { 183 return writeValue(value, terminalEntryIndex); 184 } 185 const int valueEntryIndex = terminalEntry.getValueEntryIndex(); 186 return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex); 187 } 188 189 bool TrieMap::freeTable(const int tableIndex, const int entryCount) { 190 if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) { 191 return false; 192 } 193 return writeEmptyTableLink(tableIndex, entryCount); 194 } 195 196 /** 197 * Allocate table with entryCount-entries. Reuse freed table if possible. 198 */ 199 int TrieMap::allocateTable(const int entryCount) { 200 if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) { 201 const int tableIndex = readEmptyTableLink(entryCount); 202 if (tableIndex > 0) { 203 if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) { 204 return INVALID_INDEX; 205 } 206 // Reuse the table. 207 return tableIndex; 208 } 209 } 210 // Allocate memory space at tail position of the buffer. 211 const int mapIndex = getTailEntryIndex(); 212 if (!mBuffer.extend(entryCount * ENTRY_SIZE)) { 213 return INVALID_INDEX; 214 } 215 return mapIndex; 216 } 217 218 int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, 219 const Entry &bitmapEntry, const int level) const { 220 const int label = getLabel(hashedKey, level); 221 if (!exists(bitmapEntry.getBitmap(), label)) { 222 return INVALID_INDEX; 223 } 224 const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label); 225 const Entry entry = readEntry(entryIndex); 226 if (entry.isBitmapEntry()) { 227 // Move to the next level. 228 return getTerminalEntryIndex(key, hashedKey, entry, level + 1); 229 } 230 if (entry.getKey() == key) { 231 // Terminal entry is found. 232 return entryIndex; 233 } 234 return INVALID_INDEX; 235 } 236 237 /** 238 * Get Result corresponding to the key. 239 * 240 * @param key the key. 241 * @param hashedKey the hashed key. 242 * @param bitmapEntryIndex the index of bitmap entry 243 * @param level current level 244 * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the 245 * map. 246 */ 247 const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey, 248 const int bitmapEntryIndex, const int level) const { 249 const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey, 250 readEntry(bitmapEntryIndex), level); 251 if (terminalEntryIndex == INVALID_INDEX) { 252 // Not found. 253 return Result(0, false, INVALID_INDEX); 254 } 255 const Entry terminalEntry = readEntry(terminalEntryIndex); 256 if (!terminalEntry.hasTerminalLink()) { 257 return Result(terminalEntry.getValue(), true, INVALID_INDEX); 258 } 259 const int valueEntryIndex = terminalEntry.getValueEntryIndex(); 260 const Entry valueEntry = readEntry(valueEntryIndex); 261 return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1); 262 } 263 264 /** 265 * Put key to value mapping to the map. 266 * 267 * @param key the key. 268 * @param value the value 269 * @param hashedKey the hashed key. 270 * @param bitmapEntryIndex the index of bitmap entry 271 * @param bitmapEntry the bitmap entry 272 * @param level current level 273 * @return whether the key-value has been correctly inserted to the map or not. 274 */ 275 bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, 276 const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) { 277 const int label = getLabel(hashedKey, level); 278 const uint32_t bitmap = bitmapEntry.getBitmap(); 279 const int mapIndex = bitmapEntry.getTableIndex(); 280 if (!exists(bitmap, label)) { 281 // Current map doesn't contain the label. 282 return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapEntryIndex, label); 283 } 284 const int entryIndex = mapIndex + popCount(bitmap, label); 285 const Entry entry = readEntry(entryIndex); 286 if (entry.isBitmapEntry()) { 287 // Bitmap entry is found. Go to the next level. 288 return putInternal(key, value, hashedKey, entryIndex, entry, level + 1); 289 } 290 if (entry.getKey() == key) { 291 // Terminal entry for the key is found. Update the value. 292 return updateValue(entry, value, entryIndex); 293 } 294 // Conflict with the existing key. 295 return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex, level); 296 } 297 298 /** 299 * Resolve a conflict in the current level and add new entry. 300 * 301 * @param key the key 302 * @param value the value 303 * @param hashedKey the hashed key 304 * @param conflictedEntry the existing conflicted entry 305 * @param conflictedEntryIndex the index of existing conflicted entry 306 * @param level current level 307 * @return whether the key-value has been correctly inserted to the map or not. 308 */ 309 bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, 310 const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, 311 const int level) { 312 const int conflictedKeyNextLabel = 313 getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1); 314 const int nextLabel = getLabel(hashedKey, level + 1); 315 if (conflictedKeyNextLabel == nextLabel) { 316 // Conflicted again in the next level. 317 const int newTableIndex = allocateTable(1 /* entryCount */); 318 if (newTableIndex == INVALID_INDEX) { 319 return false; 320 } 321 if (!writeEntry(conflictedEntry, newTableIndex)) { 322 return false; 323 } 324 const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex); 325 if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) { 326 return false; 327 } 328 return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1); 329 } 330 // The conflict has been resolved. Create a table that contains 2 entries. 331 const int newTableIndex = allocateTable(2 /* entryCount */); 332 if (newTableIndex == INVALID_INDEX) { 333 return false; 334 } 335 if (nextLabel < conflictedKeyNextLabel) { 336 if (!writeTerminalEntry(key, value, newTableIndex)) { 337 return false; 338 } 339 if (!writeEntry(conflictedEntry, newTableIndex + 1)) { 340 return false; 341 } 342 } else { // nextLabel > conflictedKeyNextLabel 343 if (!writeEntry(conflictedEntry, newTableIndex)) { 344 return false; 345 } 346 if (!writeTerminalEntry(key, value, newTableIndex + 1)) { 347 return false; 348 } 349 } 350 const uint32_t updatedBitmap = 351 setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel); 352 return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex); 353 } 354 355 /** 356 * Add new entry to the existing table. 357 */ 358 bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, 359 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) { 360 // Current map doesn't contain the label. 361 const int entryCount = popCount(bitmap); 362 const int newTableIndex = allocateTable(entryCount + 1); 363 if (newTableIndex == INVALID_INDEX) { 364 return false; 365 } 366 const int newEntryIndexInTable = popCount(bitmap, label); 367 // Copy from existing table to the new table. 368 for (int i = 0; i < entryCount; ++i) { 369 if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) { 370 return false; 371 } 372 } 373 // Write new terminal entry. 374 if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) { 375 return false; 376 } 377 // Update bitmap. 378 if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIndex)) { 379 return false; 380 } 381 if (entryCount > 0) { 382 return freeTable(tableIndex, entryCount); 383 } 384 return true; 385 } 386 387 } // namespace latinime 388