Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2014, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "dictionary/utils/trie_map.h"
     18 
     19 #include "dictionary/utils/dict_file_writing_utils.h"
     20 
     21 namespace latinime {
     22 
     23 const int TrieMap::INVALID_INDEX = -1;
     24 const int TrieMap::FIELD0_SIZE = 4;
     25 const int TrieMap::FIELD1_SIZE = 3;
     26 const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
     27 const uint32_t TrieMap::VALUE_FLAG = 0x400000;
     28 const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
     29 const uint32_t TrieMap::INVALID_VALUE_IN_KEY_VALUE_ENTRY = VALUE_MASK;
     30 const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
     31 const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
     32 const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
     33 const uint32_t TrieMap::LABEL_MASK = 0x1F;
     34 const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL;
     35 const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
     36 const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE;
     37 const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
     38 const int TrieMap::TERMINAL_LINKED_ENTRY_COUNT = 2; // Value entry and bitmap entry.
     39 const uint64_t TrieMap::MAX_VALUE =
     40         (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
     41 const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
     42 
     43 TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
     44     mBuffer.extend(ROOT_BITMAP_ENTRY_POS);
     45     writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
     46 }
     47 
     48 TrieMap::TrieMap(const ReadWriteByteArrayView buffer)
     49         : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
     50 
     51 void TrieMap::dump(const int from, const int to) const {
     52     AKLOGI("BufSize: %d", mBuffer.getTailPosition());
     53     for (int i = from; i < to; ++i) {
     54         AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i));
     55     }
     56     int unusedRegionSize = 0;
     57     for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) {
     58         int index = readEmptyTableLink(i);
     59         while (index != ROOT_BITMAP_ENTRY_INDEX) {
     60             index = readField0(index);
     61             unusedRegionSize += i;
     62         }
     63     }
     64     AKLOGI("Unused Size: %d", unusedRegionSize);
     65 }
     66 
     67 int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) {
     68     const Entry bitmapEntry = readEntry(bitmapEntryIndex);
     69     const uint32_t unsignedKey = static_cast<uint32_t>(key);
     70     const int terminalEntryIndex = getTerminalEntryIndex(
     71             unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
     72     if (terminalEntryIndex == INVALID_INDEX) {
     73         // Not found.
     74         return INVALID_INDEX;
     75     }
     76     const Entry terminalEntry = readEntry(terminalEntryIndex);
     77     if (terminalEntry.hasTerminalLink()) {
     78         return terminalEntry.getValueEntryIndex() + 1;
     79     }
     80     // Create a value entry and a bitmap entry.
     81     const int valueEntryIndex = allocateTable(TERMINAL_LINKED_ENTRY_COUNT);
     82     if (valueEntryIndex == INVALID_INDEX) {
     83         return INVALID_INDEX;
     84     }
     85     if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
     86         return INVALID_INDEX;
     87     }
     88     if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
     89         return INVALID_INDEX;
     90     }
     91     if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex)) {
     92         return INVALID_INDEX;
     93     }
     94     return valueEntryIndex + 1;
     95 }
     96 
     97 const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const {
     98     const uint32_t unsignedKey = static_cast<uint32_t>(key);
     99     return getInternal(unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
    100             0 /* level */);
    101 }
    102 
    103 bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) {
    104     if (value > MAX_VALUE) {
    105         return false;
    106     }
    107     const uint32_t unsignedKey = static_cast<uint32_t>(key);
    108     return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
    109             readEntry(bitmapEntryIndex), 0 /* level */);
    110 }
    111 
    112 bool TrieMap::save(FILE *const file) const {
    113     return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
    114 }
    115 
    116 bool TrieMap::remove(const int key, const int bitmapEntryIndex) {
    117     const Entry bitmapEntry = readEntry(bitmapEntryIndex);
    118     const uint32_t unsignedKey = static_cast<uint32_t>(key);
    119     const int terminalEntryIndex = getTerminalEntryIndex(
    120             unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
    121     if (terminalEntryIndex == INVALID_INDEX) {
    122         // Not found.
    123         return false;
    124     }
    125     const Entry terminalEntry = readEntry(terminalEntryIndex);
    126     if (!writeField1(VALUE_FLAG ^ INVALID_VALUE_IN_KEY_VALUE_ENTRY , terminalEntryIndex)) {
    127         return false;
    128     }
    129     if (terminalEntry.hasTerminalLink()) {
    130         const Entry nextLevelBitmapEntry = readEntry(terminalEntry.getValueEntryIndex() + 1);
    131         if (!freeTable(terminalEntry.getValueEntryIndex(), TERMINAL_LINKED_ENTRY_COUNT)) {
    132             return false;
    133         }
    134         if (!removeInner(nextLevelBitmapEntry)){
    135             return false;
    136         }
    137     }
    138     return true;
    139 }
    140 
    141 /**
    142  * Iterate next entry in a certain level.
    143  *
    144  * @param iterationState the iteration state that will be read and updated in this method.
    145  * @param outKey the output key
    146  * @return Result instance. mIsValid is false when all entries are iterated.
    147  */
    148 const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *const iterationState,
    149         int *const outKey) const {
    150     while (!iterationState->empty()) {
    151         TableIterationState &state = iterationState->back();
    152         if (state.mTableSize <= state.mCurrentIndex) {
    153             // Move to parent.
    154             iterationState->pop_back();
    155         } else {
    156             const int entryIndex = state.mTableIndex + state.mCurrentIndex;
    157             state.mCurrentIndex += 1;
    158             const Entry entry = readEntry(entryIndex);
    159             if (entry.isBitmapEntry()) {
    160                 // Move to child.
    161                 iterationState->emplace_back(popCount(entry.getBitmap()), entry.getTableIndex());
    162             } else if (entry.isValidTerminalEntry()) {
    163                 if (outKey) {
    164                     *outKey = entry.getKey();
    165                 }
    166                 if (!entry.hasTerminalLink()) {
    167                     return Result(entry.getValue(), true, INVALID_INDEX);
    168                 }
    169                 const int valueEntryIndex = entry.getValueEntryIndex();
    170                 const Entry valueEntry = readEntry(valueEntryIndex);
    171                 return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
    172             }
    173         }
    174     }
    175     // Visited all entries.
    176     return Result(0, false, INVALID_INDEX);
    177 }
    178 
    179 /**
    180  * Shuffle bits of the key in the fixed order.
    181  *
    182  * This method is used as a hash function. This returns different values for different inputs.
    183  */
    184 uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
    185     uint32_t shuffledKey = 0;
    186     for (int i = 0; i < 4; ++i) {
    187         const uint32_t keyPiece = (key >> (i * 8)) & 0xFF;
    188         shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21))
    189                 & 0x11111111) << i;
    190     }
    191     return shuffledKey;
    192 }
    193 
    194 bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
    195     if (value < VALUE_MASK) {
    196         // Write value into the terminal entry.
    197         return writeField1(value | VALUE_FLAG, terminalEntryIndex);
    198     }
    199     // Create value entry and write value.
    200     const int valueEntryIndex = allocateTable(TERMINAL_LINKED_ENTRY_COUNT);
    201     if (valueEntryIndex == INVALID_INDEX) {
    202         return false;
    203     }
    204     if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) {
    205         return false;
    206     }
    207     if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
    208         return false;
    209     }
    210     return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex);
    211 }
    212 
    213 bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value,
    214         const int terminalEntryIndex) {
    215     if (!terminalEntry.hasTerminalLink()) {
    216         return writeValue(value, terminalEntryIndex);
    217     }
    218     const int valueEntryIndex = terminalEntry.getValueEntryIndex();
    219     return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex);
    220 }
    221 
    222 bool TrieMap::freeTable(const int tableIndex, const int entryCount) {
    223     if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) {
    224         return false;
    225     }
    226     return writeEmptyTableLink(tableIndex, entryCount);
    227 }
    228 
    229 /**
    230  * Allocate table with entryCount-entries. Reuse freed table if possible.
    231  */
    232 int TrieMap::allocateTable(const int entryCount) {
    233     if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) {
    234         const int tableIndex = readEmptyTableLink(entryCount);
    235         if (tableIndex > 0) {
    236             if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) {
    237                 return INVALID_INDEX;
    238             }
    239             // Reuse the table.
    240             return tableIndex;
    241         }
    242     }
    243     // Allocate memory space at tail position of the buffer.
    244     const int mapIndex = getTailEntryIndex();
    245     if (!mBuffer.extend(entryCount * ENTRY_SIZE)) {
    246         return INVALID_INDEX;
    247     }
    248     return mapIndex;
    249 }
    250 
    251 int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
    252         const Entry &bitmapEntry, const int level) const {
    253     const int label = getLabel(hashedKey, level);
    254     if (!exists(bitmapEntry.getBitmap(), label)) {
    255         return INVALID_INDEX;
    256     }
    257     const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label);
    258     const Entry entry = readEntry(entryIndex);
    259     if (entry.isBitmapEntry()) {
    260         // Move to the next level.
    261         return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
    262     }
    263     if (!entry.isValidTerminalEntry()) {
    264         return INVALID_INDEX;
    265     }
    266     if (entry.getKey() == key) {
    267         // Terminal entry is found.
    268         return entryIndex;
    269     }
    270     return INVALID_INDEX;
    271 }
    272 
    273 /**
    274  * Get Result corresponding to the key.
    275  *
    276  * @param key the key.
    277  * @param hashedKey the hashed key.
    278  * @param bitmapEntryIndex the index of bitmap entry
    279  * @param level current level
    280  * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the
    281  * map.
    282  */
    283 const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey,
    284         const int bitmapEntryIndex, const int level) const {
    285     const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey,
    286             readEntry(bitmapEntryIndex), level);
    287     if (terminalEntryIndex == INVALID_INDEX) {
    288         // Not found.
    289         return Result(0, false, INVALID_INDEX);
    290     }
    291     const Entry terminalEntry = readEntry(terminalEntryIndex);
    292     if (!terminalEntry.hasTerminalLink()) {
    293         return Result(terminalEntry.getValue(), true, INVALID_INDEX);
    294     }
    295     const int valueEntryIndex = terminalEntry.getValueEntryIndex();
    296     const Entry valueEntry = readEntry(valueEntryIndex);
    297     return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
    298 }
    299 
    300 /**
    301  * Put key to value mapping to the map.
    302  *
    303  * @param key the key.
    304  * @param value the value
    305  * @param hashedKey the hashed key.
    306  * @param bitmapEntryIndex the index of bitmap entry
    307  * @param bitmapEntry the bitmap entry
    308  * @param level current level
    309  * @return whether the key-value has been correctly inserted to the map or not.
    310  */
    311 bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
    312         const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) {
    313     const int label = getLabel(hashedKey, level);
    314     const uint32_t bitmap = bitmapEntry.getBitmap();
    315     const int mapIndex = bitmapEntry.getTableIndex();
    316     if (!exists(bitmap, label)) {
    317         // Current map doesn't contain the label.
    318         return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapEntryIndex, label);
    319     }
    320     const int entryIndex = mapIndex + popCount(bitmap, label);
    321     const Entry entry = readEntry(entryIndex);
    322     if (entry.isBitmapEntry()) {
    323         // Bitmap entry is found. Go to the next level.
    324         return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
    325     }
    326     if (!entry.isValidTerminalEntry()) {
    327         // Overwrite invalid terminal entry.
    328         return writeTerminalEntry(key, value, entryIndex);
    329     }
    330     if (entry.getKey() == key) {
    331         // Terminal entry for the key is found. Update the value.
    332         return updateValue(entry, value, entryIndex);
    333     }
    334     // Conflict with the existing key.
    335     return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex, level);
    336 }
    337 
    338 /**
    339  * Resolve a conflict in the current level and add new entry.
    340  *
    341  * @param key the key
    342  * @param value the value
    343  * @param hashedKey the hashed key
    344  * @param conflictedEntry the existing conflicted entry
    345  * @param conflictedEntryIndex the index of existing conflicted entry
    346  * @param level current level
    347  * @return whether the key-value has been correctly inserted to the map or not.
    348  */
    349 bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
    350         const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
    351         const int level) {
    352     const int conflictedKeyNextLabel =
    353             getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1);
    354     const int nextLabel = getLabel(hashedKey, level + 1);
    355     if (conflictedKeyNextLabel == nextLabel) {
    356         // Conflicted again in the next level.
    357         const int newTableIndex = allocateTable(1 /* entryCount */);
    358         if (newTableIndex == INVALID_INDEX) {
    359             return false;
    360         }
    361         if (!writeEntry(conflictedEntry, newTableIndex)) {
    362             return false;
    363         }
    364         const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex);
    365         if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) {
    366             return false;
    367         }
    368         return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1);
    369     }
    370     // The conflict has been resolved. Create a table that contains 2 entries.
    371     const int newTableIndex = allocateTable(2 /* entryCount */);
    372     if (newTableIndex == INVALID_INDEX) {
    373         return false;
    374     }
    375     if (nextLabel < conflictedKeyNextLabel) {
    376         if (!writeTerminalEntry(key, value, newTableIndex)) {
    377             return false;
    378         }
    379         if (!writeEntry(conflictedEntry, newTableIndex + 1)) {
    380             return false;
    381         }
    382     } else { // nextLabel > conflictedKeyNextLabel
    383         if (!writeEntry(conflictedEntry, newTableIndex)) {
    384             return false;
    385         }
    386         if (!writeTerminalEntry(key, value, newTableIndex + 1)) {
    387             return false;
    388         }
    389     }
    390     const uint32_t updatedBitmap =
    391             setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel);
    392     return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex);
    393 }
    394 
    395 /**
    396  * Add new entry to the existing table.
    397  */
    398 bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
    399         const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) {
    400     // Current map doesn't contain the label.
    401     const int entryCount = popCount(bitmap);
    402     const int newTableIndex = allocateTable(entryCount + 1);
    403     if (newTableIndex == INVALID_INDEX) {
    404         return false;
    405     }
    406     const int newEntryIndexInTable = popCount(bitmap, label);
    407     // Copy from existing table to the new table.
    408     for (int i = 0; i < entryCount; ++i) {
    409         if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) {
    410             return false;
    411         }
    412     }
    413     // Write new terminal entry.
    414     if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) {
    415         return false;
    416     }
    417     // Update bitmap.
    418     if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIndex)) {
    419         return false;
    420     }
    421     if (entryCount > 0) {
    422         return freeTable(tableIndex, entryCount);
    423     }
    424     return true;
    425 }
    426 
    427 bool TrieMap::removeInner(const Entry &bitmapEntry) {
    428     const int tableSize = popCount(bitmapEntry.getBitmap());
    429     if (tableSize <= 0) {
    430         // The table is empty. No need to remove any entries.
    431         return true;
    432     }
    433     for (int i = 0; i < tableSize; ++i) {
    434         const int entryIndex = bitmapEntry.getTableIndex() + i;
    435         const Entry entry = readEntry(entryIndex);
    436         if (entry.isBitmapEntry()) {
    437             // Delete next bitmap entry recursively.
    438             if (!removeInner(entry)) {
    439                 return false;
    440             }
    441         } else {
    442             // Invalidate terminal entry just in case.
    443             if (!writeField1(VALUE_FLAG ^ INVALID_VALUE_IN_KEY_VALUE_ENTRY , entryIndex)) {
    444                 return false;
    445             }
    446             if (entry.hasTerminalLink()) {
    447                 const Entry nextLevelBitmapEntry = readEntry(entry.getValueEntryIndex() + 1);
    448                 if (!freeTable(entry.getValueEntryIndex(), TERMINAL_LINKED_ENTRY_COUNT)) {
    449                     return false;
    450                 }
    451                 if (!removeInner(nextLevelBitmapEntry)) {
    452                     return false;
    453                 }
    454             }
    455         }
    456     }
    457     return true;
    458 }
    459 
    460 }  // namespace latinime
    461