Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2014, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/policyimpl/dictionary/utils/trie_map.h"
     18 
     19 #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
     20 
     21 namespace latinime {
     22 
     23 const int TrieMap::INVALID_INDEX = -1;
     24 const int TrieMap::FIELD0_SIZE = 4;
     25 const int TrieMap::FIELD1_SIZE = 3;
     26 const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
     27 const uint32_t TrieMap::VALUE_FLAG = 0x400000;
     28 const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
     29 const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
     30 const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
     31 const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
     32 const uint32_t TrieMap::LABEL_MASK = 0x1F;
     33 const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL;
     34 const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
     35 const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE;
     36 const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
     37 const uint64_t TrieMap::MAX_VALUE =
     38         (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
     39 const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
     40 
     41 TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
     42     mBuffer.extend(ROOT_BITMAP_ENTRY_POS);
     43     writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
     44 }
     45 
     46 TrieMap::TrieMap(const ReadWriteByteArrayView buffer)
     47         : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {}
     48 
     49 void TrieMap::dump(const int from, const int to) const {
     50     AKLOGI("BufSize: %d", mBuffer.getTailPosition());
     51     for (int i = from; i < to; ++i) {
     52         AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i));
     53     }
     54     int unusedRegionSize = 0;
     55     for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) {
     56         int index = readEmptyTableLink(i);
     57         while (index != ROOT_BITMAP_ENTRY_INDEX) {
     58             index = readField0(index);
     59             unusedRegionSize += i;
     60         }
     61     }
     62     AKLOGI("Unused Size: %d", unusedRegionSize);
     63 }
     64 
     65 int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex) {
     66     const Entry bitmapEntry = readEntry(bitmapEntryIndex);
     67     const uint32_t unsignedKey = static_cast<uint32_t>(key);
     68     const int terminalEntryIndex = getTerminalEntryIndex(
     69             unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
     70     if (terminalEntryIndex == INVALID_INDEX) {
     71         // Not found.
     72         return INVALID_INDEX;
     73     }
     74     const Entry terminalEntry = readEntry(terminalEntryIndex);
     75     if (terminalEntry.hasTerminalLink()) {
     76         return terminalEntry.getValueEntryIndex() + 1;
     77     }
     78     // Create a value entry and a bitmap entry.
     79     const int valueEntryIndex = allocateTable(2 /* entryCount */);
     80     if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
     81         return INVALID_INDEX;
     82     }
     83     if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
     84         return INVALID_INDEX;
     85     }
     86     if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) {
     87         return INVALID_INDEX;
     88     }
     89     return valueEntryIndex + 1;
     90 }
     91 
     92 const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) const {
     93     const uint32_t unsignedKey = static_cast<uint32_t>(key);
     94     return getInternal(unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
     95             0 /* level */);
     96 }
     97 
     98 bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryIndex) {
     99     if (value > MAX_VALUE) {
    100         return false;
    101     }
    102     const uint32_t unsignedKey = static_cast<uint32_t>(key);
    103     return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitmapEntryIndex,
    104             readEntry(bitmapEntryIndex), 0 /* level */);
    105 }
    106 
    107 bool TrieMap::save(FILE *const file) const {
    108     return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
    109 }
    110 
    111 /**
    112  * Iterate next entry in a certain level.
    113  *
    114  * @param iterationState the iteration state that will be read and updated in this method.
    115  * @param outKey the output key
    116  * @return Result instance. mIsValid is false when all entries are iterated.
    117  */
    118 const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *const iterationState,
    119         int *const outKey) const {
    120     while (!iterationState->empty()) {
    121         TableIterationState &state = iterationState->back();
    122         if (state.mTableSize <= state.mCurrentIndex) {
    123             // Move to parent.
    124             iterationState->pop_back();
    125         } else {
    126             const int entryIndex = state.mTableIndex + state.mCurrentIndex;
    127             state.mCurrentIndex += 1;
    128             const Entry entry = readEntry(entryIndex);
    129             if (entry.isBitmapEntry()) {
    130                 // Move to child.
    131                 iterationState->emplace_back(popCount(entry.getBitmap()), entry.getTableIndex());
    132             } else {
    133                 if (outKey) {
    134                     *outKey = entry.getKey();
    135                 }
    136                 if (!entry.hasTerminalLink()) {
    137                     return Result(entry.getValue(), true, INVALID_INDEX);
    138                 }
    139                 const int valueEntryIndex = entry.getValueEntryIndex();
    140                 const Entry valueEntry = readEntry(valueEntryIndex);
    141                 return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
    142             }
    143         }
    144     }
    145     // Visited all entries.
    146     return Result(0, false, INVALID_INDEX);
    147 }
    148 
    149 /**
    150  * Shuffle bits of the key in the fixed order.
    151  *
    152  * This method is used as a hash function. This returns different values for different inputs.
    153  */
    154 uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
    155     uint32_t shuffledKey = 0;
    156     for (int i = 0; i < 4; ++i) {
    157         const uint32_t keyPiece = (key >> (i * 8)) & 0xFF;
    158         shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21))
    159                 & 0x11111111) << i;
    160     }
    161     return shuffledKey;
    162 }
    163 
    164 bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
    165     if (value <= VALUE_MASK) {
    166         // Write value into the terminal entry.
    167         return writeField1(value | VALUE_FLAG, terminalEntryIndex);
    168     }
    169     // Create value entry and write value.
    170     const int valueEntryIndex = allocateTable(2 /* entryCount */);
    171     if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex)) {
    172         return false;
    173     }
    174     if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
    175         return false;
    176     }
    177     return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex);
    178 }
    179 
    180 bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value,
    181         const int terminalEntryIndex) {
    182     if (!terminalEntry.hasTerminalLink()) {
    183         return writeValue(value, terminalEntryIndex);
    184     }
    185     const int valueEntryIndex = terminalEntry.getValueEntryIndex();
    186     return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryIndex);
    187 }
    188 
    189 bool TrieMap::freeTable(const int tableIndex, const int entryCount) {
    190     if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) {
    191         return false;
    192     }
    193     return writeEmptyTableLink(tableIndex, entryCount);
    194 }
    195 
    196 /**
    197  * Allocate table with entryCount-entries. Reuse freed table if possible.
    198  */
    199 int TrieMap::allocateTable(const int entryCount) {
    200     if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) {
    201         const int tableIndex = readEmptyTableLink(entryCount);
    202         if (tableIndex > 0) {
    203             if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) {
    204                 return INVALID_INDEX;
    205             }
    206             // Reuse the table.
    207             return tableIndex;
    208         }
    209     }
    210     // Allocate memory space at tail position of the buffer.
    211     const int mapIndex = getTailEntryIndex();
    212     if (!mBuffer.extend(entryCount * ENTRY_SIZE)) {
    213         return INVALID_INDEX;
    214     }
    215     return mapIndex;
    216 }
    217 
    218 int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
    219         const Entry &bitmapEntry, const int level) const {
    220     const int label = getLabel(hashedKey, level);
    221     if (!exists(bitmapEntry.getBitmap(), label)) {
    222         return INVALID_INDEX;
    223     }
    224     const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label);
    225     const Entry entry = readEntry(entryIndex);
    226     if (entry.isBitmapEntry()) {
    227         // Move to the next level.
    228         return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
    229     }
    230     if (entry.getKey() == key) {
    231         // Terminal entry is found.
    232         return entryIndex;
    233     }
    234     return INVALID_INDEX;
    235 }
    236 
    237 /**
    238  * Get Result corresponding to the key.
    239  *
    240  * @param key the key.
    241  * @param hashedKey the hashed key.
    242  * @param bitmapEntryIndex the index of bitmap entry
    243  * @param level current level
    244  * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the
    245  * map.
    246  */
    247 const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t hashedKey,
    248         const int bitmapEntryIndex, const int level) const {
    249     const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey,
    250             readEntry(bitmapEntryIndex), level);
    251     if (terminalEntryIndex == INVALID_INDEX) {
    252         // Not found.
    253         return Result(0, false, INVALID_INDEX);
    254     }
    255     const Entry terminalEntry = readEntry(terminalEntryIndex);
    256     if (!terminalEntry.hasTerminalLink()) {
    257         return Result(terminalEntry.getValue(), true, INVALID_INDEX);
    258     }
    259     const int valueEntryIndex = terminalEntry.getValueEntryIndex();
    260     const Entry valueEntry = readEntry(valueEntryIndex);
    261     return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
    262 }
    263 
    264 /**
    265  * Put key to value mapping to the map.
    266  *
    267  * @param key the key.
    268  * @param value the value
    269  * @param hashedKey the hashed key.
    270  * @param bitmapEntryIndex the index of bitmap entry
    271  * @param bitmapEntry the bitmap entry
    272  * @param level current level
    273  * @return whether the key-value has been correctly inserted to the map or not.
    274  */
    275 bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
    276         const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) {
    277     const int label = getLabel(hashedKey, level);
    278     const uint32_t bitmap = bitmapEntry.getBitmap();
    279     const int mapIndex = bitmapEntry.getTableIndex();
    280     if (!exists(bitmap, label)) {
    281         // Current map doesn't contain the label.
    282         return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapEntryIndex, label);
    283     }
    284     const int entryIndex = mapIndex + popCount(bitmap, label);
    285     const Entry entry = readEntry(entryIndex);
    286     if (entry.isBitmapEntry()) {
    287         // Bitmap entry is found. Go to the next level.
    288         return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
    289     }
    290     if (entry.getKey() == key) {
    291         // Terminal entry for the key is found. Update the value.
    292         return updateValue(entry, value, entryIndex);
    293     }
    294     // Conflict with the existing key.
    295     return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryIndex, level);
    296 }
    297 
    298 /**
    299  * Resolve a conflict in the current level and add new entry.
    300  *
    301  * @param key the key
    302  * @param value the value
    303  * @param hashedKey the hashed key
    304  * @param conflictedEntry the existing conflicted entry
    305  * @param conflictedEntryIndex the index of existing conflicted entry
    306  * @param level current level
    307  * @return whether the key-value has been correctly inserted to the map or not.
    308  */
    309 bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
    310         const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
    311         const int level) {
    312     const int conflictedKeyNextLabel =
    313             getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1);
    314     const int nextLabel = getLabel(hashedKey, level + 1);
    315     if (conflictedKeyNextLabel == nextLabel) {
    316         // Conflicted again in the next level.
    317         const int newTableIndex = allocateTable(1 /* entryCount */);
    318         if (newTableIndex == INVALID_INDEX) {
    319             return false;
    320         }
    321         if (!writeEntry(conflictedEntry, newTableIndex)) {
    322             return false;
    323         }
    324         const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTableIndex);
    325         if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) {
    326             return false;
    327         }
    328         return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitmapEntry, level + 1);
    329     }
    330     // The conflict has been resolved. Create a table that contains 2 entries.
    331     const int newTableIndex = allocateTable(2 /* entryCount */);
    332     if (newTableIndex == INVALID_INDEX) {
    333         return false;
    334     }
    335     if (nextLabel < conflictedKeyNextLabel) {
    336         if (!writeTerminalEntry(key, value, newTableIndex)) {
    337             return false;
    338         }
    339         if (!writeEntry(conflictedEntry, newTableIndex + 1)) {
    340             return false;
    341         }
    342     } else { // nextLabel > conflictedKeyNextLabel
    343         if (!writeEntry(conflictedEntry, newTableIndex)) {
    344             return false;
    345         }
    346         if (!writeTerminalEntry(key, value, newTableIndex + 1)) {
    347             return false;
    348         }
    349     }
    350     const uint32_t updatedBitmap =
    351             setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel);
    352     return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex);
    353 }
    354 
    355 /**
    356  * Add new entry to the existing table.
    357  */
    358 bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
    359         const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) {
    360     // Current map doesn't contain the label.
    361     const int entryCount = popCount(bitmap);
    362     const int newTableIndex = allocateTable(entryCount + 1);
    363     if (newTableIndex == INVALID_INDEX) {
    364         return false;
    365     }
    366     const int newEntryIndexInTable = popCount(bitmap, label);
    367     // Copy from existing table to the new table.
    368     for (int i = 0; i < entryCount; ++i) {
    369         if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) {
    370             return false;
    371         }
    372     }
    373     // Write new terminal entry.
    374     if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) {
    375         return false;
    376     }
    377     // Update bitmap.
    378     if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIndex)) {
    379         return false;
    380     }
    381     if (entryCount > 0) {
    382         return freeTable(tableIndex, entryCount);
    383     }
    384     return true;
    385 }
    386 
    387 }  // namespace latinime
    388