Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2014, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_TRIE_MAP_H
     18 #define LATINIME_TRIE_MAP_H
     19 
     20 #include <climits>
     21 #include <cstdint>
     22 #include <cstdio>
     23 #include <vector>
     24 
     25 #include "defines.h"
     26 #include "dictionary/utils/buffer_with_extendable_buffer.h"
     27 #include "utils/byte_array_view.h"
     28 
     29 namespace latinime {
     30 
     31 /**
     32  * Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
     33  * key is int and value is uint64_t.
     34  * This supports multiple level map. Terminal entries can have a bitmap for the next level map.
     35  * This doesn't support root map resizing.
     36  */
     37 class TrieMap {
     38  public:
     39     struct Result {
     40         const uint64_t mValue;
     41         const bool mIsValid;
     42         const int mNextLevelBitmapEntryIndex;
     43 
     44         Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex)
     45                 : mValue(value), mIsValid(isValid),
     46                   mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
     47     };
     48 
     49     /**
     50      * Struct to record iteration state in a table.
     51      */
     52     struct TableIterationState {
     53         int mTableSize;
     54         int mTableIndex;
     55         int mCurrentIndex;
     56 
     57         TableIterationState(const int tableSize, const int tableIndex)
     58                 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {}
     59     };
     60 
     61     class TrieMapRange;
     62     class TrieMapIterator {
     63      public:
     64         class IterationResult {
     65          public:
     66             IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value,
     67                     const int nextLeveBitmapEntryIndex)
     68                     : mTrieMap(trieMap), mKey(key), mValue(value),
     69                       mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
     70 
     71             const TrieMapRange getEntriesInNextLevel() const {
     72                 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
     73             }
     74 
     75             bool hasNextLevelMap() const {
     76                 return mNextLevelBitmapEntryIndex != INVALID_INDEX;
     77             }
     78 
     79             AK_FORCE_INLINE int key() const {
     80                 return mKey;
     81             }
     82 
     83             AK_FORCE_INLINE uint64_t value() const {
     84                 return mValue;
     85             }
     86 
     87             AK_FORCE_INLINE int getNextLevelBitmapEntryIndex() const {
     88                 return mNextLevelBitmapEntryIndex;
     89             }
     90 
     91          private:
     92             const TrieMap *const mTrieMap;
     93             const int mKey;
     94             const uint64_t mValue;
     95             const int mNextLevelBitmapEntryIndex;
     96         };
     97 
     98         TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex)
     99                 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex),
    100                   mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) {
    101             if (!trieMap || mBaseBitmapEntryIndex == INVALID_INDEX) {
    102                 return;
    103             }
    104             const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
    105             mStateStack.emplace_back(
    106                     mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex());
    107             this->operator++();
    108         }
    109 
    110         const IterationResult operator*() const {
    111             return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex);
    112         }
    113 
    114         bool operator!=(const TrieMapIterator &other) const {
    115             // Caveat: This works only for for loops.
    116             return mIsValid || other.mIsValid;
    117         }
    118 
    119         const TrieMapIterator &operator++() {
    120             const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
    121             mValue = result.mValue;
    122             mIsValid = result.mIsValid;
    123             mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
    124             return *this;
    125         }
    126 
    127      private:
    128         DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
    129         DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
    130 
    131         const TrieMap *const mTrieMap;
    132         std::vector<TrieMap::TableIterationState> mStateStack;
    133         const int mBaseBitmapEntryIndex;
    134         int mKey;
    135         uint64_t mValue;
    136         bool mIsValid;
    137         int mNextLevelBitmapEntryIndex;
    138     };
    139 
    140     /**
    141      * Class to support iterating entries in TrieMap by range base for loops.
    142      */
    143     class TrieMapRange {
    144      public:
    145         TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex)
    146                 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {};
    147 
    148         TrieMapIterator begin() const {
    149             return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
    150         }
    151 
    152         const TrieMapIterator end() const {
    153             return TrieMapIterator(nullptr, INVALID_INDEX);
    154         }
    155 
    156      private:
    157         DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
    158         DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
    159 
    160         const TrieMap *const mTrieMap;
    161         const int mBaseBitmapEntryIndex;
    162     };
    163 
    164     static const int INVALID_INDEX;
    165     static const uint64_t MAX_VALUE;
    166 
    167     TrieMap();
    168     // Construct TrieMap using existing data in the memory region written by save().
    169     TrieMap(const ReadWriteByteArrayView buffer);
    170     void dump(const int from = 0, const int to = 0) const;
    171 
    172     bool isNearSizeLimit() const {
    173         return mBuffer.isNearSizeLimit();
    174     }
    175 
    176     int getRootBitmapEntryIndex() const {
    177         return ROOT_BITMAP_ENTRY_INDEX;
    178     }
    179 
    180     // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
    181     int getNextLevelBitmapEntryIndex(const int key) {
    182         return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
    183     }
    184 
    185     int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
    186 
    187     const Result getRoot(const int key) const {
    188         return get(key, ROOT_BITMAP_ENTRY_INDEX);
    189     }
    190 
    191     const Result get(const int key, const int bitmapEntryIndex) const;
    192 
    193     bool putRoot(const int key, const uint64_t value) {
    194         return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
    195     }
    196 
    197     bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
    198 
    199     const TrieMapRange getEntriesInRootLevel() const {
    200         return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
    201     }
    202 
    203     const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const {
    204         return TrieMapRange(this, bitmapEntryIndex);
    205     }
    206 
    207     bool save(FILE *const file) const;
    208 
    209     bool remove(const int key, const int bitmapEntryIndex);
    210 
    211  private:
    212     DISALLOW_COPY_AND_ASSIGN(TrieMap);
    213 
    214     /**
    215      * Struct represents an entry.
    216      *
    217      * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
    218      * FIELD_1.
    219      * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
    220      *   FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
    221      * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal
    222      * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
    223      * for the key.
    224      *   FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
    225      * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and
    226      * lower order bytes are stored in FIELD_1.
    227      *   FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
    228      */
    229     struct Entry {
    230         const uint32_t mData0;
    231         const uint32_t mData1;
    232 
    233         Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {}
    234 
    235         AK_FORCE_INLINE bool isBitmapEntry() const {
    236             return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
    237         }
    238 
    239         AK_FORCE_INLINE bool hasTerminalLink() const {
    240             return (mData1 & TERMINAL_LINK_FLAG) != 0;
    241         }
    242 
    243         // For terminal entry.
    244         AK_FORCE_INLINE uint32_t getKey() const {
    245             return mData0;
    246         }
    247 
    248         // For terminal entry.
    249         AK_FORCE_INLINE uint32_t getValue() const {
    250             return mData1 & VALUE_MASK;
    251         }
    252 
    253         // For terminal entry.
    254         AK_FORCE_INLINE bool isValidTerminalEntry() const {
    255             return hasTerminalLink() || ((mData1 & VALUE_MASK) != INVALID_VALUE_IN_KEY_VALUE_ENTRY);
    256         }
    257 
    258         // For terminal entry.
    259         AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
    260             return mData1 & TERMINAL_LINK_MASK;
    261         }
    262 
    263         // For bitmap entry.
    264         AK_FORCE_INLINE uint32_t getBitmap() const {
    265             return mData0;
    266         }
    267 
    268         // For bitmap entry.
    269         AK_FORCE_INLINE int getTableIndex() const {
    270             return static_cast<int>(mData1);
    271         }
    272 
    273         // For value entry.
    274         AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
    275             return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
    276         }
    277     };
    278 
    279     BufferWithExtendableBuffer mBuffer;
    280 
    281     static const int FIELD0_SIZE;
    282     static const int FIELD1_SIZE;
    283     static const int ENTRY_SIZE;
    284     static const uint32_t VALUE_FLAG;
    285     static const uint32_t VALUE_MASK;
    286     static const uint32_t INVALID_VALUE_IN_KEY_VALUE_ENTRY;
    287     static const uint32_t TERMINAL_LINK_FLAG;
    288     static const uint32_t TERMINAL_LINK_MASK;
    289     static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
    290     static const uint32_t LABEL_MASK;
    291     static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
    292     static const int ROOT_BITMAP_ENTRY_INDEX;
    293     static const int ROOT_BITMAP_ENTRY_POS;
    294     static const Entry EMPTY_BITMAP_ENTRY;
    295     static const int TERMINAL_LINKED_ENTRY_COUNT;
    296     static const int MAX_BUFFER_SIZE;
    297 
    298     uint32_t getBitShuffledKey(const uint32_t key) const;
    299     bool writeValue(const uint64_t value, const int terminalEntryIndex);
    300     bool updateValue(const Entry &terminalEntry, const uint64_t value,
    301             const int terminalEntryIndex);
    302     bool freeTable(const int tableIndex, const int entryCount);
    303     int allocateTable(const int entryCount);
    304     int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
    305             const Entry &bitmapEntry, const int level) const;
    306     const Result getInternal(const uint32_t key, const uint32_t hashedKey,
    307             const int bitmapEntryIndex, const int level) const;
    308     bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
    309             const int bitmapEntryIndex, const Entry &bitmapEntry, const int level);
    310     bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
    311             const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
    312             const int level);
    313     bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
    314             const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex,
    315             const int label);
    316     const Result iterateNext(std::vector<TableIterationState> *const iterationState,
    317             int *const outKey) const;
    318 
    319     AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
    320         return Entry(readField0(entryIndex), readField1(entryIndex));
    321     }
    322 
    323     // Returns whether an entry for the index is existing by testing if the index-th bit in the
    324     // bitmap is set or not.
    325     AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
    326         return (bitmap & (1 << index)) != 0;
    327     }
    328 
    329     // Set index-th bit in the bitmap.
    330     AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const {
    331         return bitmap | (1 << index);
    332     }
    333 
    334     // Count set bits before index in the bitmap.
    335     AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
    336         return popCount(bitmap & ((1 << index) - 1));
    337     }
    338 
    339     // Count set bits in the bitmap.
    340     AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
    341         return __builtin_popcount(bitmap);
    342         // int v = bitmap - ((bitmap >> 1) & 0x55555555);
    343         // v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
    344         // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
    345     }
    346 
    347     AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const {
    348         return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK;
    349     }
    350 
    351     AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
    352         return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
    353     }
    354 
    355     AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
    356         return mBuffer.readUint(FIELD1_SIZE,
    357                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
    358     }
    359 
    360     AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
    361         return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
    362     }
    363 
    364     AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) {
    365         return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
    366     }
    367 
    368     AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
    369         return mBuffer.writeUint(data, FIELD0_SIZE,
    370                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
    371     }
    372 
    373     AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
    374         return mBuffer.writeUint(data, FIELD1_SIZE,
    375                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
    376     }
    377 
    378     AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
    379         return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex);
    380     }
    381 
    382     AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value,
    383             const int entryIndex) {
    384         return writeField0(key, entryIndex) && writeValue(value, entryIndex);
    385     }
    386 
    387     AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) {
    388         return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
    389     }
    390 
    391     AK_FORCE_INLINE int getTailEntryIndex() const {
    392         return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
    393     }
    394 
    395     bool removeInner(const Entry &bitmapEntry);
    396 };
    397 
    398 } // namespace latinime
    399 #endif /* LATINIME_TRIE_MAP_H */
    400