Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2014, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_TRIE_MAP_H
     18 #define LATINIME_TRIE_MAP_H
     19 
     20 #include <climits>
     21 #include <cstdint>
     22 #include <cstdio>
     23 #include <vector>
     24 
     25 #include "defines.h"
     26 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
     27 #include "utils/byte_array_view.h"
     28 
     29 namespace latinime {
     30 
     31 /**
     32  * Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
     33  * key is int and value is uint64_t.
     34  * This supports multiple level map. Terminal entries can have a bitmap for the next level map.
     35  * This doesn't support root map resizing.
     36  */
     37 class TrieMap {
     38  public:
     39     struct Result {
     40         const uint64_t mValue;
     41         const bool mIsValid;
     42         const int mNextLevelBitmapEntryIndex;
     43 
     44         Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex)
     45                 : mValue(value), mIsValid(isValid),
     46                   mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
     47     };
     48 
     49     /**
     50      * Struct to record iteration state in a table.
     51      */
     52     struct TableIterationState {
     53         int mTableSize;
     54         int mTableIndex;
     55         int mCurrentIndex;
     56 
     57         TableIterationState(const int tableSize, const int tableIndex)
     58                 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {}
     59     };
     60 
     61     class TrieMapRange;
     62     class TrieMapIterator {
     63      public:
     64         class IterationResult {
     65          public:
     66             IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value,
     67                     const int nextLeveBitmapEntryIndex)
     68                     : mTrieMap(trieMap), mKey(key), mValue(value),
     69                       mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
     70 
     71             const TrieMapRange getEntriesInNextLevel() const {
     72                 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
     73             }
     74 
     75             bool hasNextLevelMap() const {
     76                 return mNextLevelBitmapEntryIndex != INVALID_INDEX;
     77             }
     78 
     79             AK_FORCE_INLINE int key() const {
     80                 return mKey;
     81             }
     82 
     83             AK_FORCE_INLINE uint64_t value() const {
     84                 return mValue;
     85             }
     86 
     87          private:
     88             const TrieMap *const mTrieMap;
     89             const int mKey;
     90             const uint64_t mValue;
     91             const int mNextLevelBitmapEntryIndex;
     92         };
     93 
     94         TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex)
     95                 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex),
     96                   mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) {
     97             if (!trieMap) {
     98                 return;
     99             }
    100             const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
    101             mStateStack.emplace_back(
    102                     mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex());
    103             this->operator++();
    104         }
    105 
    106         const IterationResult operator*() const {
    107             return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex);
    108         }
    109 
    110         bool operator!=(const TrieMapIterator &other) const {
    111             // Caveat: This works only for for loops.
    112             return mIsValid || other.mIsValid;
    113         }
    114 
    115         const TrieMapIterator &operator++() {
    116             const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
    117             mValue = result.mValue;
    118             mIsValid = result.mIsValid;
    119             mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
    120             return *this;
    121         }
    122 
    123      private:
    124         DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
    125         DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
    126 
    127         const TrieMap *const mTrieMap;
    128         std::vector<TrieMap::TableIterationState> mStateStack;
    129         const int mBaseBitmapEntryIndex;
    130         int mKey;
    131         uint64_t mValue;
    132         bool mIsValid;
    133         int mNextLevelBitmapEntryIndex;
    134     };
    135 
    136     /**
    137      * Class to support iterating entries in TrieMap by range base for loops.
    138      */
    139     class TrieMapRange {
    140      public:
    141         TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex)
    142                 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {};
    143 
    144         TrieMapIterator begin() const {
    145             return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
    146         }
    147 
    148         const TrieMapIterator end() const {
    149             return TrieMapIterator(nullptr, INVALID_INDEX);
    150         }
    151 
    152      private:
    153         DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
    154         DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
    155 
    156         const TrieMap *const mTrieMap;
    157         const int mBaseBitmapEntryIndex;
    158     };
    159 
    160     static const int INVALID_INDEX;
    161     static const uint64_t MAX_VALUE;
    162 
    163     TrieMap();
    164     // Construct TrieMap using existing data in the memory region written by save().
    165     TrieMap(const ReadWriteByteArrayView buffer);
    166     void dump(const int from = 0, const int to = 0) const;
    167 
    168     bool isNearSizeLimit() const {
    169         return mBuffer.isNearSizeLimit();
    170     }
    171 
    172     int getRootBitmapEntryIndex() const {
    173         return ROOT_BITMAP_ENTRY_INDEX;
    174     }
    175 
    176     // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
    177     int getNextLevelBitmapEntryIndex(const int key) {
    178         return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
    179     }
    180 
    181     int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
    182 
    183     const Result getRoot(const int key) const {
    184         return get(key, ROOT_BITMAP_ENTRY_INDEX);
    185     }
    186 
    187     const Result get(const int key, const int bitmapEntryIndex) const;
    188 
    189     bool putRoot(const int key, const uint64_t value) {
    190         return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
    191     }
    192 
    193     bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
    194 
    195     const TrieMapRange getEntriesInRootLevel() const {
    196         return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
    197     }
    198 
    199     const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const {
    200         return TrieMapRange(this, bitmapEntryIndex);
    201     }
    202 
    203     bool save(FILE *const file) const;
    204 
    205  private:
    206     DISALLOW_COPY_AND_ASSIGN(TrieMap);
    207 
    208     /**
    209      * Struct represents an entry.
    210      *
    211      * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
    212      * FIELD_1.
    213      * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
    214      *   FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
    215      * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal
    216      * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
    217      * for the key.
    218      *   FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
    219      * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and
    220      * lower order bytes are stored in FIELD_1.
    221      *   FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
    222      */
    223     struct Entry {
    224         const uint32_t mData0;
    225         const uint32_t mData1;
    226 
    227         Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {}
    228 
    229         AK_FORCE_INLINE bool isBitmapEntry() const {
    230             return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
    231         }
    232 
    233         AK_FORCE_INLINE bool hasTerminalLink() const {
    234             return (mData1 & TERMINAL_LINK_FLAG) != 0;
    235         }
    236 
    237         // For terminal entry.
    238         AK_FORCE_INLINE uint32_t getKey() const {
    239             return mData0;
    240         }
    241 
    242         // For terminal entry.
    243         AK_FORCE_INLINE uint32_t getValue() const {
    244             return mData1 & VALUE_MASK;
    245         }
    246 
    247         // For terminal entry.
    248         AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
    249             return mData1 & TERMINAL_LINK_MASK;
    250         }
    251 
    252         // For bitmap entry.
    253         AK_FORCE_INLINE uint32_t getBitmap() const {
    254             return mData0;
    255         }
    256 
    257         // For bitmap entry.
    258         AK_FORCE_INLINE int getTableIndex() const {
    259             return static_cast<int>(mData1);
    260         }
    261 
    262         // For value entry.
    263         AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
    264             return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
    265         }
    266     };
    267 
    268     BufferWithExtendableBuffer mBuffer;
    269 
    270     static const int FIELD0_SIZE;
    271     static const int FIELD1_SIZE;
    272     static const int ENTRY_SIZE;
    273     static const uint32_t VALUE_FLAG;
    274     static const uint32_t VALUE_MASK;
    275     static const uint32_t TERMINAL_LINK_FLAG;
    276     static const uint32_t TERMINAL_LINK_MASK;
    277     static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
    278     static const uint32_t LABEL_MASK;
    279     static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
    280     static const int ROOT_BITMAP_ENTRY_INDEX;
    281     static const int ROOT_BITMAP_ENTRY_POS;
    282     static const Entry EMPTY_BITMAP_ENTRY;
    283     static const int MAX_BUFFER_SIZE;
    284 
    285     uint32_t getBitShuffledKey(const uint32_t key) const;
    286     bool writeValue(const uint64_t value, const int terminalEntryIndex);
    287     bool updateValue(const Entry &terminalEntry, const uint64_t value,
    288             const int terminalEntryIndex);
    289     bool freeTable(const int tableIndex, const int entryCount);
    290     int allocateTable(const int entryCount);
    291     int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
    292             const Entry &bitmapEntry, const int level) const;
    293     const Result getInternal(const uint32_t key, const uint32_t hashedKey,
    294             const int bitmapEntryIndex, const int level) const;
    295     bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey,
    296             const int bitmapEntryIndex, const Entry &bitmapEntry, const int level);
    297     bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
    298             const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex,
    299             const int level);
    300     bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
    301             const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex,
    302             const int label);
    303     const Result iterateNext(std::vector<TableIterationState> *const iterationState,
    304             int *const outKey) const;
    305 
    306     AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
    307         return Entry(readField0(entryIndex), readField1(entryIndex));
    308     }
    309 
    310     // Returns whether an entry for the index is existing by testing if the index-th bit in the
    311     // bitmap is set or not.
    312     AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
    313         return (bitmap & (1 << index)) != 0;
    314     }
    315 
    316     // Set index-th bit in the bitmap.
    317     AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const {
    318         return bitmap | (1 << index);
    319     }
    320 
    321     // Count set bits before index in the bitmap.
    322     AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
    323         return popCount(bitmap & ((1 << index) - 1));
    324     }
    325 
    326     // Count set bits in the bitmap.
    327     AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
    328         return __builtin_popcount(bitmap);
    329         // int v = bitmap - ((bitmap >> 1) & 0x55555555);
    330         // v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
    331         // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
    332     }
    333 
    334     AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const {
    335         return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK;
    336     }
    337 
    338     AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
    339         return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
    340     }
    341 
    342     AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
    343         return mBuffer.readUint(FIELD1_SIZE,
    344                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
    345     }
    346 
    347     AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
    348         return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
    349     }
    350 
    351     AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) {
    352         return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
    353     }
    354 
    355     AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
    356         return mBuffer.writeUint(data, FIELD0_SIZE,
    357                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
    358     }
    359 
    360     AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
    361         return mBuffer.writeUint(data, FIELD1_SIZE,
    362                 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
    363     }
    364 
    365     AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
    366         return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex);
    367     }
    368 
    369     AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value,
    370             const int entryIndex) {
    371         return writeField0(key, entryIndex) && writeValue(value, entryIndex);
    372     }
    373 
    374     AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) {
    375         return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
    376     }
    377 
    378     AK_FORCE_INLINE int getTailEntryIndex() const {
    379         return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
    380     }
    381 };
    382 
    383 } // namespace latinime
    384 #endif /* LATINIME_TRIE_MAP_H */
    385