1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H 18 #define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H 19 20 #include <stdint.h> 21 22 #include "defines.h" 23 #include "utils/hash_map_compat.h" 24 25 namespace latinime { 26 27 class BufferWithExtendableBuffer; 28 class DynamicBigramListPolicy; 29 class DynamicPatriciaTrieNodeReader; 30 class DynamicPatriciaTrieReadingHelper; 31 class DynamicShortcutListPolicy; 32 class HeaderPolicy; 33 34 class DynamicPatriciaTrieWritingHelper { 35 public: 36 typedef hash_map_compat<int, int> PtNodeArrayPositionRelocationMap; 37 typedef hash_map_compat<int, int> PtNodePositionRelocationMap; 38 struct DictPositionRelocationMap { 39 public: 40 DictPositionRelocationMap() 41 : mPtNodeArrayPositionRelocationMap(), mPtNodePositionRelocationMap() {} 42 43 PtNodeArrayPositionRelocationMap mPtNodeArrayPositionRelocationMap; 44 PtNodePositionRelocationMap mPtNodePositionRelocationMap; 45 46 private: 47 DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap); 48 }; 49 50 static const size_t MAX_DICTIONARY_SIZE; 51 52 DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer, 53 DynamicBigramListPolicy *const bigramPolicy, 54 DynamicShortcutListPolicy *const shortcutPolicy, const bool needsToDecay) 55 : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy), 56 mNeedsToDecay(needsToDecay) {} 57 58 ~DynamicPatriciaTrieWritingHelper() {} 59 60 // Add a word to the dictionary. If the word already exists, update the probability. 61 bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper, 62 const int *const wordCodePoints, const int codePointCount, const int probability, 63 bool *const outAddedNewUnigram); 64 65 // Add a bigram relation from word0Pos to word1Pos. 66 bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, 67 bool *const outAddedNewBigram); 68 69 // Remove a bigram relation from word0Pos to word1Pos. 70 bool removeBigramWords(const int word0Pos, const int word1Pos); 71 72 void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy, 73 const int unigramCount, const int bigramCount); 74 75 void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName, 76 const HeaderPolicy *const headerPolicy); 77 78 // CAVEAT: This method must be called only from inner classes of 79 // DynamicPatriciaTrieGcEventListeners. 80 bool markNodeAsDeleted(const DynamicPatriciaTrieNodeReader *const nodeToUpdate); 81 82 // CAVEAT: This method must be called only from this class or inner classes of 83 // DynamicPatriciaTrieGcEventListeners. 84 bool writePtNodeToBufferByCopyingPtNodeInfo(BufferWithExtendableBuffer *const bufferToWrite, 85 const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, 86 const int *const codePoints, const int codePointCount, const int probability, 87 int *const writingPos); 88 89 private: 90 DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); 91 92 static const int CHILDREN_POSITION_FIELD_SIZE; 93 94 BufferWithExtendableBuffer *const mBuffer; 95 DynamicBigramListPolicy *const mBigramPolicy; 96 DynamicShortcutListPolicy *const mShortcutPolicy; 97 const bool mNeedsToDecay; 98 99 bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, 100 const int movedPos, const int bigramLinkedNodePos); 101 102 bool writePtNodeWithFullInfoToBuffer(BufferWithExtendableBuffer *const bufferToWrite, 103 const bool isBlacklisted, const bool isNotAWord, 104 const int parentPos, const int *const codePoints, const int codePointCount, 105 const int probability, const int childrenPos, const int originalBigramListPos, 106 const int originalShortcutListPos, int *const writingPos); 107 108 bool writePtNodeToBuffer(BufferWithExtendableBuffer *const bufferToWrite, 109 const int parentPos, const int *const codePoints, const int codePointCount, 110 const int probability, int *const writingPos); 111 112 bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, 113 const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); 114 115 bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode, 116 const int probability, const int *const codePoints, bool *const outAddedNewUnigram); 117 118 bool createChildrenPtNodeArrayAndAChildPtNode( 119 const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, 120 const int *const codePoints, const int codePointCount); 121 122 bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, 123 const int nodeCodePointCount, const int probability); 124 125 bool reallocatePtNodeAndAddNewPtNodes( 126 const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, 127 const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, 128 const int probabilityOfNewPtNode, const int *const newNodeCodePoints, 129 const int newNodeCodePointCount); 130 131 bool runGC(const int rootPtNodeArrayPos, const HeaderPolicy *const headerPolicy, 132 BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount, 133 int *const outBigramCount); 134 135 int getUpdatedProbability(const int originalProbability, const int newProbability); 136 }; 137 } // namespace latinime 138 #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ 139