1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" 18 19 #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" 20 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h" 21 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" 22 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" 23 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" 24 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" 25 #include "suggest/policyimpl/dictionary/header/header_policy.h" 26 #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" 27 #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" 28 #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" 29 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" 30 #include "utils/hash_map_compat.h" 31 32 namespace latinime { 33 34 const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; 35 // TODO: Make MAX_DICTIONARY_SIZE 8MB. 36 const size_t DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE = 2 * 1024 * 1024; 37 38 bool DynamicPatriciaTrieWritingHelper::addUnigramWord( 39 DynamicPatriciaTrieReadingHelper *const readingHelper, 40 const int *const wordCodePoints, const int codePointCount, const int probability, 41 bool *const outAddedNewUnigram) { 42 int parentPos = NOT_A_DICT_POS; 43 while (!readingHelper->isEnd()) { 44 const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); 45 if (!readingHelper->isMatchedCodePoint(0 /* index */, 46 wordCodePoints[matchedCodePointCount])) { 47 // The first code point is different from target code point. Skip this node and read 48 // the next sibling node. 49 readingHelper->readNextSiblingNode(); 50 continue; 51 } 52 // Check following merged node code points. 53 const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper->getNodeReader(); 54 const int nodeCodePointCount = nodeReader->getCodePointCount(); 55 for (int j = 1; j < nodeCodePointCount; ++j) { 56 const int nextIndex = matchedCodePointCount + j; 57 if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j, 58 wordCodePoints[matchedCodePointCount + j])) { 59 *outAddedNewUnigram = true; 60 return reallocatePtNodeAndAddNewPtNodes(nodeReader, 61 readingHelper->getMergedNodeCodePoints(), j, 62 getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, 63 probability), 64 wordCodePoints + matchedCodePointCount, 65 codePointCount - matchedCodePointCount); 66 } 67 } 68 // All characters are matched. 69 if (codePointCount == readingHelper->getTotalCodePointCount()) { 70 return setPtNodeProbability(nodeReader, probability, 71 readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram); 72 } 73 if (!nodeReader->hasChildren()) { 74 *outAddedNewUnigram = true; 75 return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, 76 getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), 77 wordCodePoints + readingHelper->getTotalCodePointCount(), 78 codePointCount - readingHelper->getTotalCodePointCount()); 79 } 80 // Advance to the children nodes. 81 parentPos = nodeReader->getHeadPos(); 82 readingHelper->readChildNode(); 83 } 84 if (readingHelper->isError()) { 85 // The dictionary is invalid. 86 return false; 87 } 88 int pos = readingHelper->getPosOfLastForwardLinkField(); 89 *outAddedNewUnigram = true; 90 return createAndInsertNodeIntoPtNodeArray(parentPos, 91 wordCodePoints + readingHelper->getPrevTotalCodePointCount(), 92 codePointCount - readingHelper->getPrevTotalCodePointCount(), 93 getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos); 94 } 95 96 bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos, 97 const int probability, bool *const outAddedNewBigram) { 98 int mMergedNodeCodePoints[MAX_WORD_LENGTH]; 99 DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); 100 nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH, 101 mMergedNodeCodePoints); 102 // Move node to add bigram entry. 103 const int newNodePos = mBuffer->getTailPosition(); 104 if (!markNodeAsMovedAndSetPosition(&nodeReader, newNodePos, newNodePos)) { 105 return false; 106 } 107 int writingPos = newNodePos; 108 // Write a new PtNode using original PtNode's info to the tail of the dictionary in mBuffer. 109 if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, &nodeReader, nodeReader.getParentPos(), 110 mMergedNodeCodePoints, nodeReader.getCodePointCount(), nodeReader.getProbability(), 111 &writingPos)) { 112 return false; 113 } 114 nodeReader.fetchNodeInfoInBufferFromPtNodePos(newNodePos); 115 if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) { 116 // Insert a new bigram entry into the existing bigram list. 117 int bigramListPos = nodeReader.getBigramsPos(); 118 return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos, 119 outAddedNewBigram); 120 } else { 121 // The PtNode doesn't have a bigram list. 122 *outAddedNewBigram = true; 123 // First, Write a bigram entry at the tail position of the PtNode. 124 if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) { 125 return false; 126 } 127 // Then, Mark as the PtNode having bigram list in the flags. 128 const PatriciaTrieReadingUtils::NodeFlags updatedFlags = 129 PatriciaTrieReadingUtils::createAndGetFlags(nodeReader.isBlacklisted(), 130 nodeReader.isNotAWord(), nodeReader.getProbability() != NOT_A_PROBABILITY, 131 nodeReader.getShortcutPos() != NOT_A_DICT_POS, true /* hasBigrams */, 132 nodeReader.getCodePointCount() > 1, CHILDREN_POSITION_FIELD_SIZE); 133 writingPos = newNodePos; 134 // Write updated flags into the moved PtNode's flags field. 135 return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, 136 &writingPos); 137 } 138 } 139 140 // Remove a bigram relation from word0Pos to word1Pos. 141 bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, const int word1Pos) { 142 DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); 143 nodeReader.fetchNodeInfoInBufferFromPtNodePos(word0Pos); 144 if (nodeReader.getBigramsPos() == NOT_A_DICT_POS) { 145 return false; 146 } 147 return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos); 148 } 149 150 void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, 151 const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) { 152 BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); 153 const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + 154 mBuffer->getUsedAdditionalBufferSize(); 155 if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, 156 false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) { 157 return; 158 } 159 DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer); 160 } 161 162 void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos, 163 const char *const fileName, const HeaderPolicy *const headerPolicy) { 164 BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */, 165 MAX_DICTIONARY_SIZE); 166 int unigramCount = 0; 167 int bigramCount = 0; 168 if (mNeedsToDecay) { 169 ForgettingCurveUtils::sTimeKeeper.setCurrentTime(); 170 } 171 if (!runGC(rootPtNodeArrayPos, headerPolicy, &newDictBuffer, &unigramCount, &bigramCount)) { 172 return; 173 } 174 BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); 175 if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */, 176 mNeedsToDecay, unigramCount, bigramCount, 0 /* extendedRegionSize */)) { 177 return; 178 } 179 DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer); 180 } 181 182 bool DynamicPatriciaTrieWritingHelper::markNodeAsDeleted( 183 const DynamicPatriciaTrieNodeReader *const nodeToUpdate) { 184 int pos = nodeToUpdate->getHeadPos(); 185 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); 186 const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); 187 if (usesAdditionalBuffer) { 188 pos -= mBuffer->getOriginalBufferSize(); 189 } 190 // Read original flags 191 const PatriciaTrieReadingUtils::NodeFlags originalFlags = 192 PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); 193 const PatriciaTrieReadingUtils::NodeFlags updatedFlags = 194 DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, false /* isMoved */, 195 true /* isDeleted */); 196 int writingPos = nodeToUpdate->getHeadPos(); 197 // Update flags. 198 return DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, 199 &writingPos); 200 } 201 202 bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition( 203 const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos, 204 const int bigramLinkedNodePos) { 205 int pos = originalNode->getHeadPos(); 206 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(pos); 207 const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer); 208 if (usesAdditionalBuffer) { 209 pos -= mBuffer->getOriginalBufferSize(); 210 } 211 // Read original flags 212 const PatriciaTrieReadingUtils::NodeFlags originalFlags = 213 PatriciaTrieReadingUtils::getFlagsAndAdvancePosition(dictBuf, &pos); 214 const PatriciaTrieReadingUtils::NodeFlags updatedFlags = 215 DynamicPatriciaTrieReadingUtils::updateAndGetFlags(originalFlags, true /* isMoved */, 216 false /* isDeleted */); 217 int writingPos = originalNode->getHeadPos(); 218 // Update flags. 219 if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(mBuffer, updatedFlags, 220 &writingPos)) { 221 return false; 222 } 223 // Update moved position, which is stored in the parent offset field. 224 if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( 225 mBuffer, movedPos, originalNode->getHeadPos(), &writingPos)) { 226 return false; 227 } 228 // Update bigram linked node position, which is stored in the children position field. 229 int childrenPosFieldPos = originalNode->getChildrenPosFieldPos(); 230 if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition( 231 mBuffer, bigramLinkedNodePos, &childrenPosFieldPos)) { 232 return false; 233 } 234 if (originalNode->hasChildren()) { 235 // Update children's parent position. 236 DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); 237 const DynamicPatriciaTrieNodeReader *const nodeReader = readingHelper.getNodeReader(); 238 readingHelper.initWithPtNodeArrayPos(originalNode->getChildrenPos()); 239 while (!readingHelper.isEnd()) { 240 int parentOffsetFieldPos = nodeReader->getHeadPos() 241 + DynamicPatriciaTrieWritingUtils::NODE_FLAG_FIELD_SIZE; 242 if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition( 243 mBuffer, bigramLinkedNodePos, nodeReader->getHeadPos(), 244 &parentOffsetFieldPos)) { 245 // Parent offset cannot be written because of a bug or a broken dictionary; thus, 246 // we give up to update dictionary. 247 return false; 248 } 249 readingHelper.readNextSiblingNode(); 250 } 251 } 252 return true; 253 } 254 255 // Write new PtNode at writingPos. 256 bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer( 257 BufferWithExtendableBuffer *const bufferToWrite, const bool isBlacklisted, 258 const bool isNotAWord, const int parentPos, const int *const codePoints, 259 const int codePointCount, const int probability, const int childrenPos, 260 const int originalBigramListPos, const int originalShortcutListPos, 261 int *const writingPos) { 262 const int nodePos = *writingPos; 263 // Write dummy flags. The Node flags are updated with appropriate flags at the last step of the 264 // PtNode writing. 265 if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, 266 0 /* nodeFlags */, writingPos)) { 267 return false; 268 } 269 // Calculate a parent offset and write the offset. 270 if (!DynamicPatriciaTrieWritingUtils::writeParentPosOffsetAndAdvancePosition(bufferToWrite, 271 parentPos, nodePos, writingPos)) { 272 return false; 273 } 274 // Write code points 275 if (!DynamicPatriciaTrieWritingUtils::writeCodePointsAndAdvancePosition(bufferToWrite, 276 codePoints, codePointCount, writingPos)) { 277 return false; 278 } 279 // Write probability when the probability is a valid probability, which means this node is 280 // terminal. 281 if (probability != NOT_A_PROBABILITY) { 282 if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(bufferToWrite, 283 probability, writingPos)) { 284 return false; 285 } 286 } 287 // Write children position 288 if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(bufferToWrite, 289 childrenPos, writingPos)) { 290 return false; 291 } 292 // Copy shortcut list when the originalShortcutListPos is valid dictionary position. 293 if (originalShortcutListPos != NOT_A_DICT_POS) { 294 int fromPos = originalShortcutListPos; 295 if (!mShortcutPolicy->copyAllShortcutsAndReturnIfSucceededOrNot(bufferToWrite, &fromPos, 296 writingPos)) { 297 return false; 298 } 299 } 300 // Copy bigram list when the originalBigramListPos is valid dictionary position. 301 int bigramCount = 0; 302 if (originalBigramListPos != NOT_A_DICT_POS) { 303 int fromPos = originalBigramListPos; 304 if (!mBigramPolicy->copyAllBigrams(bufferToWrite, &fromPos, writingPos, &bigramCount)) { 305 return false; 306 } 307 } 308 // Create node flags and write them. 309 PatriciaTrieReadingUtils::NodeFlags nodeFlags = 310 PatriciaTrieReadingUtils::createAndGetFlags(isBlacklisted, isNotAWord, 311 probability != NOT_A_PROBABILITY /* isTerminal */, 312 originalShortcutListPos != NOT_A_DICT_POS /* hasShortcutTargets */, 313 bigramCount > 0 /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, 314 CHILDREN_POSITION_FIELD_SIZE); 315 int flagsFieldPos = nodePos; 316 if (!DynamicPatriciaTrieWritingUtils::writeFlagsAndAdvancePosition(bufferToWrite, nodeFlags, 317 &flagsFieldPos)) { 318 return false; 319 } 320 return true; 321 } 322 323 bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer( 324 BufferWithExtendableBuffer *const bufferToWrite, const int parentPos, 325 const int *const codePoints, const int codePointCount, const int probability, 326 int *const writingPos) { 327 return writePtNodeWithFullInfoToBuffer(bufferToWrite, false /* isBlacklisted */, 328 false /* isNotAWord */, parentPos, codePoints, codePointCount, probability, 329 NOT_A_DICT_POS /* childrenPos */, NOT_A_DICT_POS /* originalBigramsPos */, 330 NOT_A_DICT_POS /* originalShortcutPos */, writingPos); 331 } 332 333 bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo( 334 BufferWithExtendableBuffer *const bufferToWrite, 335 const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos, 336 const int *const codePoints, const int codePointCount, const int probability, 337 int *const writingPos) { 338 return writePtNodeWithFullInfoToBuffer(bufferToWrite, originalNode->isBlacklisted(), 339 originalNode->isNotAWord(), parentPos, codePoints, codePointCount, probability, 340 originalNode->getChildrenPos(), originalNode->getBigramsPos(), 341 originalNode->getShortcutPos(), writingPos); 342 } 343 344 bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, 345 const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, 346 int *const forwardLinkFieldPos) { 347 const int newPtNodeArrayPos = mBuffer->getTailPosition(); 348 if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, 349 newPtNodeArrayPos, forwardLinkFieldPos)) { 350 return false; 351 } 352 return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, 353 probability); 354 } 355 356 bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability( 357 const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability, 358 const int *const codePoints, bool *const outAddedNewUnigram) { 359 if (originalPtNode->isTerminal()) { 360 // Overwrites the probability. 361 *outAddedNewUnigram = false; 362 const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(), 363 probability); 364 int probabilityFieldPos = originalPtNode->getProbabilityFieldPos(); 365 if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer, 366 probabilityToWrite, &probabilityFieldPos)) { 367 return false; 368 } 369 } else { 370 // Make the node terminal and write the probability. 371 *outAddedNewUnigram = true; 372 int movedPos = mBuffer->getTailPosition(); 373 if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) { 374 return false; 375 } 376 if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode, 377 originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), 378 getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), 379 &movedPos)) { 380 return false; 381 } 382 } 383 return true; 384 } 385 386 bool DynamicPatriciaTrieWritingHelper::createChildrenPtNodeArrayAndAChildPtNode( 387 const DynamicPatriciaTrieNodeReader *const parentNode, const int probability, 388 const int *const codePoints, const int codePointCount) { 389 const int newPtNodeArrayPos = mBuffer->getTailPosition(); 390 int childrenPosFieldPos = parentNode->getChildrenPosFieldPos(); 391 if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, 392 newPtNodeArrayPos, &childrenPosFieldPos)) { 393 return false; 394 } 395 return createNewPtNodeArrayWithAChildPtNode(parentNode->getHeadPos(), codePoints, 396 codePointCount, probability); 397 } 398 399 bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode( 400 const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, 401 const int probability) { 402 int writingPos = mBuffer->getTailPosition(); 403 if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, 404 1 /* arraySize */, &writingPos)) { 405 return false; 406 } 407 if (!writePtNodeToBuffer(mBuffer, parentPtNodePos, nodeCodePoints, nodeCodePointCount, 408 probability, &writingPos)) { 409 return false; 410 } 411 if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, 412 NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { 413 return false; 414 } 415 return true; 416 } 417 418 // Returns whether the dictionary updating was succeeded or not. 419 bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes( 420 const DynamicPatriciaTrieNodeReader *const reallocatingPtNode, 421 const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount, 422 const int probabilityOfNewPtNode, const int *const newNodeCodePoints, 423 const int newNodeCodePointCount) { 424 // When addsExtraChild is true, split the reallocating PtNode and add new child. 425 // Reallocating PtNode: abcde, newNode: abcxy. 426 // abc (1st, not terminal) __ de (2nd) 427 // \_ xy (extra child, terminal) 428 // Otherwise, this method makes 1st part terminal and write probabilityOfNewPtNode. 429 // Reallocating PtNode: abcde, newNode: abc. 430 // abc (1st, terminal) __ de (2nd) 431 const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; 432 const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); 433 int writingPos = firstPartOfReallocatedPtNodePos; 434 // Write the 1st part of the reallocating node. The children position will be updated later 435 // with actual children position. 436 const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode; 437 if (!writePtNodeToBuffer(mBuffer, reallocatingPtNode->getParentPos(), 438 reallocatingPtNodeCodePoints, overlappingCodePointCount, newProbability, 439 &writingPos)) { 440 return false; 441 } 442 const int actualChildrenPos = writingPos; 443 // Create new children PtNode array. 444 const size_t newPtNodeCount = addsExtraChild ? 2 : 1; 445 if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, 446 newPtNodeCount, &writingPos)) { 447 return false; 448 } 449 // Write the 2nd part of the reallocating node. 450 const int secondPartOfReallocatedPtNodePos = writingPos; 451 if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, reallocatingPtNode, 452 firstPartOfReallocatedPtNodePos, 453 reallocatingPtNodeCodePoints + overlappingCodePointCount, 454 reallocatingPtNode->getCodePointCount() - overlappingCodePointCount, 455 reallocatingPtNode->getProbability(), &writingPos)) { 456 return false; 457 } 458 if (addsExtraChild) { 459 if (!writePtNodeToBuffer(mBuffer, firstPartOfReallocatedPtNodePos, 460 newNodeCodePoints + overlappingCodePointCount, 461 newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode, 462 &writingPos)) { 463 return false; 464 } 465 } 466 if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, 467 NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { 468 return false; 469 } 470 // Update original reallocatingPtNode as moved. 471 if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPartOfReallocatedPtNodePos, 472 secondPartOfReallocatedPtNodePos)) { 473 return false; 474 } 475 // Load node info. Information of the 1st part will be fetched. 476 DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy); 477 nodeReader.fetchNodeInfoInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos); 478 // Update children position. 479 int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos(); 480 if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer, 481 actualChildrenPos, &childrenPosFieldPos)) { 482 return false; 483 } 484 return true; 485 } 486 487 bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos, 488 const HeaderPolicy *const headerPolicy, BufferWithExtendableBuffer *const bufferToWrite, 489 int *const outUnigramCount, int *const outBigramCount) { 490 DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy); 491 readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 492 DynamicPatriciaTrieGcEventListeners 493 ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted 494 traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted( 495 headerPolicy, this, mBuffer, mNeedsToDecay); 496 if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( 497 &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) { 498 return false; 499 } 500 if (mNeedsToDecay && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted 501 .getValidUnigramCount() > ForgettingCurveUtils::MAX_UNIGRAM_COUNT_AFTER_GC) { 502 // TODO: Remove more unigrams. 503 } 504 505 readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 506 DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability 507 traversePolicyToUpdateBigramProbability(mBigramPolicy); 508 if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner( 509 &traversePolicyToUpdateBigramProbability)) { 510 return false; 511 } 512 if (mNeedsToDecay && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount() 513 > ForgettingCurveUtils::MAX_BIGRAM_COUNT_AFTER_GC) { 514 // TODO: Remove more bigrams. 515 } 516 517 // Mapping from positions in mBuffer to positions in bufferToWrite. 518 DictPositionRelocationMap dictPositionRelocationMap; 519 readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 520 DynamicPatriciaTrieGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer 521 traversePolicyToPlaceAndWriteValidPtNodesToBuffer(this, bufferToWrite, 522 &dictPositionRelocationMap); 523 if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( 524 &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) { 525 return false; 526 } 527 528 // Create policy instance for the GCed dictionary. 529 DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite); 530 DynamicBigramListPolicy newDictBigramPolicy(headerPolicy, bufferToWrite, &newDictShortcutPolicy, 531 mNeedsToDecay); 532 // Create reading helper for the GCed dictionary. 533 DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy, 534 &newDictShortcutPolicy); 535 newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos); 536 DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateAllPositionFields 537 traversePolicyToUpdateAllPositionFields(this, &newDictBigramPolicy, bufferToWrite, 538 &dictPositionRelocationMap); 539 if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( 540 &traversePolicyToUpdateAllPositionFields)) { 541 return false; 542 } 543 *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount(); 544 *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount(); 545 return true; 546 } 547 548 int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability, 549 const int newProbability) { 550 if (mNeedsToDecay) { 551 return ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, 552 newProbability); 553 } else { 554 return newProbability; 555 } 556 } 557 558 } // namespace latinime 559