1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h" 18 19 #include "suggest/core/policy/dictionary_shortcuts_structure_policy.h" 20 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h" 21 #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h" 22 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" 23 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h" 24 25 namespace latinime { 26 27 const int DynamicBigramListPolicy::CONTINUING_BIGRAM_LINK_COUNT_LIMIT = 10000; 28 const int DynamicBigramListPolicy::BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT = 100000; 29 30 void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, 31 bool *const outHasNext, int *const bigramEntryPos) const { 32 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramEntryPos); 33 const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); 34 if (usesAdditionalBuffer) { 35 *bigramEntryPos -= mBuffer->getOriginalBufferSize(); 36 } 37 BigramListReadWriteUtils::BigramFlags bigramFlags; 38 int originalBigramPos; 39 BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition(buffer, &bigramFlags, 40 &originalBigramPos, bigramEntryPos); 41 if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { 42 originalBigramPos += mBuffer->getOriginalBufferSize(); 43 } 44 *outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags); 45 *outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags); 46 if (mIsDecayingDict && !ForgettingCurveUtils::isValidEncodedProbability(*outProbability)) { 47 // This bigram is too weak to output. 48 *outBigramPos = NOT_A_DICT_POS; 49 } else { 50 *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); 51 } 52 if (usesAdditionalBuffer) { 53 *bigramEntryPos += mBuffer->getOriginalBufferSize(); 54 } 55 } 56 57 void DynamicBigramListPolicy::skipAllBigrams(int *const bigramListPos) const { 58 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); 59 const uint8_t *const buffer = mBuffer->getBuffer(usesAdditionalBuffer); 60 if (usesAdditionalBuffer) { 61 *bigramListPos -= mBuffer->getOriginalBufferSize(); 62 } 63 BigramListReadWriteUtils::skipExistingBigrams(buffer, bigramListPos); 64 if (usesAdditionalBuffer) { 65 *bigramListPos += mBuffer->getOriginalBufferSize(); 66 } 67 } 68 69 bool DynamicBigramListPolicy::copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, 70 int *const fromPos, int *const toPos, int *const outBigramsCount) const { 71 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*fromPos); 72 if (usesAdditionalBuffer) { 73 *fromPos -= mBuffer->getOriginalBufferSize(); 74 } 75 *outBigramsCount = 0; 76 BigramListReadWriteUtils::BigramFlags bigramFlags; 77 int bigramEntryCount = 0; 78 int lastWrittenEntryPos = NOT_A_DICT_POS; 79 do { 80 if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { 81 AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", 82 bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); 83 ASSERT(false); 84 return false; 85 } 86 // The buffer address can be changed after calling buffer writing methods. 87 int originalBigramPos; 88 BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( 89 mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, 90 fromPos); 91 if (originalBigramPos == NOT_A_DICT_POS) { 92 // skip invalid bigram entry. 93 continue; 94 } 95 if (usesAdditionalBuffer) { 96 originalBigramPos += mBuffer->getOriginalBufferSize(); 97 } 98 const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); 99 if (bigramPos == NOT_A_DICT_POS) { 100 // Target PtNode has been invalidated. 101 continue; 102 } 103 lastWrittenEntryPos = *toPos; 104 if (!BigramListReadWriteUtils::createAndWriteBigramEntry(bufferToWrite, bigramPos, 105 BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), 106 BigramListReadWriteUtils::hasNext(bigramFlags), toPos)) { 107 return false; 108 } 109 (*outBigramsCount)++; 110 } while(BigramListReadWriteUtils::hasNext(bigramFlags)); 111 // Makes the last entry the terminal of the list. Updates the flags. 112 if (lastWrittenEntryPos != NOT_A_DICT_POS) { 113 if (!BigramListReadWriteUtils::setHasNextFlag(bufferToWrite, false /* hasNext */, 114 lastWrittenEntryPos)) { 115 return false; 116 } 117 } 118 if (usesAdditionalBuffer) { 119 *fromPos += mBuffer->getOriginalBufferSize(); 120 } 121 return true; 122 } 123 124 // Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode 125 // has been deleted or is not a valid terminal. 126 bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries( 127 int *const bigramListPos, int *const outValidBigramEntryCount) { 128 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); 129 if (usesAdditionalBuffer) { 130 *bigramListPos -= mBuffer->getOriginalBufferSize(); 131 } 132 DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); 133 BigramListReadWriteUtils::BigramFlags bigramFlags; 134 int bigramEntryCount = 0; 135 do { 136 if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { 137 AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", 138 bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); 139 ASSERT(false); 140 return false; 141 } 142 int bigramEntryPos = *bigramListPos; 143 int originalBigramPos; 144 // The buffer address can be changed after calling buffer writing methods. 145 BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( 146 mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, 147 bigramListPos); 148 if (usesAdditionalBuffer) { 149 bigramEntryPos += mBuffer->getOriginalBufferSize(); 150 } 151 if (originalBigramPos == NOT_A_DICT_POS) { 152 // This entry has already been removed. 153 continue; 154 } 155 if (usesAdditionalBuffer) { 156 originalBigramPos += mBuffer->getOriginalBufferSize(); 157 } 158 const int bigramTargetNodePos = 159 followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); 160 nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos); 161 if (nodeReader.isDeleted() || !nodeReader.isTerminal() 162 || bigramTargetNodePos == NOT_A_DICT_POS) { 163 // The target is no longer valid terminal. Invalidate the current bigram entry. 164 if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, 165 NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) { 166 return false; 167 } 168 continue; 169 } 170 bool isRemoved = false; 171 if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos, 172 &isRemoved)) { 173 return false; 174 } 175 if (!isRemoved) { 176 (*outValidBigramEntryCount) += 1; 177 } 178 } while(BigramListReadWriteUtils::hasNext(bigramFlags)); 179 return true; 180 } 181 182 // Updates bigram target PtNode positions in the list after the placing step in GC. 183 bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos, 184 const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const 185 ptNodePositionRelocationMap, int *const outBigramEntryCount) { 186 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); 187 if (usesAdditionalBuffer) { 188 *bigramListPos -= mBuffer->getOriginalBufferSize(); 189 } 190 BigramListReadWriteUtils::BigramFlags bigramFlags; 191 int bigramEntryCount = 0; 192 do { 193 if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { 194 AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", 195 bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); 196 ASSERT(false); 197 return false; 198 } 199 int bigramEntryPos = *bigramListPos; 200 if (usesAdditionalBuffer) { 201 bigramEntryPos += mBuffer->getOriginalBufferSize(); 202 } 203 int bigramTargetPtNodePos; 204 // The buffer address can be changed after calling buffer writing methods. 205 BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( 206 mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &bigramTargetPtNodePos, 207 bigramListPos); 208 if (bigramTargetPtNodePos == NOT_A_DICT_POS) { 209 continue; 210 } 211 if (usesAdditionalBuffer) { 212 bigramTargetPtNodePos += mBuffer->getOriginalBufferSize(); 213 } 214 215 DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap::const_iterator it = 216 ptNodePositionRelocationMap->find(bigramTargetPtNodePos); 217 if (it != ptNodePositionRelocationMap->end()) { 218 bigramTargetPtNodePos = it->second; 219 } else { 220 bigramTargetPtNodePos = NOT_A_DICT_POS; 221 } 222 if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, 223 bigramTargetPtNodePos, &bigramEntryPos)) { 224 return false; 225 } 226 } while(BigramListReadWriteUtils::hasNext(bigramFlags)); 227 (*outBigramEntryCount) = bigramEntryCount; 228 return true; 229 } 230 231 bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos, 232 const int probability, int *const bigramListPos, bool *const outAddedNewBigram) { 233 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos); 234 if (usesAdditionalBuffer) { 235 *bigramListPos -= mBuffer->getOriginalBufferSize(); 236 } 237 BigramListReadWriteUtils::BigramFlags bigramFlags; 238 int bigramEntryCount = 0; 239 do { 240 if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { 241 AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", 242 bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); 243 ASSERT(false); 244 return false; 245 } 246 int entryPos = *bigramListPos; 247 if (usesAdditionalBuffer) { 248 entryPos += mBuffer->getOriginalBufferSize(); 249 } 250 int originalBigramPos; 251 // The buffer address can be changed after calling buffer writing methods. 252 BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( 253 mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, 254 bigramListPos); 255 if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { 256 originalBigramPos += mBuffer->getOriginalBufferSize(); 257 } 258 if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) { 259 // Update this bigram entry. 260 *outAddedNewBigram = false; 261 const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags( 262 bigramFlags); 263 const int probabilityToWrite = mIsDecayingDict ? 264 ForgettingCurveUtils::getUpdatedEncodedProbability(originalProbability, 265 probability) : probability; 266 const BigramListReadWriteUtils::BigramFlags updatedFlags = 267 BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, 268 probabilityToWrite); 269 return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags, 270 originalBigramPos, &entryPos); 271 } 272 if (BigramListReadWriteUtils::hasNext(bigramFlags)) { 273 continue; 274 } 275 // The current last entry is found. 276 // First, update the flags of the last entry. 277 if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) { 278 *outAddedNewBigram = false; 279 return false; 280 } 281 if (usesAdditionalBuffer) { 282 *bigramListPos += mBuffer->getOriginalBufferSize(); 283 } 284 // Then, add a new entry after the last entry. 285 *outAddedNewBigram = true; 286 return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos); 287 } while(BigramListReadWriteUtils::hasNext(bigramFlags)); 288 // We return directly from the while loop. 289 ASSERT(false); 290 return false; 291 } 292 293 bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability, 294 int *const writingPos) { 295 // hasNext is false because we are adding a new bigram entry at the end of the bigram list. 296 const int probabilityToWrite = mIsDecayingDict ? 297 ForgettingCurveUtils::getUpdatedEncodedProbability(NOT_A_PROBABILITY, probability) : 298 probability; 299 return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos, 300 probabilityToWrite, false /* hasNext */, writingPos); 301 } 302 303 bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) { 304 const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(bigramListPos); 305 int pos = bigramListPos; 306 if (usesAdditionalBuffer) { 307 pos -= mBuffer->getOriginalBufferSize(); 308 } 309 BigramListReadWriteUtils::BigramFlags bigramFlags; 310 int bigramEntryCount = 0; 311 do { 312 if (++bigramEntryCount > BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT) { 313 AKLOGE("Too many bigram entries. Entry count: %d, Limit: %d", 314 bigramEntryCount, BIGRAM_ENTRY_COUNT_IN_A_BIGRAM_LIST_LIMIT); 315 ASSERT(false); 316 return false; 317 } 318 int bigramEntryPos = pos; 319 int originalBigramPos; 320 // The buffer address can be changed after calling buffer writing methods. 321 BigramListReadWriteUtils::getBigramEntryPropertiesAndAdvancePosition( 322 mBuffer->getBuffer(usesAdditionalBuffer), &bigramFlags, &originalBigramPos, &pos); 323 if (usesAdditionalBuffer) { 324 bigramEntryPos += mBuffer->getOriginalBufferSize(); 325 } 326 if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) { 327 originalBigramPos += mBuffer->getOriginalBufferSize(); 328 } 329 const int bigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos); 330 if (bigramPos != bigramTargetPos) { 331 continue; 332 } 333 // Target entry is found. Write an invalid target position to mark the bigram invalid. 334 return BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, 335 NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos); 336 } while(BigramListReadWriteUtils::hasNext(bigramFlags)); 337 return false; 338 } 339 340 int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos( 341 const int originalBigramPos) const { 342 if (originalBigramPos == NOT_A_DICT_POS) { 343 return NOT_A_DICT_POS; 344 } 345 int currentPos = originalBigramPos; 346 DynamicPatriciaTrieNodeReader nodeReader(mBuffer, this /* bigramsPolicy */, mShortcutPolicy); 347 nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); 348 int bigramLinkCount = 0; 349 while (nodeReader.getBigramLinkedNodePos() != NOT_A_DICT_POS) { 350 currentPos = nodeReader.getBigramLinkedNodePos(); 351 nodeReader.fetchNodeInfoInBufferFromPtNodePos(currentPos); 352 bigramLinkCount++; 353 if (bigramLinkCount > CONTINUING_BIGRAM_LINK_COUNT_LIMIT) { 354 AKLOGE("Bigram link is invalid. start position: %d", originalBigramPos); 355 ASSERT(false); 356 return NOT_A_DICT_POS; 357 } 358 } 359 return currentPos; 360 } 361 362 bool DynamicBigramListPolicy::updateProbabilityForDecay( 363 const BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos, 364 int *const bigramEntryPos, bool *const outRemoved) const { 365 *outRemoved = false; 366 if (mIsDecayingDict) { 367 // Update bigram probability for decaying. 368 const int newProbability = ForgettingCurveUtils::getEncodedProbabilityToSave( 369 BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags), mHeaderPolicy); 370 if (ForgettingCurveUtils::isValidEncodedProbability(newProbability)) { 371 // Write new probability. 372 const BigramListReadWriteUtils::BigramFlags updatedBigramFlags = 373 BigramListReadWriteUtils::setProbabilityInFlags( 374 bigramFlags, newProbability); 375 if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags, 376 targetPtNodePos, bigramEntryPos)) { 377 return false; 378 } 379 } else { 380 // Remove current bigram entry. 381 *outRemoved = true; 382 if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags, 383 NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) { 384 return false; 385 } 386 } 387 } 388 return true; 389 } 390 391 } // namespace latinime 392