1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! 19 * Do not edit this file other than updating policy's interface. 20 * 21 * This file was generated from 22 * dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp 23 */ 24 25 #include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" 26 27 #include "dictionary/header/header_policy.h" 28 #include "dictionary/property/ngram_property.h" 29 #include "dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" 30 #include "dictionary/structure/backward/v402/content/bigram_dict_content.h" 31 #include "dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" 32 #include "dictionary/structure/backward/v402/ver4_dict_constants.h" 33 #include "dictionary/utils/forgetting_curve_utils.h" 34 35 namespace latinime { 36 namespace backward { 37 namespace v402 { 38 39 void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, 40 bool *const outHasNext, int *const bigramEntryPos) const { 41 const BigramEntry bigramEntry = 42 mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos); 43 if (outBigramPos) { 44 // Lookup target PtNode position. 45 *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition( 46 bigramEntry.getTargetTerminalId()); 47 } 48 if (outProbability) { 49 if (bigramEntry.hasHistoricalInfo()) { 50 *outProbability = 51 ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(), 52 mHeaderPolicy); 53 } else { 54 *outProbability = bigramEntry.getProbability(); 55 } 56 } 57 if (outHasNext) { 58 *outHasNext = bigramEntry.hasNext(); 59 } 60 } 61 62 bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, 63 const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) { 64 // 1. The word has no bigrams yet. 65 // 2. The word has bigrams, and there is the target in the list. 66 // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. 67 // 4. The word has bigrams. We have to append new bigram entry to the list. 68 // 5. Same as 4, but the list is the last entry of the content file. 69 if (outAddedNewEntry) { 70 *outAddedNewEntry = false; 71 } 72 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); 73 if (bigramListPos == NOT_A_DICT_POS) { 74 // Case 1. PtNode that doesn't have a bigram list. 75 // Create new bigram list. 76 if (!mBigramDictContent->createNewBigramList(terminalId)) { 77 return false; 78 } 79 const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, 80 newTargetTerminalId); 81 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, 82 ngramProperty); 83 // Write an entry. 84 const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); 85 if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { 86 return false; 87 } 88 if (outAddedNewEntry) { 89 *outAddedNewEntry = true; 90 } 91 return true; 92 } 93 94 int tailEntryPos = NOT_A_DICT_POS; 95 const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos, 96 &tailEntryPos); 97 if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) { 98 // Case 4, 5. 99 // Add new entry to the bigram list. 100 if (tailEntryPos == NOT_A_DICT_POS) { 101 // Case 4. Create new bigram list. 102 if (!mBigramDictContent->createNewBigramList(terminalId)) { 103 return false; 104 } 105 const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId); 106 // Copy existing bigram list. 107 if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) { 108 return false; 109 } 110 } 111 // Write new entry at the tail position of the bigram content. 112 const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, 113 newTargetTerminalId); 114 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( 115 &newBigramEntry, ngramProperty); 116 if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { 117 return false; 118 } 119 // Update has next flag of the tail entry. 120 if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { 121 return false; 122 } 123 if (outAddedNewEntry) { 124 *outAddedNewEntry = true; 125 } 126 return true; 127 } 128 129 // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry. 130 const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); 131 if (!originalBigramEntry.isValid()) { 132 // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing 133 // entry is updated. 134 if (outAddedNewEntry) { 135 *outAddedNewEntry = true; 136 } 137 } 138 const BigramEntry updatedBigramEntry = 139 originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); 140 const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( 141 &updatedBigramEntry, ngramProperty); 142 return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); 143 } 144 145 bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { 146 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); 147 if (bigramListPos == NOT_A_DICT_POS) { 148 // Bigram list doesn't exist. 149 return false; 150 } 151 const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos, 152 nullptr /* outTailEntryPos */); 153 if (entryPosToUpdate == NOT_A_DICT_POS) { 154 // Bigram entry doesn't exist. 155 return false; 156 } 157 const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); 158 if (targetTerminalId != bigramEntry.getTargetTerminalId()) { 159 // Bigram entry doesn't exist. 160 return false; 161 } 162 // Remove bigram entry by marking it as invalid entry and overwriting the original entry. 163 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); 164 return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate); 165 } 166 167 bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, 168 int *const outBigramCount) { 169 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); 170 if (bigramListPos == NOT_A_DICT_POS) { 171 // Bigram list doesn't exist. 172 return true; 173 } 174 bool hasNext = true; 175 int readingPos = bigramListPos; 176 while (hasNext) { 177 const int entryPos = readingPos; 178 const BigramEntry bigramEntry = 179 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 180 hasNext = bigramEntry.hasNext(); 181 if (!bigramEntry.isValid()) { 182 continue; 183 } 184 const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition( 185 bigramEntry.getTargetTerminalId()); 186 if (targetPtNodePos == NOT_A_DICT_POS) { 187 // Invalidate bigram entry. 188 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); 189 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { 190 return false; 191 } 192 } else if (bigramEntry.hasHistoricalInfo()) { 193 const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( 194 bigramEntry.getHistoricalInfo(), mHeaderPolicy); 195 if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) { 196 const BigramEntry updatedBigramEntry = 197 bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo); 198 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { 199 return false; 200 } 201 *outBigramCount += 1; 202 } else { 203 // Remove entry. 204 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); 205 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { 206 return false; 207 } 208 } 209 } else { 210 *outBigramCount += 1; 211 } 212 } 213 return true; 214 } 215 216 int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { 217 const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); 218 if (bigramListPos == NOT_A_DICT_POS) { 219 // Bigram list doesn't exist. 220 return 0; 221 } 222 int bigramCount = 0; 223 bool hasNext = true; 224 int readingPos = bigramListPos; 225 while (hasNext) { 226 const BigramEntry bigramEntry = 227 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 228 hasNext = bigramEntry.hasNext(); 229 if (bigramEntry.isValid()) { 230 bigramCount++; 231 } 232 } 233 return bigramCount; 234 } 235 236 int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, 237 const int bigramListPos, int *const outTailEntryPos) const { 238 if (outTailEntryPos) { 239 *outTailEntryPos = NOT_A_DICT_POS; 240 } 241 bool hasNext = true; 242 int invalidEntryPos = NOT_A_DICT_POS; 243 int readingPos = bigramListPos; 244 while (hasNext) { 245 const int entryPos = readingPos; 246 const BigramEntry bigramEntry = 247 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 248 hasNext = bigramEntry.hasNext(); 249 if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) { 250 // Entry with same target is found. 251 return entryPos; 252 } else if (!bigramEntry.isValid()) { 253 // Invalid entry that can be reused is found. 254 invalidEntryPos = entryPos; 255 } 256 if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) { 257 if (outTailEntryPos) { 258 *outTailEntryPos = entryPos; 259 } 260 } 261 } 262 return invalidEntryPos; 263 } 264 265 const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( 266 const BigramEntry *const originalBigramEntry, 267 const NgramProperty *const ngramProperty) const { 268 // TODO: Consolidate historical info and probability. 269 if (mHeaderPolicy->hasHistoricalInfoOfWords()) { 270 const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo(); 271 const HistoricalInfo updatedHistoricalInfo = 272 ForgettingCurveUtils::createUpdatedHistoricalInfo( 273 originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(), 274 &historicalInfoForUpdate, mHeaderPolicy); 275 return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); 276 } else { 277 return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability()); 278 } 279 } 280 281 bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) { 282 const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos); 283 const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext); 284 return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos); 285 } 286 287 } // namespace v402 288 } // namespace backward 289 } // namespace latinime 290