Home | History | Annotate | Download | only in bigram
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!!
     19  * Do not edit this file other than updating policy's interface.
     20  *
     21  * This file was generated from
     22  *   dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp
     23  */
     24 
     25 #include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
     26 
     27 #include "dictionary/header/header_policy.h"
     28 #include "dictionary/property/ngram_property.h"
     29 #include "dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
     30 #include "dictionary/structure/backward/v402/content/bigram_dict_content.h"
     31 #include "dictionary/structure/backward/v402/content/terminal_position_lookup_table.h"
     32 #include "dictionary/structure/backward/v402/ver4_dict_constants.h"
     33 #include "dictionary/utils/forgetting_curve_utils.h"
     34 
     35 namespace latinime {
     36 namespace backward {
     37 namespace v402 {
     38 
     39 void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
     40         bool *const outHasNext, int *const bigramEntryPos) const {
     41     const BigramEntry bigramEntry =
     42             mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
     43     if (outBigramPos) {
     44         // Lookup target PtNode position.
     45         *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
     46                 bigramEntry.getTargetTerminalId());
     47     }
     48     if (outProbability) {
     49         if (bigramEntry.hasHistoricalInfo()) {
     50             *outProbability =
     51                     ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
     52                             mHeaderPolicy);
     53         } else {
     54             *outProbability = bigramEntry.getProbability();
     55         }
     56     }
     57     if (outHasNext) {
     58         *outHasNext = bigramEntry.hasNext();
     59     }
     60 }
     61 
     62 bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
     63         const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) {
     64     // 1. The word has no bigrams yet.
     65     // 2. The word has bigrams, and there is the target in the list.
     66     // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
     67     // 4. The word has bigrams. We have to append new bigram entry to the list.
     68     // 5. Same as 4, but the list is the last entry of the content file.
     69     if (outAddedNewEntry) {
     70         *outAddedNewEntry = false;
     71     }
     72     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
     73     if (bigramListPos == NOT_A_DICT_POS) {
     74         // Case 1. PtNode that doesn't have a bigram list.
     75         // Create new bigram list.
     76         if (!mBigramDictContent->createNewBigramList(terminalId)) {
     77             return false;
     78         }
     79         const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
     80                 newTargetTerminalId);
     81         const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
     82                 ngramProperty);
     83         // Write an entry.
     84         const int writingPos =  mBigramDictContent->getBigramListHeadPos(terminalId);
     85         if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) {
     86             return false;
     87         }
     88         if (outAddedNewEntry) {
     89             *outAddedNewEntry = true;
     90         }
     91         return true;
     92     }
     93 
     94     int tailEntryPos = NOT_A_DICT_POS;
     95     const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
     96             &tailEntryPos);
     97     if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) {
     98         // Case 4, 5.
     99         // Add new entry to the bigram list.
    100         if (tailEntryPos == NOT_A_DICT_POS) {
    101             // Case 4. Create new bigram list.
    102             if (!mBigramDictContent->createNewBigramList(terminalId)) {
    103                 return false;
    104             }
    105             const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    106             // Copy existing bigram list.
    107             if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) {
    108                 return false;
    109             }
    110         }
    111         // Write new entry at the tail position of the bigram content.
    112         const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
    113                 newTargetTerminalId);
    114         const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
    115                 &newBigramEntry, ngramProperty);
    116         if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) {
    117             return false;
    118         }
    119         // Update has next flag of the tail entry.
    120         if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) {
    121             return false;
    122         }
    123         if (outAddedNewEntry) {
    124             *outAddedNewEntry = true;
    125         }
    126         return true;
    127     }
    128 
    129     // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
    130     const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
    131     if (!originalBigramEntry.isValid()) {
    132         // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
    133         // entry is updated.
    134         if (outAddedNewEntry) {
    135             *outAddedNewEntry = true;
    136         }
    137     }
    138     const BigramEntry updatedBigramEntry =
    139             originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
    140     const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
    141             &updatedBigramEntry, ngramProperty);
    142     return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
    143 }
    144 
    145 bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
    146     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    147     if (bigramListPos == NOT_A_DICT_POS) {
    148         // Bigram list doesn't exist.
    149         return false;
    150     }
    151     const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
    152             nullptr /* outTailEntryPos */);
    153     if (entryPosToUpdate == NOT_A_DICT_POS) {
    154         // Bigram entry doesn't exist.
    155         return false;
    156     }
    157     const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
    158     if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
    159         // Bigram entry doesn't exist.
    160         return false;
    161     }
    162     // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
    163     const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
    164     return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
    165 }
    166 
    167 bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
    168         int *const outBigramCount) {
    169     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    170     if (bigramListPos == NOT_A_DICT_POS) {
    171         // Bigram list doesn't exist.
    172         return true;
    173     }
    174     bool hasNext = true;
    175     int readingPos = bigramListPos;
    176     while (hasNext) {
    177         const int entryPos = readingPos;
    178         const BigramEntry bigramEntry =
    179                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    180         hasNext = bigramEntry.hasNext();
    181         if (!bigramEntry.isValid()) {
    182             continue;
    183         }
    184         const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
    185                 bigramEntry.getTargetTerminalId());
    186         if (targetPtNodePos == NOT_A_DICT_POS) {
    187             // Invalidate bigram entry.
    188             const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
    189             if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
    190                 return false;
    191             }
    192         } else if (bigramEntry.hasHistoricalInfo()) {
    193             const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
    194                     bigramEntry.getHistoricalInfo(), mHeaderPolicy);
    195             if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
    196                 const BigramEntry updatedBigramEntry =
    197                         bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
    198                 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
    199                     return false;
    200                 }
    201                 *outBigramCount += 1;
    202             } else {
    203                 // Remove entry.
    204                 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
    205                 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
    206                     return false;
    207                 }
    208             }
    209         } else {
    210             *outBigramCount += 1;
    211         }
    212     }
    213     return true;
    214 }
    215 
    216 int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
    217     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    218     if (bigramListPos == NOT_A_DICT_POS) {
    219         // Bigram list doesn't exist.
    220         return 0;
    221     }
    222     int bigramCount = 0;
    223     bool hasNext = true;
    224     int readingPos = bigramListPos;
    225     while (hasNext) {
    226         const BigramEntry bigramEntry =
    227                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    228         hasNext = bigramEntry.hasNext();
    229         if (bigramEntry.isValid()) {
    230             bigramCount++;
    231         }
    232     }
    233     return bigramCount;
    234 }
    235 
    236 int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
    237         const int bigramListPos, int *const outTailEntryPos) const {
    238     if (outTailEntryPos) {
    239         *outTailEntryPos = NOT_A_DICT_POS;
    240     }
    241     bool hasNext = true;
    242     int invalidEntryPos = NOT_A_DICT_POS;
    243     int readingPos = bigramListPos;
    244     while (hasNext) {
    245         const int entryPos = readingPos;
    246         const BigramEntry bigramEntry =
    247                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    248         hasNext = bigramEntry.hasNext();
    249         if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
    250             // Entry with same target is found.
    251             return entryPos;
    252         } else if (!bigramEntry.isValid()) {
    253             // Invalid entry that can be reused is found.
    254             invalidEntryPos = entryPos;
    255         }
    256         if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) {
    257             if (outTailEntryPos) {
    258                 *outTailEntryPos = entryPos;
    259             }
    260         }
    261     }
    262     return invalidEntryPos;
    263 }
    264 
    265 const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
    266         const BigramEntry *const originalBigramEntry,
    267         const NgramProperty *const ngramProperty) const {
    268     // TODO: Consolidate historical info and probability.
    269     if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
    270         const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo();
    271         const HistoricalInfo updatedHistoricalInfo =
    272                 ForgettingCurveUtils::createUpdatedHistoricalInfo(
    273                         originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(),
    274                         &historicalInfoForUpdate, mHeaderPolicy);
    275         return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
    276     } else {
    277         return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability());
    278     }
    279 }
    280 
    281 bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) {
    282     const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos);
    283     const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext);
    284     return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos);
    285 }
    286 
    287 } // namespace v402
    288 } // namespace backward
    289 } // namespace latinime
    290