Home | History | Annotate | Download | only in bigram
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/policyimpl/dictionary/structure/v4/bigram/ver4_bigram_list_policy.h"
     18 
     19 #include "suggest/core/dictionary/property/bigram_property.h"
     20 #include "suggest/policyimpl/dictionary/header/header_policy.h"
     21 #include "suggest/policyimpl/dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h"
     22 #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
     23 #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
     24 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
     25 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
     26 
     27 namespace latinime {
     28 
     29 void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability,
     30         bool *const outHasNext, int *const bigramEntryPos) const {
     31     const BigramEntry bigramEntry =
     32             mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos);
     33     if (outBigramPos) {
     34         // Lookup target PtNode position.
     35         *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
     36                 bigramEntry.getTargetTerminalId());
     37     }
     38     if (outProbability) {
     39         if (bigramEntry.hasHistoricalInfo()) {
     40             *outProbability =
     41                     ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(),
     42                             mHeaderPolicy);
     43         } else {
     44             *outProbability = bigramEntry.getProbability();
     45         }
     46     }
     47     if (outHasNext) {
     48         *outHasNext = bigramEntry.hasNext();
     49     }
     50 }
     51 
     52 bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId,
     53         const BigramProperty *const bigramProperty, bool *const outAddedNewEntry) {
     54     // 1. The word has no bigrams yet.
     55     // 2. The word has bigrams, and there is the target in the list.
     56     // 3. The word has bigrams, and there is an invalid entry that can be reclaimed.
     57     // 4. The word has bigrams. We have to append new bigram entry to the list.
     58     // 5. Same as 4, but the list is the last entry of the content file.
     59     if (outAddedNewEntry) {
     60         *outAddedNewEntry = false;
     61     }
     62     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
     63     if (bigramListPos == NOT_A_DICT_POS) {
     64         // Case 1. PtNode that doesn't have a bigram list.
     65         // Create new bigram list.
     66         if (!mBigramDictContent->createNewBigramList(terminalId)) {
     67             return false;
     68         }
     69         const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
     70                 newTargetTerminalId);
     71         const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry,
     72                 bigramProperty);
     73         // Write an entry.
     74         int writingPos =  mBigramDictContent->getBigramListHeadPos(terminalId);
     75         if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
     76                 &writingPos)) {
     77             AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
     78             return false;
     79         }
     80         if (!mBigramDictContent->writeTerminator(writingPos)) {
     81             AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
     82             return false;
     83         }
     84         if (outAddedNewEntry) {
     85             *outAddedNewEntry = true;
     86         }
     87         return true;
     88     }
     89 
     90     int tailEntryPos = NOT_A_DICT_POS;
     91     const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos,
     92             &tailEntryPos);
     93     if (entryPosToUpdate == NOT_A_DICT_POS) {
     94         // Case 4, 5. Add new entry to the bigram list.
     95         const int contentTailPos = mBigramDictContent->getContentTailPos();
     96         // If the tail entry is at the tail of content buffer, the new entry can be written without
     97         // link (Case 5).
     98         const bool canAppendEntry =
     99                 contentTailPos == tailEntryPos + mBigramDictContent->getBigramEntrySize();
    100         const int newEntryPos = canAppendEntry ? tailEntryPos : contentTailPos;
    101         int writingPos = newEntryPos;
    102         // Write new entry at the tail position of the bigram content.
    103         const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
    104                 newTargetTerminalId);
    105         const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
    106                 &newBigramEntry, bigramProperty);
    107         if (!mBigramDictContent->writeBigramEntryAndAdvancePosition(&bigramEntryToWrite,
    108                 &writingPos)) {
    109             AKLOGE("Cannot write bigram entry. pos: %d.", writingPos);
    110             return false;
    111         }
    112         if (!mBigramDictContent->writeTerminator(writingPos)) {
    113             AKLOGE("Cannot write bigram list terminator. pos: %d.", writingPos);
    114             return false;
    115         }
    116         if (!canAppendEntry) {
    117             // Update link of the current tail entry.
    118             if (!mBigramDictContent->writeLink(newEntryPos, tailEntryPos)) {
    119                 AKLOGE("Cannot update bigram entry link. pos: %d, linked entry pos: %d.",
    120                         tailEntryPos, newEntryPos);
    121                 return false;
    122             }
    123         }
    124         if (outAddedNewEntry) {
    125             *outAddedNewEntry = true;
    126         }
    127         return true;
    128     }
    129 
    130     // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry.
    131     const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
    132     if (!originalBigramEntry.isValid()) {
    133         // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing
    134         // entry is updated.
    135         if (outAddedNewEntry) {
    136             *outAddedNewEntry = true;
    137         }
    138     }
    139     const BigramEntry updatedBigramEntry =
    140             originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId);
    141     const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(
    142             &updatedBigramEntry, bigramProperty);
    143     return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate);
    144 }
    145 
    146 bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) {
    147     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    148     if (bigramListPos == NOT_A_DICT_POS) {
    149         // Bigram list doesn't exist.
    150         return false;
    151     }
    152     const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos,
    153             nullptr /* outTailEntryPos */);
    154     if (entryPosToUpdate == NOT_A_DICT_POS) {
    155         // Bigram entry doesn't exist.
    156         return false;
    157     }
    158     const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate);
    159     if (targetTerminalId != bigramEntry.getTargetTerminalId()) {
    160         // Bigram entry doesn't exist.
    161         return false;
    162     }
    163     // Remove bigram entry by marking it as invalid entry and overwriting the original entry.
    164     const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
    165     return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate);
    166 }
    167 
    168 bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId,
    169         int *const outBigramCount) {
    170     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    171     if (bigramListPos == NOT_A_DICT_POS) {
    172         // Bigram list doesn't exist.
    173         return true;
    174     }
    175     bool hasNext = true;
    176     int readingPos = bigramListPos;
    177     while (hasNext) {
    178         const BigramEntry bigramEntry =
    179                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    180         const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
    181         hasNext = bigramEntry.hasNext();
    182         if (!bigramEntry.isValid()) {
    183             continue;
    184         }
    185         const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition(
    186                 bigramEntry.getTargetTerminalId());
    187         if (targetPtNodePos == NOT_A_DICT_POS) {
    188             // Invalidate bigram entry.
    189             const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
    190             if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
    191                 return false;
    192             }
    193         } else if (bigramEntry.hasHistoricalInfo()) {
    194             const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
    195                     bigramEntry.getHistoricalInfo(), mHeaderPolicy);
    196             if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) {
    197                 const BigramEntry updatedBigramEntry =
    198                         bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo);
    199                 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
    200                     return false;
    201                 }
    202                 *outBigramCount += 1;
    203             } else {
    204                 // Remove entry.
    205                 const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry();
    206                 if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) {
    207                     return false;
    208                 }
    209             }
    210         } else {
    211             *outBigramCount += 1;
    212         }
    213     }
    214     return true;
    215 }
    216 
    217 int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) {
    218     const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId);
    219     if (bigramListPos == NOT_A_DICT_POS) {
    220         // Bigram list doesn't exist.
    221         return 0;
    222     }
    223     int bigramCount = 0;
    224     bool hasNext = true;
    225     int readingPos = bigramListPos;
    226     while (hasNext) {
    227         const BigramEntry bigramEntry =
    228                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    229         hasNext = bigramEntry.hasNext();
    230         if (bigramEntry.isValid()) {
    231             bigramCount++;
    232         }
    233     }
    234     return bigramCount;
    235 }
    236 
    237 int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind,
    238         const int bigramListPos, int *const outTailEntryPos) const {
    239     if (outTailEntryPos) {
    240         *outTailEntryPos = NOT_A_DICT_POS;
    241     }
    242     int invalidEntryPos = NOT_A_DICT_POS;
    243     int readingPos = bigramListPos;
    244     while (true) {
    245         const BigramEntry bigramEntry =
    246                 mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    247         const int entryPos = readingPos - mBigramDictContent->getBigramEntrySize();
    248         if (!bigramEntry.hasNext()) {
    249             if (outTailEntryPos) {
    250                 *outTailEntryPos = entryPos;
    251             }
    252             break;
    253         }
    254         if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) {
    255             // Entry with same target is found.
    256             return entryPos;
    257         } else if (!bigramEntry.isValid()) {
    258             // Invalid entry that can be reused is found.
    259             invalidEntryPos = entryPos;
    260         }
    261     }
    262     return invalidEntryPos;
    263 }
    264 
    265 const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom(
    266         const BigramEntry *const originalBigramEntry,
    267         const BigramProperty *const bigramProperty) const {
    268     // TODO: Consolidate historical info and probability.
    269     if (mHeaderPolicy->hasHistoricalInfoOfWords()) {
    270         const HistoricalInfo historicalInfoForUpdate(bigramProperty->getTimestamp(),
    271                 bigramProperty->getLevel(), bigramProperty->getCount());
    272         const HistoricalInfo updatedHistoricalInfo =
    273                 ForgettingCurveUtils::createUpdatedHistoricalInfo(
    274                         originalBigramEntry->getHistoricalInfo(), bigramProperty->getProbability(),
    275                         &historicalInfoForUpdate, mHeaderPolicy);
    276         return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo);
    277     } else {
    278         return originalBigramEntry->updateProbabilityAndGetEntry(bigramProperty->getProbability());
    279     }
    280 }
    281 
    282 } // namespace latinime
    283