Home | History | Annotate | Download | only in content
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
     18 
     19 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
     20 
     21 namespace latinime {
     22 
     23 const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;
     24 
     25 const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
     26         int *const bigramEntryPos) const {
     27     const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
     28     const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize();
     29     if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) {
     30         AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
     31                 "bufSize: %d", *bigramEntryPos, bigramEntryTailPos,
     32                         bigramListBuffer->getTailPosition());
     33         ASSERT(false);
     34         return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
     35                 Ver4DictConstants::NOT_A_TERMINAL_ID);
     36     }
     37     const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
     38             Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
     39     const bool isLink = (bigramFlags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0;
     40     int probability = NOT_A_PROBABILITY;
     41     int timestamp = NOT_A_TIMESTAMP;
     42     int level = 0;
     43     int count = 0;
     44     if (mHasHistoricalInfo) {
     45         timestamp = bigramListBuffer->readUintAndAdvancePosition(
     46                 Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
     47         level = bigramListBuffer->readUintAndAdvancePosition(
     48                 Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
     49         count = bigramListBuffer->readUintAndAdvancePosition(
     50                 Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
     51     } else {
     52         probability = bigramListBuffer->readUintAndAdvancePosition(
     53                 Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
     54     }
     55     const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
     56             Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
     57     const int targetTerminalId =
     58             (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
     59                     Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
     60     if (isLink) {
     61         const int linkedEntryPos = targetTerminalId;
     62         if (linkedEntryPos == INVALID_LINKED_ENTRY_POS) {
     63             // Bigram list terminator is found.
     64             return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
     65                     Ver4DictConstants::NOT_A_TERMINAL_ID);
     66         }
     67         *bigramEntryPos = linkedEntryPos;
     68         return getBigramEntryAndAdvancePosition(bigramEntryPos);
     69     }
     70     // hasNext is always true because we should continue to read the next entry until the terminator
     71     // is found.
     72     if (mHasHistoricalInfo) {
     73         const HistoricalInfo historicalInfo(timestamp, level, count);
     74         return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
     75     } else {
     76         return BigramEntry(true /* hasNext */, probability, targetTerminalId);
     77     }
     78 }
     79 
     80 bool BigramDictContent::writeBigramEntryAndAdvancePosition(
     81         const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
     82     return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */,
     83             bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(),
     84             bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(),
     85             bigramEntryToWrite->getHistoricalInfo()->getLevel(),
     86             bigramEntryToWrite->getHistoricalInfo()->getCount(),
     87             entryWritingPos);
     88 }
     89 
     90 bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
     91         const bool isLink, const int probability, const int targetTerminalId,
     92         const int timestamp, const int level, const int count, int *const entryWritingPos) {
     93     BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
     94     const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0;
     95     if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
     96             Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
     97         AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
     98         return false;
     99     }
    100     if (mHasHistoricalInfo) {
    101         if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp,
    102                 Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
    103             AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
    104                     timestamp);
    105             return false;
    106         }
    107         if (!bigramListBuffer->writeUintAndAdvancePosition(level,
    108                 Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
    109             AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
    110                     level);
    111             return false;
    112         }
    113         if (!bigramListBuffer->writeUintAndAdvancePosition(count,
    114                 Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
    115             AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
    116                     count);
    117             return false;
    118         }
    119     } else {
    120         if (!bigramListBuffer->writeUintAndAdvancePosition(probability,
    121                 Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
    122             AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
    123                     probability);
    124             return false;
    125         }
    126     }
    127     const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
    128             Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
    129     if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
    130             Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
    131         AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
    132                 *entryWritingPos, targetTerminalId);
    133         return false;
    134     }
    135     return true;
    136 }
    137 
    138 bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
    139     const int targetTerminalId = linkedEntryPos;
    140     int pos = writingPos;
    141     return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
    142             NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */,
    143             0 /* count */, &pos);
    144 }
    145 
    146 bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
    147         const BigramDictContent *const originalBigramDictContent,
    148         int *const outBigramEntryCount) {
    149     for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
    150             it != terminalIdMap->end(); ++it) {
    151         const int originalBigramListPos =
    152                 originalBigramDictContent->getBigramListHeadPos(it->first);
    153         if (originalBigramListPos == NOT_A_DICT_POS) {
    154             // This terminal does not have a bigram list.
    155             continue;
    156         }
    157         const int bigramListPos = getContentBuffer()->getTailPosition();
    158         int bigramEntryCount = 0;
    159         // Copy bigram list with GC from original content.
    160         if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
    161                 terminalIdMap, &bigramEntryCount)) {
    162             AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
    163                     originalBigramListPos, bigramListPos);
    164             return false;
    165         }
    166         if (bigramEntryCount == 0) {
    167             // All bigram entries are useless. This terminal does not have a bigram list.
    168             continue;
    169         }
    170         *outBigramEntryCount += bigramEntryCount;
    171         // Set bigram list position to the lookup table.
    172         if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
    173             AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
    174                     it->second, bigramListPos);
    175             return false;
    176         }
    177     }
    178     return true;
    179 }
    180 
    181 // Returns whether GC for the bigram list was succeeded or not.
    182 bool BigramDictContent::runGCBigramList(const int bigramListPos,
    183         const BigramDictContent *const sourceBigramDictContent, const int toPos,
    184         const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
    185         int *const outEntryCount) {
    186     bool hasNext = true;
    187     int readingPos = bigramListPos;
    188     int writingPos = toPos;
    189     while (hasNext) {
    190         const BigramEntry originalBigramEntry =
    191                 sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    192         hasNext = originalBigramEntry.hasNext();
    193         if (!originalBigramEntry.isValid()) {
    194             continue;
    195         }
    196         TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
    197                 terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
    198         if (it == terminalIdMap->end()) {
    199             // Target word has been removed.
    200             continue;
    201         }
    202         const BigramEntry updatedBigramEntry =
    203                 originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
    204         if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
    205             AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
    206             return false;
    207         }
    208         *outEntryCount += 1;
    209     }
    210     if (*outEntryCount > 0) {
    211         if (!writeTerminator(writingPos)) {
    212             AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos);
    213             return false;
    214         }
    215     }
    216     return true;
    217 }
    218 
    219 } // namespace latinime
    220