Home | History | Annotate | Download | only in v402
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * !!!!! DO NOT EDIT THIS FILE !!!!!
     19  *
     20  * This file was generated from
     21  *   suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
     22  */
     23 
     24 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
     25 
     26 #include <cstring>
     27 #include <queue>
     28 
     29 #include "suggest/policyimpl/dictionary/header/header_policy.h"
     30 #include "suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
     31 #include "suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
     32 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
     33 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
     34 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
     35 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
     36 #include "suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
     37 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
     38 #include "suggest/policyimpl/dictionary/utils/file_utils.h"
     39 #include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
     40 
     41 namespace latinime {
     42 namespace backward {
     43 namespace v402 {
     44 
     45 bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
     46         const int unigramCount, const int bigramCount) const {
     47     const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
     48     BufferWithExtendableBuffer headerBuffer(
     49             BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
     50     const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
     51             + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
     52     if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
     53             unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) {
     54         AKLOGE("Cannot write header structure to buffer. "
     55                 "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
     56                 "extendedRegionSize: %d", false, unigramCount, bigramCount,
     57                 extendedRegionSize);
     58         return false;
     59     }
     60     return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
     61 }
     62 
     63 bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
     64         const char *const dictDirPath) {
     65     const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
     66     Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
     67             Ver4DictBuffers::createVer4DictBuffers(headerPolicy,
     68                     Ver4DictConstants::MAX_DICTIONARY_SIZE));
     69     int unigramCount = 0;
     70     int bigramCount = 0;
     71     if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
     72         return false;
     73     }
     74     BufferWithExtendableBuffer headerBuffer(
     75             BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
     76     if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
     77             unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
     78         return false;
     79     }
     80     return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
     81 }
     82 
     83 bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
     84         const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
     85         int *const outUnigramCount, int *const outBigramCount) {
     86     Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
     87             mBuffers->getProbabilityDictContent(), headerPolicy);
     88     Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
     89     Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
     90             mBuffers->getTerminalPositionLookupTable(), headerPolicy);
     91     Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
     92             mBuffers->getTerminalPositionLookupTable());
     93     Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
     94             mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
     95             &shortcutPolicy);
     96 
     97     DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
     98     readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
     99     DynamicPtGcEventListeners
    100             ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
    101                     traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
    102                             &ptNodeWriter);
    103     if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
    104             &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
    105         return false;
    106     }
    107     const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
    108             .getValidUnigramCount();
    109     const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
    110     if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {
    111         if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
    112             AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
    113                     maxUnigramCount);
    114             return false;
    115         }
    116     }
    117 
    118     readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
    119     DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
    120             traversePolicyToUpdateBigramProbability(&ptNodeWriter);
    121     if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
    122             &traversePolicyToUpdateBigramProbability)) {
    123         return false;
    124     }
    125     const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
    126     const int maxBigramCount = headerPolicy->getMaxBigramCount();
    127     if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {
    128         if (!truncateBigrams(maxBigramCount)) {
    129             AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
    130             return false;
    131         }
    132     }
    133 
    134     // Mapping from positions in mBuffer to positions in bufferToWrite.
    135     PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
    136     readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
    137     Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
    138             buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
    139             &shortcutPolicy);
    140     DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
    141             traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
    142                     buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
    143     if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
    144             &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
    145         return false;
    146     }
    147 
    148     // Create policy instances for the GCed dictionary.
    149     Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
    150             buffersToWrite->getProbabilityDictContent(), headerPolicy);
    151     Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
    152     Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
    153             buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
    154     Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
    155             buffersToWrite->getTerminalPositionLookupTable());
    156     Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
    157             buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
    158             &newShortcutPolicy);
    159     // Re-assign terminal IDs for valid terminal PtNodes.
    160     TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
    161     if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
    162             &terminalIdMap)) {
    163         return false;
    164     }
    165     // Run GC for probability dict content.
    166     if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
    167             mBuffers->getProbabilityDictContent())) {
    168         return false;
    169     }
    170     // Run GC for bigram dict content.
    171     if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
    172             mBuffers->getBigramDictContent(), outBigramCount)) {
    173         return false;
    174     }
    175     // Run GC for shortcut dict content.
    176     if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
    177             mBuffers->getShortcutDictContent())) {
    178         return false;
    179     }
    180     DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
    181     newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
    182     DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
    183             traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
    184     if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
    185             &traversePolicyToUpdateAllPositionFields)) {
    186         return false;
    187     }
    188     newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
    189     TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
    190             traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
    191     if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
    192             &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
    193         return false;
    194     }
    195     *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
    196     return true;
    197 }
    198 
    199 bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
    200         const Ver4PatriciaTrieNodeReader *const ptNodeReader,
    201         Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
    202     const TerminalPositionLookupTable *const terminalPosLookupTable =
    203             mBuffers->getTerminalPositionLookupTable();
    204     const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
    205     std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
    206             priorityQueue;
    207     for (int i = 0; i < nextTerminalId; ++i) {
    208         const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i);
    209         if (terminalPos == NOT_A_DICT_POS) {
    210             continue;
    211         }
    212         const ProbabilityEntry probabilityEntry =
    213                 mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
    214         const int probability = probabilityEntry.hasHistoricalInfo() ?
    215                 ForgettingCurveUtils::decodeProbability(
    216                         probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
    217                 probabilityEntry.getProbability();
    218         priorityQueue.push(DictProbability(terminalPos, probability,
    219                 probabilityEntry.getHistoricalInfo()->getTimeStamp()));
    220     }
    221 
    222     // Delete unigrams.
    223     while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
    224         const int ptNodePos = priorityQueue.top().getDictPos();
    225         priorityQueue.pop();
    226         const PtNodeParams ptNodeParams =
    227                 ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
    228         if (ptNodeParams.representsNonWordInfo()) {
    229             continue;
    230         }
    231         if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
    232             AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
    233             return false;
    234         }
    235     }
    236     return true;
    237 }
    238 
    239 bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
    240     const TerminalPositionLookupTable *const terminalPosLookupTable =
    241             mBuffers->getTerminalPositionLookupTable();
    242     const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
    243     std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
    244             priorityQueue;
    245     BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent();
    246     for (int i = 0; i < nextTerminalId; ++i) {
    247         const int bigramListPos = bigramDictContent->getBigramListHeadPos(i);
    248         if (bigramListPos == NOT_A_DICT_POS) {
    249             continue;
    250         }
    251         bool hasNext = true;
    252         int readingPos = bigramListPos;
    253         while (hasNext) {
    254             const int entryPos = readingPos;
    255             const BigramEntry bigramEntry =
    256                     bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
    257             hasNext = bigramEntry.hasNext();
    258             if (!bigramEntry.isValid()) {
    259                 continue;
    260             }
    261             const int probability = bigramEntry.hasHistoricalInfo() ?
    262                     ForgettingCurveUtils::decodeProbability(
    263                             bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
    264                     bigramEntry.getProbability();
    265             priorityQueue.push(DictProbability(entryPos, probability,
    266                     bigramEntry.getHistoricalInfo()->getTimeStamp()));
    267         }
    268     }
    269 
    270     // Delete bigrams.
    271     while (static_cast<int>(priorityQueue.size()) > maxBigramCount) {
    272         const int entryPos = priorityQueue.top().getDictPos();
    273         const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos);
    274         const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry();
    275         if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) {
    276             AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
    277             return false;
    278         }
    279         priorityQueue.pop();
    280     }
    281     return true;
    282 }
    283 
    284 bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
    285         ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
    286     if (!ptNodeParams->isTerminal()) {
    287         return true;
    288     }
    289     TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
    290             mTerminalIdMap->find(ptNodeParams->getTerminalId());
    291     if (it == mTerminalIdMap->end()) {
    292         AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
    293                 ptNodeParams->getTerminalId(), mTerminalIdMap->size());
    294         return false;
    295     }
    296     if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
    297         AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
    298     }
    299     return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
    300 }
    301 
    302 } // namespace v402
    303 } // namespace backward
    304 } // namespace latinime
    305