Home | History | Annotate | Download | only in content
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_BIGRAM_DICT_CONTENT_H
     18 #define LATINIME_BIGRAM_DICT_CONTENT_H
     19 
     20 #include <cstdint>
     21 #include <cstdio>
     22 
     23 #include "defines.h"
     24 #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h"
     25 #include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h"
     26 #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h"
     27 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
     28 
     29 namespace latinime {
     30 
     31 class BigramDictContent : public SparseTableDictContent {
     32  public:
     33     BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo)
     34             : SparseTableDictContent(buffers, bufferSizes,
     35                       Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
     36                       Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
     37               mHasHistoricalInfo(hasHistoricalInfo) {}
     38 
     39     BigramDictContent(const bool hasHistoricalInfo)
     40             : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE,
     41                       Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE),
     42               mHasHistoricalInfo(hasHistoricalInfo) {}
     43 
     44     int getContentTailPos() const {
     45         return getContentBuffer()->getTailPosition();
     46     }
     47 
     48     const BigramEntry getBigramEntry(const int bigramEntryPos) const {
     49         int readingPos = bigramEntryPos;
     50         return getBigramEntryAndAdvancePosition(&readingPos);
     51     }
     52 
     53     const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const;
     54 
     55     // Returns head position of bigram list for a PtNode specified by terminalId.
     56     int getBigramListHeadPos(const int terminalId) const {
     57         const SparseTable *const addressLookupTable = getAddressLookupTable();
     58         if (!addressLookupTable->contains(terminalId)) {
     59             return NOT_A_DICT_POS;
     60         }
     61         return addressLookupTable->get(terminalId);
     62     }
     63 
     64     bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) {
     65         int writingPos = getContentBuffer()->getTailPosition();
     66         return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
     67     }
     68 
     69     bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) {
     70         int writingPos = entryWritingPos;
     71         return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos);
     72     }
     73 
     74     bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite,
     75             int *const entryWritingPos);
     76 
     77     bool writeTerminator(const int writingPos) {
     78         // Terminator is a link to the invalid position.
     79         return writeLink(INVALID_LINKED_ENTRY_POS, writingPos);
     80     }
     81 
     82     bool writeLink(const int linkedPos, const int writingPos);
     83 
     84     bool createNewBigramList(const int terminalId) {
     85         const int bigramListPos = getContentBuffer()->getTailPosition();
     86         return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos);
     87     }
     88 
     89     bool flushToFile(FILE *const file) const {
     90         return flush(file);
     91     }
     92 
     93     bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
     94             const BigramDictContent *const originalBigramDictContent,
     95             int *const outBigramEntryCount);
     96 
     97     int getBigramEntrySize() const {
     98         if (mHasHistoricalInfo) {
     99             return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
    100                     + Ver4DictConstants::TIME_STAMP_FIELD_SIZE
    101                     + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE
    102                     + Ver4DictConstants::WORD_COUNT_FIELD_SIZE
    103                     + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
    104         } else {
    105             return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE
    106                     + Ver4DictConstants::PROBABILITY_SIZE
    107                     + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE;
    108         }
    109     }
    110 
    111  private:
    112     DISALLOW_COPY_AND_ASSIGN(BigramDictContent);
    113 
    114     static const int INVALID_LINKED_ENTRY_POS;
    115 
    116     bool writeBigramEntryAttributesAndAdvancePosition(
    117             const bool isLink, const int probability, const int targetTerminalId,
    118             const int timestamp, const int level, const int count, int *const entryWritingPos);
    119 
    120     bool runGCBigramList(const int bigramListPos,
    121             const BigramDictContent *const sourceBigramDictContent, const int toPos,
    122             const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
    123             int *const outEntryCount);
    124 
    125     bool mHasHistoricalInfo;
    126 };
    127 } // namespace latinime
    128 #endif /* LATINIME_BIGRAM_DICT_CONTENT_H */
    129