1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_BIGRAM_DICT_CONTENT_H 18 #define LATINIME_BIGRAM_DICT_CONTENT_H 19 20 #include <cstdint> 21 #include <cstdio> 22 23 #include "defines.h" 24 #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_entry.h" 25 #include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" 26 #include "suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h" 27 #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" 28 29 namespace latinime { 30 31 class BigramDictContent : public SparseTableDictContent { 32 public: 33 BigramDictContent(uint8_t *const *buffers, const int *bufferSizes, const bool hasHistoricalInfo) 34 : SparseTableDictContent(buffers, bufferSizes, 35 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 36 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), 37 mHasHistoricalInfo(hasHistoricalInfo) {} 38 39 BigramDictContent(const bool hasHistoricalInfo) 40 : SparseTableDictContent(Ver4DictConstants::BIGRAM_ADDRESS_TABLE_BLOCK_SIZE, 41 Ver4DictConstants::BIGRAM_ADDRESS_TABLE_DATA_SIZE), 42 mHasHistoricalInfo(hasHistoricalInfo) {} 43 44 int getContentTailPos() const { 45 return getContentBuffer()->getTailPosition(); 46 } 47 48 const BigramEntry getBigramEntry(const int bigramEntryPos) const { 49 int readingPos = bigramEntryPos; 50 return getBigramEntryAndAdvancePosition(&readingPos); 51 } 52 53 const BigramEntry getBigramEntryAndAdvancePosition(int *const bigramEntryPos) const; 54 55 // Returns head position of bigram list for a PtNode specified by terminalId. 56 int getBigramListHeadPos(const int terminalId) const { 57 const SparseTable *const addressLookupTable = getAddressLookupTable(); 58 if (!addressLookupTable->contains(terminalId)) { 59 return NOT_A_DICT_POS; 60 } 61 return addressLookupTable->get(terminalId); 62 } 63 64 bool writeBigramEntryAtTail(const BigramEntry *const bigramEntryToWrite) { 65 int writingPos = getContentBuffer()->getTailPosition(); 66 return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); 67 } 68 69 bool writeBigramEntry(const BigramEntry *const bigramEntryToWrite, const int entryWritingPos) { 70 int writingPos = entryWritingPos; 71 return writeBigramEntryAndAdvancePosition(bigramEntryToWrite, &writingPos); 72 } 73 74 bool writeBigramEntryAndAdvancePosition(const BigramEntry *const bigramEntryToWrite, 75 int *const entryWritingPos); 76 77 bool writeTerminator(const int writingPos) { 78 // Terminator is a link to the invalid position. 79 return writeLink(INVALID_LINKED_ENTRY_POS, writingPos); 80 } 81 82 bool writeLink(const int linkedPos, const int writingPos); 83 84 bool createNewBigramList(const int terminalId) { 85 const int bigramListPos = getContentBuffer()->getTailPosition(); 86 return getUpdatableAddressLookupTable()->set(terminalId, bigramListPos); 87 } 88 89 bool flushToFile(FILE *const file) const { 90 return flush(file); 91 } 92 93 bool runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 94 const BigramDictContent *const originalBigramDictContent, 95 int *const outBigramEntryCount); 96 97 int getBigramEntrySize() const { 98 if (mHasHistoricalInfo) { 99 return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE 100 + Ver4DictConstants::TIME_STAMP_FIELD_SIZE 101 + Ver4DictConstants::WORD_LEVEL_FIELD_SIZE 102 + Ver4DictConstants::WORD_COUNT_FIELD_SIZE 103 + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; 104 } else { 105 return Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE 106 + Ver4DictConstants::PROBABILITY_SIZE 107 + Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE; 108 } 109 } 110 111 private: 112 DISALLOW_COPY_AND_ASSIGN(BigramDictContent); 113 114 static const int INVALID_LINKED_ENTRY_POS; 115 116 bool writeBigramEntryAttributesAndAdvancePosition( 117 const bool isLink, const int probability, const int targetTerminalId, 118 const int timestamp, const int level, const int count, int *const entryWritingPos); 119 120 bool runGCBigramList(const int bigramListPos, 121 const BigramDictContent *const sourceBigramDictContent, const int toPos, 122 const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 123 int *const outEntryCount); 124 125 bool mHasHistoricalInfo; 126 }; 127 } // namespace latinime 128 #endif /* LATINIME_BIGRAM_DICT_CONTENT_H */ 129