1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h" 18 19 #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" 20 21 namespace latinime { 22 23 const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID; 24 25 const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition( 26 int *const bigramEntryPos) const { 27 const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer(); 28 const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize(); 29 if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) { 30 AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, " 31 "bufSize: %d", *bigramEntryPos, bigramEntryTailPos, 32 bigramListBuffer->getTailPosition()); 33 ASSERT(false); 34 return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY, 35 Ver4DictConstants::NOT_A_TERMINAL_ID); 36 } 37 const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition( 38 Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos); 39 const bool isLink = (bigramFlags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0; 40 int probability = NOT_A_PROBABILITY; 41 int timestamp = NOT_A_TIMESTAMP; 42 int level = 0; 43 int count = 0; 44 if (mHasHistoricalInfo) { 45 timestamp = bigramListBuffer->readUintAndAdvancePosition( 46 Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos); 47 level = bigramListBuffer->readUintAndAdvancePosition( 48 Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos); 49 count = bigramListBuffer->readUintAndAdvancePosition( 50 Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos); 51 } else { 52 probability = bigramListBuffer->readUintAndAdvancePosition( 53 Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos); 54 } 55 const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition( 56 Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos); 57 const int targetTerminalId = 58 (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ? 59 Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId; 60 if (isLink) { 61 const int linkedEntryPos = targetTerminalId; 62 if (linkedEntryPos == INVALID_LINKED_ENTRY_POS) { 63 // Bigram list terminator is found. 64 return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY, 65 Ver4DictConstants::NOT_A_TERMINAL_ID); 66 } 67 *bigramEntryPos = linkedEntryPos; 68 return getBigramEntryAndAdvancePosition(bigramEntryPos); 69 } 70 // hasNext is always true because we should continue to read the next entry until the terminator 71 // is found. 72 if (mHasHistoricalInfo) { 73 const HistoricalInfo historicalInfo(timestamp, level, count); 74 return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId); 75 } else { 76 return BigramEntry(true /* hasNext */, probability, targetTerminalId); 77 } 78 } 79 80 bool BigramDictContent::writeBigramEntryAndAdvancePosition( 81 const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) { 82 return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */, 83 bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(), 84 bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(), 85 bigramEntryToWrite->getHistoricalInfo()->getLevel(), 86 bigramEntryToWrite->getHistoricalInfo()->getCount(), 87 entryWritingPos); 88 } 89 90 bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition( 91 const bool isLink, const int probability, const int targetTerminalId, 92 const int timestamp, const int level, const int count, int *const entryWritingPos) { 93 BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer(); 94 const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0; 95 if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags, 96 Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) { 97 AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags); 98 return false; 99 } 100 if (mHasHistoricalInfo) { 101 if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp, 102 Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) { 103 AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos, 104 timestamp); 105 return false; 106 } 107 if (!bigramListBuffer->writeUintAndAdvancePosition(level, 108 Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) { 109 AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos, 110 level); 111 return false; 112 } 113 if (!bigramListBuffer->writeUintAndAdvancePosition(count, 114 Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) { 115 AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos, 116 count); 117 return false; 118 } 119 } else { 120 if (!bigramListBuffer->writeUintAndAdvancePosition(probability, 121 Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) { 122 AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos, 123 probability); 124 return false; 125 } 126 } 127 const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ? 128 Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId; 129 if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite, 130 Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) { 131 AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d", 132 *entryWritingPos, targetTerminalId); 133 return false; 134 } 135 return true; 136 } 137 138 bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) { 139 const int targetTerminalId = linkedEntryPos; 140 int pos = writingPos; 141 return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */, 142 NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */, 143 0 /* count */, &pos); 144 } 145 146 bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 147 const BigramDictContent *const originalBigramDictContent, 148 int *const outBigramEntryCount) { 149 for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin(); 150 it != terminalIdMap->end(); ++it) { 151 const int originalBigramListPos = 152 originalBigramDictContent->getBigramListHeadPos(it->first); 153 if (originalBigramListPos == NOT_A_DICT_POS) { 154 // This terminal does not have a bigram list. 155 continue; 156 } 157 const int bigramListPos = getContentBuffer()->getTailPosition(); 158 int bigramEntryCount = 0; 159 // Copy bigram list with GC from original content. 160 if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos, 161 terminalIdMap, &bigramEntryCount)) { 162 AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d", 163 originalBigramListPos, bigramListPos); 164 return false; 165 } 166 if (bigramEntryCount == 0) { 167 // All bigram entries are useless. This terminal does not have a bigram list. 168 continue; 169 } 170 *outBigramEntryCount += bigramEntryCount; 171 // Set bigram list position to the lookup table. 172 if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) { 173 AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d", 174 it->second, bigramListPos); 175 return false; 176 } 177 } 178 return true; 179 } 180 181 // Returns whether GC for the bigram list was succeeded or not. 182 bool BigramDictContent::runGCBigramList(const int bigramListPos, 183 const BigramDictContent *const sourceBigramDictContent, const int toPos, 184 const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap, 185 int *const outEntryCount) { 186 bool hasNext = true; 187 int readingPos = bigramListPos; 188 int writingPos = toPos; 189 while (hasNext) { 190 const BigramEntry originalBigramEntry = 191 sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); 192 hasNext = originalBigramEntry.hasNext(); 193 if (!originalBigramEntry.isValid()) { 194 continue; 195 } 196 TerminalPositionLookupTable::TerminalIdMap::const_iterator it = 197 terminalIdMap->find(originalBigramEntry.getTargetTerminalId()); 198 if (it == terminalIdMap->end()) { 199 // Target word has been removed. 200 continue; 201 } 202 const BigramEntry updatedBigramEntry = 203 originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second); 204 if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) { 205 AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos); 206 return false; 207 } 208 *outEntryCount += 1; 209 } 210 if (*outEntryCount > 0) { 211 if (!writeTerminator(writingPos)) { 212 AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos); 213 return false; 214 } 215 } 216 return true; 217 } 218 219 } // namespace latinime 220