1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "suggest/policyimpl/dictionary/header/header_policy.h" 18 19 #include <algorithm> 20 21 namespace latinime { 22 23 // Note that these are corresponding definitions in Java side in DictionaryHeader. 24 const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE"; 25 const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY = 26 "REQUIRES_GERMAN_UMLAUT_PROCESSING"; 27 // TODO: Change attribute string to "IS_DECAYING_DICT". 28 const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE"; 29 const char *const HeaderPolicy::DATE_KEY = "date"; 30 const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME"; 31 const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT"; 32 const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT"; 33 const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE"; 34 // Historical info is information that is needed to support decaying such as timestamp, level and 35 // count. 36 const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO"; 37 const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration 38 const char *const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY = 39 "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP"; 40 const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY = 41 "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID"; 42 const char *const HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY = 43 "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS"; 44 45 const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT"; 46 const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT"; 47 48 const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100; 49 const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f; 50 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 2; 51 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3; 52 // 30 days 53 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS = 54 30 * 24 * 60 * 60; 55 56 const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000; 57 const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000; 58 59 // Used for logging. Question mark is used to indicate that the key is not found. 60 void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue, 61 int outValueSize) const { 62 if (outValueSize <= 0) return; 63 if (outValueSize == 1) { 64 outValue[0] = '\0'; 65 return; 66 } 67 std::vector<int> keyCodePointVector; 68 HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector); 69 DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it = 70 mAttributeMap.find(keyCodePointVector); 71 if (it == mAttributeMap.end()) { 72 // The key was not found. 73 outValue[0] = '?'; 74 outValue[1] = '\0'; 75 return; 76 } 77 const int terminalIndex = std::min(static_cast<int>(it->second.size()), outValueSize - 1); 78 for (int i = 0; i < terminalIndex; ++i) { 79 outValue[i] = it->second[i]; 80 } 81 outValue[terminalIndex] = '\0'; 82 } 83 84 const std::vector<int> HeaderPolicy::readLocale() const { 85 return HeaderReadWriteUtils::readCodePointVectorAttributeValue(&mAttributeMap, LOCALE_KEY); 86 } 87 88 float HeaderPolicy::readMultipleWordCostMultiplier() const { 89 const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, 90 MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE); 91 if (demotionRate <= 0) { 92 return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); 93 } 94 return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate); 95 } 96 97 bool HeaderPolicy::readRequiresGermanUmlautProcessing() const { 98 return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, 99 REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false); 100 } 101 102 bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime, 103 const int unigramCount, const int bigramCount, 104 const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const { 105 int writingPos = 0; 106 DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttributeMap); 107 fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount, 108 extendedRegionSize, &attributeMapToWrite); 109 if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion, 110 &writingPos)) { 111 return false; 112 } 113 if (!HeaderReadWriteUtils::writeDictionaryFlags(outBuffer, mDictionaryFlags, 114 &writingPos)) { 115 return false; 116 } 117 // Temporarily writes a dummy header size. 118 int headerSizeFieldPos = writingPos; 119 if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, 0 /* size */, 120 &writingPos)) { 121 return false; 122 } 123 if (!HeaderReadWriteUtils::writeHeaderAttributes(outBuffer, &attributeMapToWrite, 124 &writingPos)) { 125 return false; 126 } 127 // Writes the actual header size. 128 if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, writingPos, 129 &headerSizeFieldPos)) { 130 return false; 131 } 132 return true; 133 } 134 135 void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount, 136 const int bigramCount, const int extendedRegionSize, 137 DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const { 138 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount); 139 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount); 140 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY, 141 extendedRegionSize); 142 // Set the current time as the generation time. 143 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, DATE_KEY, 144 TimeKeeper::peekCurrentTime()); 145 HeaderReadWriteUtils::setCodePointVectorAttribute(outAttributeMap, LOCALE_KEY, mLocale); 146 if (updatesLastDecayedTime) { 147 // Set current time as the last updated time. 148 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, LAST_DECAYED_TIME_KEY, 149 TimeKeeper::peekCurrentTime()); 150 } 151 } 152 153 /* static */ DictionaryHeaderStructurePolicy::AttributeMap 154 HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) { 155 DictionaryHeaderStructurePolicy::AttributeMap attributeMap; 156 HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap); 157 return attributeMap; 158 } 159 160 } // namespace latinime 161