Home | History | Annotate | Download | only in header
      1 /*
      2  * Copyright (C) 2013, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/policyimpl/dictionary/header/header_policy.h"
     18 
     19 #include <algorithm>
     20 
     21 namespace latinime {
     22 
     23 // Note that these are corresponding definitions in Java side in DictionaryHeader.
     24 const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
     25 const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
     26         "REQUIRES_GERMAN_UMLAUT_PROCESSING";
     27 // TODO: Change attribute string to "IS_DECAYING_DICT".
     28 const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
     29 const char *const HeaderPolicy::DATE_KEY = "date";
     30 const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
     31 const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
     32 const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
     33 const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
     34 // Historical info is information that is needed to support decaying such as timestamp, level and
     35 // count.
     36 const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
     37 const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
     38 const char *const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY =
     39         "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP";
     40 const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
     41         "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
     42 const char *const HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY =
     43         "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS";
     44 
     45 const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
     46 const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
     47 
     48 const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
     49 const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
     50 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 2;
     51 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3;
     52 // 30 days
     53 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS =
     54         30 * 24 * 60 * 60;
     55 
     56 const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
     57 const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000;
     58 
     59 // Used for logging. Question mark is used to indicate that the key is not found.
     60 void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *outValue,
     61         int outValueSize) const {
     62     if (outValueSize <= 0) return;
     63     if (outValueSize == 1) {
     64         outValue[0] = '\0';
     65         return;
     66     }
     67     std::vector<int> keyCodePointVector;
     68     HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
     69     DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it =
     70             mAttributeMap.find(keyCodePointVector);
     71     if (it == mAttributeMap.end()) {
     72         // The key was not found.
     73         outValue[0] = '?';
     74         outValue[1] = '\0';
     75         return;
     76     }
     77     const int terminalIndex = std::min(static_cast<int>(it->second.size()), outValueSize - 1);
     78     for (int i = 0; i < terminalIndex; ++i) {
     79         outValue[i] = it->second[i];
     80     }
     81     outValue[terminalIndex] = '\0';
     82 }
     83 
     84 const std::vector<int> HeaderPolicy::readLocale() const {
     85     return HeaderReadWriteUtils::readCodePointVectorAttributeValue(&mAttributeMap, LOCALE_KEY);
     86 }
     87 
     88 float HeaderPolicy::readMultipleWordCostMultiplier() const {
     89     const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
     90             MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
     91     if (demotionRate <= 0) {
     92         return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
     93     }
     94     return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
     95 }
     96 
     97 bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
     98     return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
     99             REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false);
    100 }
    101 
    102 bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime,
    103         const int unigramCount, const int bigramCount,
    104         const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const {
    105     int writingPos = 0;
    106     DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttributeMap);
    107     fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount,
    108             extendedRegionSize, &attributeMapToWrite);
    109     if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVersion,
    110             &writingPos)) {
    111         return false;
    112     }
    113     if (!HeaderReadWriteUtils::writeDictionaryFlags(outBuffer, mDictionaryFlags,
    114             &writingPos)) {
    115         return false;
    116     }
    117     // Temporarily writes a dummy header size.
    118     int headerSizeFieldPos = writingPos;
    119     if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, 0 /* size */,
    120             &writingPos)) {
    121         return false;
    122     }
    123     if (!HeaderReadWriteUtils::writeHeaderAttributes(outBuffer, &attributeMapToWrite,
    124             &writingPos)) {
    125         return false;
    126     }
    127     // Writes the actual header size.
    128     if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, writingPos,
    129             &headerSizeFieldPos)) {
    130         return false;
    131     }
    132     return true;
    133 }
    134 
    135 void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int unigramCount,
    136         const int bigramCount, const int extendedRegionSize,
    137         DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const {
    138     HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, unigramCount);
    139     HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, bigramCount);
    140     HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_KEY,
    141             extendedRegionSize);
    142     // Set the current time as the generation time.
    143     HeaderReadWriteUtils::setIntAttribute(outAttributeMap, DATE_KEY,
    144             TimeKeeper::peekCurrentTime());
    145     HeaderReadWriteUtils::setCodePointVectorAttribute(outAttributeMap, LOCALE_KEY, mLocale);
    146     if (updatesLastDecayedTime) {
    147         // Set current time as the last updated time.
    148         HeaderReadWriteUtils::setIntAttribute(outAttributeMap, LAST_DECAYED_TIME_KEY,
    149                 TimeKeeper::peekCurrentTime());
    150     }
    151 }
    152 
    153 /* static */ DictionaryHeaderStructurePolicy::AttributeMap
    154         HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) {
    155     DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
    156     HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
    157     return attributeMap;
    158 }
    159 
    160 } // namespace latinime
    161