Home | History | Annotate | Download | only in dicnode
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "suggest/core/dicnode/dic_node_utils.h"
     18 
     19 #include <cstring>
     20 
     21 #include "suggest/core/dicnode/dic_node.h"
     22 #include "suggest/core/dicnode/dic_node_vector.h"
     23 #include "suggest/core/dictionary/multi_bigram_map.h"
     24 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
     25 #include "utils/char_utils.h"
     26 
     27 namespace latinime {
     28 
     29 ///////////////////////////////
     30 // Node initialization utils //
     31 ///////////////////////////////
     32 
     33 /* static */ void DicNodeUtils::initAsRoot(
     34         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
     35         const int prevWordNodePos, DicNode *const newRootNode) {
     36     newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos);
     37 }
     38 
     39 /*static */ void DicNodeUtils::initAsRootWithPreviousWord(
     40         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
     41         DicNode *const prevWordLastNode, DicNode *const newRootNode) {
     42     newRootNode->initAsRootWithPreviousWord(
     43             prevWordLastNode, dictionaryStructurePolicy->getRootPosition());
     44 }
     45 
     46 /* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
     47     destNode->initByCopy(srcNode);
     48 }
     49 
     50 ///////////////////////////////////
     51 // Traverse node expansion utils //
     52 ///////////////////////////////////
     53 /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode,
     54         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
     55         DicNodeVector *childDicNodes) {
     56     if (dicNode->isTotalInputSizeExceedingLimit()) {
     57         return;
     58     }
     59     if (!dicNode->isLeavingNode()) {
     60         childDicNodes->pushPassingChild(dicNode);
     61     } else {
     62         dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes);
     63     }
     64 }
     65 
     66 ///////////////////
     67 // Scoring utils //
     68 ///////////////////
     69 /**
     70  * Computes the combined bigram / unigram cost for the given dicNode.
     71  */
     72 /* static */ float DicNodeUtils::getBigramNodeImprobability(
     73         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
     74         const DicNode *const node, MultiBigramMap *multiBigramMap) {
     75     if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) {
     76         return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
     77     }
     78     const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node,
     79             multiBigramMap);
     80     // TODO: This equation to calculate the improbability looks unreasonable.  Investigate this.
     81     const float cost = static_cast<float>(MAX_PROBABILITY - probability)
     82             / static_cast<float>(MAX_PROBABILITY);
     83     return cost;
     84 }
     85 
     86 /* static */ int DicNodeUtils::getBigramNodeProbability(
     87         const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
     88         const DicNode *const node, MultiBigramMap *multiBigramMap) {
     89     const int unigramProbability = node->getProbability();
     90     const int wordPos = node->getPos();
     91     const int prevWordPos = node->getPrevWordPos();
     92     if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) {
     93         // Note: Normally wordPos comes from the dictionary and should never equal
     94         // NOT_A_VALID_WORD_POS.
     95         return dictionaryStructurePolicy->getProbability(unigramProbability,
     96                 NOT_A_PROBABILITY);
     97     }
     98     if (multiBigramMap) {
     99         return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos,
    100                 wordPos, unigramProbability);
    101     }
    102     return dictionaryStructurePolicy->getProbability(unigramProbability,
    103             NOT_A_PROBABILITY);
    104 }
    105 
    106 ////////////////
    107 // Char utils //
    108 ////////////////
    109 
    110 // TODO: Move to char_utils?
    111 /* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0,
    112         const int *const src1, const int16_t length1, int *dest) {
    113     int actualLength0 = 0;
    114     for (int i = 0; i < length0; ++i) {
    115         if (src0[i] == 0) {
    116             break;
    117         }
    118         actualLength0 = i + 1;
    119     }
    120     actualLength0 = min(actualLength0, MAX_WORD_LENGTH);
    121     memcpy(dest, src0, actualLength0 * sizeof(dest[0]));
    122     if (!src1 || length1 == 0) {
    123         return actualLength0;
    124     }
    125     int actualLength1 = 0;
    126     for (int i = 0; i < length1; ++i) {
    127         if (src1[i] == 0) {
    128             break;
    129         }
    130         actualLength1 = i + 1;
    131     }
    132     actualLength1 = min(actualLength1, MAX_WORD_LENGTH - actualLength0);
    133     memcpy(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0]));
    134     return actualLength0 + actualLength1;
    135 }
    136 } // namespace latinime
    137