1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "suggest/core/dicnode/dic_node_utils.h" 18 19 #include <cstring> 20 21 #include "suggest/core/dicnode/dic_node.h" 22 #include "suggest/core/dicnode/dic_node_vector.h" 23 #include "suggest/core/dictionary/multi_bigram_map.h" 24 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" 25 #include "utils/char_utils.h" 26 27 namespace latinime { 28 29 /////////////////////////////// 30 // Node initialization utils // 31 /////////////////////////////// 32 33 /* static */ void DicNodeUtils::initAsRoot( 34 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 35 const int prevWordNodePos, DicNode *const newRootNode) { 36 newRootNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordNodePos); 37 } 38 39 /*static */ void DicNodeUtils::initAsRootWithPreviousWord( 40 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 41 DicNode *const prevWordLastNode, DicNode *const newRootNode) { 42 newRootNode->initAsRootWithPreviousWord( 43 prevWordLastNode, dictionaryStructurePolicy->getRootPosition()); 44 } 45 46 /* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) { 47 destNode->initByCopy(srcNode); 48 } 49 50 /////////////////////////////////// 51 // Traverse node expansion utils // 52 /////////////////////////////////// 53 /* static */ void DicNodeUtils::getAllChildDicNodes(DicNode *dicNode, 54 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 55 DicNodeVector *childDicNodes) { 56 if (dicNode->isTotalInputSizeExceedingLimit()) { 57 return; 58 } 59 if (!dicNode->isLeavingNode()) { 60 childDicNodes->pushPassingChild(dicNode); 61 } else { 62 dictionaryStructurePolicy->createAndGetAllChildNodes(dicNode, childDicNodes); 63 } 64 } 65 66 /////////////////// 67 // Scoring utils // 68 /////////////////// 69 /** 70 * Computes the combined bigram / unigram cost for the given dicNode. 71 */ 72 /* static */ float DicNodeUtils::getBigramNodeImprobability( 73 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 74 const DicNode *const node, MultiBigramMap *multiBigramMap) { 75 if (node->hasMultipleWords() && !node->isValidMultipleWordSuggestion()) { 76 return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); 77 } 78 const int probability = getBigramNodeProbability(dictionaryStructurePolicy, node, 79 multiBigramMap); 80 // TODO: This equation to calculate the improbability looks unreasonable. Investigate this. 81 const float cost = static_cast<float>(MAX_PROBABILITY - probability) 82 / static_cast<float>(MAX_PROBABILITY); 83 return cost; 84 } 85 86 /* static */ int DicNodeUtils::getBigramNodeProbability( 87 const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, 88 const DicNode *const node, MultiBigramMap *multiBigramMap) { 89 const int unigramProbability = node->getProbability(); 90 const int wordPos = node->getPos(); 91 const int prevWordPos = node->getPrevWordPos(); 92 if (NOT_A_DICT_POS == wordPos || NOT_A_DICT_POS == prevWordPos) { 93 // Note: Normally wordPos comes from the dictionary and should never equal 94 // NOT_A_VALID_WORD_POS. 95 return dictionaryStructurePolicy->getProbability(unigramProbability, 96 NOT_A_PROBABILITY); 97 } 98 if (multiBigramMap) { 99 return multiBigramMap->getBigramProbability(dictionaryStructurePolicy, prevWordPos, 100 wordPos, unigramProbability); 101 } 102 return dictionaryStructurePolicy->getProbability(unigramProbability, 103 NOT_A_PROBABILITY); 104 } 105 106 //////////////// 107 // Char utils // 108 //////////////// 109 110 // TODO: Move to char_utils? 111 /* static */ int DicNodeUtils::appendTwoWords(const int *const src0, const int16_t length0, 112 const int *const src1, const int16_t length1, int *dest) { 113 int actualLength0 = 0; 114 for (int i = 0; i < length0; ++i) { 115 if (src0[i] == 0) { 116 break; 117 } 118 actualLength0 = i + 1; 119 } 120 actualLength0 = min(actualLength0, MAX_WORD_LENGTH); 121 memcpy(dest, src0, actualLength0 * sizeof(dest[0])); 122 if (!src1 || length1 == 0) { 123 return actualLength0; 124 } 125 int actualLength1 = 0; 126 for (int i = 0; i < length1; ++i) { 127 if (src1[i] == 0) { 128 break; 129 } 130 actualLength1 = i + 1; 131 } 132 actualLength1 = min(actualLength1, MAX_WORD_LENGTH - actualLength0); 133 memcpy(&dest[actualLength0], src1, actualLength1 * sizeof(dest[0])); 134 return actualLength0 + actualLength1; 135 } 136 } // namespace latinime 137