Home | History | Annotate | Download | only in offdevice_intermediate_dict
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
     18 
     19 #include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
     20 
     21 namespace latinime {
     22 namespace dicttoolkit {
     23 
     24 bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
     25     const CodePointArrayView codePoints = wordProperty.getCodePoints();
     26     if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) {
     27         return false;
     28     }
     29     return addWordInner(codePoints, wordProperty, mRootPtNodeArray);
     30 }
     31 
     32 bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
     33         const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
     34     auto ptNodeList = ptNodeArray.getMutablePtNodeList();
     35     auto ptNodeIt = ptNodeList->begin();
     36     for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) {
     37         const auto &ptNode = *ptNodeIt;
     38         const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
     39         if (codePoints[0] < ptNodeCodePoints[0]) {
     40             continue;
     41         }
     42         if (codePoints[0] > ptNodeCodePoints[0]) {
     43             break;
     44         }
     45         size_t i = 1;
     46         for (; i < codePoints.size(); ++i) {
     47             if (i >= ptNodeCodePoints.size()) {
     48                 // Add new child.
     49                 return addWordInner(codePoints.skip(i), wordProperty,
     50                         ptNode->getChildrenPtNodeArray());
     51             }
     52             if (codePoints[i] != ptNodeCodePoints[i]) {
     53                 break;
     54             }
     55         }
     56         if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
     57             // All code points matched.
     58             if (ptNode->getWordProperty()) {
     59                 //  Adding the same word multiple times is not supported.
     60                 return false;
     61             }
     62             ptNodeList->insert(ptNodeIt,
     63                     std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode));
     64             ptNodeList->erase(ptNodeIt);
     65             return true;
     66         }
     67         // The (i+1)-th elements are different.
     68         // Create and Add new parent ptNode for the common part.
     69         auto newPtNode = codePoints.size() == i
     70                 ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)
     71                 : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i));
     72         ptNodeList->insert(ptNodeIt, newPtNode);
     73         OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
     74                 newPtNode->getChildrenPtNodeArray();
     75         // Add new child for the existing ptNode.
     76         childrenPtNodeArray.getMutablePtNodeList()->push_back(
     77                 std::make_shared<OffdeviceIntermediateDictPtNode>(
     78                         ptNodeCodePoints.skip(i), *ptNode));
     79         ptNodeList->erase(ptNodeIt);
     80         if (codePoints.size() != i) {
     81             // Add a child for the new word.
     82             return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
     83         }
     84         return true;
     85     }
     86     ptNodeList->insert(ptNodeIt,
     87             std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty));
     88     return true;
     89 }
     90 
     91 const WordProperty *OffdeviceIntermediateDict::getWordProperty(
     92         const CodePointArrayView codePoints) const {
     93     const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
     94     for (size_t i = 0; i < codePoints.size();) {
     95         bool foundNext = false;
     96         for (const auto ptNode : ptNodeArray->getPtNodeList()) {
     97             const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
     98             if (codePoints[i] < ptNodeCodePoints[0]) {
     99                 continue;
    100             }
    101             if (codePoints[i] > ptNodeCodePoints[0]
    102                      || codePoints.size() < ptNodeCodePoints.size()) {
    103                 return nullptr;
    104             }
    105             for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
    106                 if (codePoints[i + j] != ptNodeCodePoints[j]) {
    107                     return nullptr;
    108                 }
    109             }
    110             i += ptNodeCodePoints.size();
    111             if (i == codePoints.size()) {
    112                 return ptNode->getWordProperty();
    113             }
    114             ptNodeArray = &ptNode->getChildrenPtNodeArray();
    115             foundNext = true;
    116             break;
    117         }
    118         if (!foundNext) {
    119             break;
    120         }
    121     }
    122     return nullptr;
    123 }
    124 
    125 } // namespace dicttoolkit
    126 } // namespace latinime
    127