1 /* 2 * Copyright (C) 2013, The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "dictionary/structure/pt_common/dynamic_pt_reading_helper.h" 18 19 #include "dictionary/structure/pt_common/pt_node_array_reader.h" 20 #include "utils/char_utils.h" 21 22 namespace latinime { 23 24 // To avoid infinite loop caused by invalid or malicious forward links. 25 const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000; 26 const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000; 27 const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH; 28 29 bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode( 30 const PtNodeParams *const ptNodeParams) { 31 if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) { 32 mTerminalPositions->push_back(ptNodeParams->getHeadPos()); 33 } 34 return true; 35 } 36 37 // Visits all PtNodes in post-order depth first manner. 38 // For example, visits c -> b -> y -> x -> a for the following dictionary: 39 // a _ b _ c 40 // \ x _ y 41 bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner( 42 TraversingEventListener *const listener) { 43 bool alreadyVisitedChildren = false; 44 // Descend from the root to the root PtNode array. 45 if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) { 46 return false; 47 } 48 while (!isEnd()) { 49 const PtNodeParams ptNodeParams(getPtNodeParams()); 50 if (!ptNodeParams.isValid()) { 51 break; 52 } 53 if (!alreadyVisitedChildren) { 54 if (ptNodeParams.hasChildren()) { 55 // Move to the first child. 56 if (!listener->onDescend(ptNodeParams.getChildrenPos())) { 57 return false; 58 } 59 pushReadingStateToStack(); 60 readChildNode(ptNodeParams); 61 } else { 62 alreadyVisitedChildren = true; 63 } 64 } else { 65 if (!listener->onVisitingPtNode(&ptNodeParams)) { 66 return false; 67 } 68 readNextSiblingNode(ptNodeParams); 69 if (isEnd()) { 70 // All PtNodes in current linked PtNode arrays have been visited. 71 // Return to the parent. 72 if (!listener->onReadingPtNodeArrayTail()) { 73 return false; 74 } 75 if (mReadingStateStack.size() <= 0) { 76 break; 77 } 78 if (!listener->onAscend()) { 79 return false; 80 } 81 popReadingStateFromStack(); 82 alreadyVisitedChildren = true; 83 } else { 84 // Process sibling PtNode. 85 alreadyVisitedChildren = false; 86 } 87 } 88 } 89 // Ascend from the root PtNode array to the root. 90 if (!listener->onAscend()) { 91 return false; 92 } 93 return !isError(); 94 } 95 96 // Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order 97 // that PtNodes are written in the dictionary buffer. 98 // For example, visits a -> b -> x -> c -> y for the following dictionary: 99 // a _ b _ c 100 // \ x _ y 101 bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( 102 TraversingEventListener *const listener) { 103 bool alreadyVisitedAllPtNodesInArray = false; 104 bool alreadyVisitedChildren = false; 105 // Descend from the root to the root PtNode array. 106 if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) { 107 return false; 108 } 109 if (isEnd()) { 110 // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array. 111 if (!listener->onReadingPtNodeArrayTail()) { 112 return false; 113 } 114 } 115 pushReadingStateToStack(); 116 while (!isEnd()) { 117 const PtNodeParams ptNodeParams(getPtNodeParams()); 118 if (!ptNodeParams.isValid()) { 119 break; 120 } 121 if (alreadyVisitedAllPtNodesInArray) { 122 if (alreadyVisitedChildren) { 123 // Move to next sibling PtNode's children. 124 readNextSiblingNode(ptNodeParams); 125 if (isEnd()) { 126 // Return to the parent PTNode. 127 if (!listener->onAscend()) { 128 return false; 129 } 130 if (mReadingStateStack.size() <= 0) { 131 break; 132 } 133 popReadingStateFromStack(); 134 alreadyVisitedChildren = true; 135 alreadyVisitedAllPtNodesInArray = true; 136 } else { 137 alreadyVisitedChildren = false; 138 } 139 } else { 140 if (ptNodeParams.hasChildren()) { 141 // Move to the first child. 142 if (!listener->onDescend(ptNodeParams.getChildrenPos())) { 143 return false; 144 } 145 pushReadingStateToStack(); 146 readChildNode(ptNodeParams); 147 // Push state to return the head of PtNode array. 148 pushReadingStateToStack(); 149 alreadyVisitedAllPtNodesInArray = false; 150 alreadyVisitedChildren = false; 151 } else { 152 alreadyVisitedChildren = true; 153 } 154 } 155 } else { 156 if (!listener->onVisitingPtNode(&ptNodeParams)) { 157 return false; 158 } 159 readNextSiblingNode(ptNodeParams); 160 if (isEnd()) { 161 if (!listener->onReadingPtNodeArrayTail()) { 162 return false; 163 } 164 // Return to the head of current PtNode array. 165 popReadingStateFromStack(); 166 alreadyVisitedAllPtNodesInArray = true; 167 } 168 } 169 } 170 popReadingStateFromStack(); 171 // Ascend from the root PtNode array to the root. 172 if (!listener->onAscend()) { 173 return false; 174 } 175 return !isError(); 176 } 177 178 int DynamicPtReadingHelper::getCodePointsAndReturnCodePointCount(const int maxCodePointCount, 179 int *const outCodePoints) { 180 // This method traverses parent nodes from the terminal by following parent pointers; thus, 181 // node code points are stored in the buffer in the reverse order. 182 int reverseCodePoints[maxCodePointCount]; 183 const PtNodeParams terminalPtNodeParams(getPtNodeParams()); 184 // First, read the terminal node and get its probability. 185 if (!isValidTerminalNode(terminalPtNodeParams)) { 186 // Node at the ptNodePos is not a valid terminal node. 187 return 0; 188 } 189 // Then, following parent node link to the dictionary root and fetch node code points. 190 int totalCodePointCount = 0; 191 while (!isEnd()) { 192 const PtNodeParams ptNodeParams(getPtNodeParams()); 193 totalCodePointCount = getTotalCodePointCount(ptNodeParams); 194 if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) { 195 // The ptNodePos is not a valid terminal node position in the dictionary. 196 return 0; 197 } 198 // Store node code points to buffer in the reverse order. 199 fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(), 200 reverseCodePoints); 201 // Follow parent node toward the root node. 202 readParentNode(ptNodeParams); 203 } 204 if (isError()) { 205 // The node position or the dictionary is invalid. 206 return 0; 207 } 208 // Reverse the stored code points to output them. 209 for (int i = 0; i < totalCodePointCount; ++i) { 210 outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1]; 211 } 212 return totalCodePointCount; 213 } 214 215 int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord, 216 const size_t length, const bool forceLowerCaseSearch) { 217 int searchCodePoints[length]; 218 for (size_t i = 0; i < length; ++i) { 219 searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; 220 } 221 while (!isEnd()) { 222 const PtNodeParams ptNodeParams(getPtNodeParams()); 223 const int matchedCodePointCount = getPrevTotalCodePointCount(); 224 if (getTotalCodePointCount(ptNodeParams) > length 225 || !isMatchedCodePoint(ptNodeParams, 0 /* index */, 226 searchCodePoints[matchedCodePointCount])) { 227 // Current node has too many code points or its first code point is different from 228 // target code point. Skip this node and read the next sibling node. 229 readNextSiblingNode(ptNodeParams); 230 continue; 231 } 232 // Check following merged node code points. 233 const int nodeCodePointCount = ptNodeParams.getCodePointCount(); 234 for (int j = 1; j < nodeCodePointCount; ++j) { 235 if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) { 236 // Different code point is found. The given word is not included in the dictionary. 237 return NOT_A_DICT_POS; 238 } 239 } 240 // All characters are matched. 241 if (length == getTotalCodePointCount(ptNodeParams)) { 242 if (!ptNodeParams.isTerminal()) { 243 return NOT_A_DICT_POS; 244 } 245 // Terminal position is found. 246 return ptNodeParams.getHeadPos(); 247 } 248 if (!ptNodeParams.hasChildren()) { 249 return NOT_A_DICT_POS; 250 } 251 // Advance to the children nodes. 252 readChildNode(ptNodeParams); 253 } 254 // If we already traversed the tree further than the word is long, there means 255 // there was no match (or we would have found it). 256 return NOT_A_DICT_POS; 257 } 258 259 // Read node array size and process empty node arrays. Nodes and arrays are counted up in this 260 // method to avoid an infinite loop. 261 void DynamicPtReadingHelper::nextPtNodeArray() { 262 int ptNodeCountInArray = 0; 263 int firstPtNodePos = NOT_A_DICT_POS; 264 if (!mPtNodeArrayReader->readPtNodeArrayInfoAndReturnIfValid( 265 mReadingState.mPos, &ptNodeCountInArray, &firstPtNodePos)) { 266 mIsError = true; 267 mReadingState.mPos = NOT_A_DICT_POS; 268 return; 269 } 270 mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos; 271 mReadingState.mRemainingPtNodeCountInThisArray = ptNodeCountInArray; 272 mReadingState.mPos = firstPtNodePos; 273 // Count up nodes and node arrays to avoid infinite loop. 274 mReadingState.mTotalPtNodeIndexInThisArrayChain += 275 mReadingState.mRemainingPtNodeCountInThisArray; 276 mReadingState.mPtNodeArrayIndexInThisArrayChain++; 277 if (mReadingState.mRemainingPtNodeCountInThisArray < 0 278 || mReadingState.mTotalPtNodeIndexInThisArrayChain 279 > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP 280 || mReadingState.mPtNodeArrayIndexInThisArrayChain 281 > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) { 282 // Invalid dictionary. 283 AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d" 284 "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d", 285 mReadingState.mRemainingPtNodeCountInThisArray, 286 mReadingState.mTotalPtNodeIndexInThisArrayChain, 287 MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP, 288 mReadingState.mPtNodeArrayIndexInThisArrayChain, 289 MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP); 290 ASSERT(false); 291 mIsError = true; 292 mReadingState.mPos = NOT_A_DICT_POS; 293 return; 294 } 295 if (mReadingState.mRemainingPtNodeCountInThisArray == 0) { 296 // Empty node array. Try following forward link. 297 followForwardLink(); 298 } 299 } 300 301 // Follow the forward link and read the next node array if exists. 302 void DynamicPtReadingHelper::followForwardLink() { 303 int nextPtNodeArrayPos = NOT_A_DICT_POS; 304 if (!mPtNodeArrayReader->readForwardLinkAndReturnIfValid( 305 mReadingState.mPos, &nextPtNodeArrayPos)) { 306 mIsError = true; 307 mReadingState.mPos = NOT_A_DICT_POS; 308 return; 309 } 310 mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos; 311 if (nextPtNodeArrayPos != NOT_A_DICT_POS) { 312 // Follow the forward link. 313 mReadingState.mPos = nextPtNodeArrayPos; 314 nextPtNodeArray(); 315 } else { 316 // All node arrays have been read. 317 mReadingState.mPos = NOT_A_DICT_POS; 318 } 319 } 320 321 } // namespace latinime 322