1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_NGRAM_CONTEXT_H 18 #define LATINIME_NGRAM_CONTEXT_H 19 20 #include <array> 21 22 #include "defines.h" 23 #include "utils/int_array_view.h" 24 25 namespace latinime { 26 27 class DictionaryStructureWithBufferPolicy; 28 29 class NgramContext { 30 public: 31 // No prev word information. 32 NgramContext(); 33 // Copy constructor to use this class with std::vector and use this class as a return value. 34 NgramContext(const NgramContext &ngramContext); 35 // Construct from previous words. 36 NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH], 37 const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence, 38 const size_t prevWordCount); 39 // Construct from a previous word. 40 NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount, 41 const bool isBeginningOfSentence); 42 43 size_t getPrevWordCount() const { 44 return mPrevWordCount; 45 } 46 bool isValid() const; 47 48 template<size_t N> 49 const WordIdArrayView getPrevWordIds( 50 const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, 51 WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const { 52 for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) { 53 prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i], 54 mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch); 55 } 56 return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount); 57 } 58 59 // n is 1-indexed. 60 const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const; 61 // n is 1-indexed. 62 bool isNthPrevWordBeginningOfSentence(const size_t n) const; 63 64 private: 65 DISALLOW_ASSIGNMENT_OPERATOR(NgramContext); 66 67 static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, 68 const int *const wordCodePoints, const int wordCodePointCount, 69 const bool isBeginningOfSentence, const bool tryLowerCaseSearch); 70 void clear(); 71 72 const size_t mPrevWordCount; 73 int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; 74 int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 75 bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; 76 }; 77 } // namespace latinime 78 #endif // LATINIME_NGRAM_CONTEXT_H 79