Home | History | Annotate | Download | only in property
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LATINIME_NGRAM_CONTEXT_H
     18 #define LATINIME_NGRAM_CONTEXT_H
     19 
     20 #include <array>
     21 
     22 #include "defines.h"
     23 #include "utils/int_array_view.h"
     24 
     25 namespace latinime {
     26 
     27 class DictionaryStructureWithBufferPolicy;
     28 
     29 class NgramContext {
     30  public:
     31     // No prev word information.
     32     NgramContext();
     33     // Copy constructor to use this class with std::vector and use this class as a return value.
     34     NgramContext(const NgramContext &ngramContext);
     35     // Construct from previous words.
     36     NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
     37             const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
     38             const size_t prevWordCount);
     39     // Construct from a previous word.
     40     NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
     41             const bool isBeginningOfSentence);
     42 
     43     size_t getPrevWordCount() const {
     44         return mPrevWordCount;
     45     }
     46     bool isValid() const;
     47 
     48     template<size_t N>
     49     const WordIdArrayView getPrevWordIds(
     50             const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
     51             WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
     52         for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {
     53             prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],
     54                     mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);
     55         }
     56         return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);
     57     }
     58 
     59     // n is 1-indexed.
     60     const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
     61     // n is 1-indexed.
     62     bool isNthPrevWordBeginningOfSentence(const size_t n) const;
     63 
     64  private:
     65     DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
     66 
     67     static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
     68             const int *const wordCodePoints, const int wordCodePointCount,
     69             const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
     70     void clear();
     71 
     72     const size_t mPrevWordCount;
     73     int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
     74     int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
     75     bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
     76 };
     77 } // namespace latinime
     78 #endif // LATINIME_NGRAM_CONTEXT_H
     79