Home | History | Annotate | Download | only in include
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef PINYINIME_INCLUDE_DICTLIST_H__
     18 #define PINYINIME_INCLUDE_DICTLIST_H__
     19 
     20 #include <stdlib.h>
     21 #include <stdio.h>
     22 #include "./dictdef.h"
     23 #include "./searchutility.h"
     24 #include "./spellingtrie.h"
     25 #include "./utf16char.h"
     26 
     27 namespace ime_pinyin {
     28 
     29 class DictList {
     30  private:
     31   bool initialized_;
     32 
     33   const SpellingTrie *spl_trie_;
     34 
     35   // Number of SingCharItem. The first is blank, because id 0 is invalid.
     36   size_t scis_num_;
     37   char16 *scis_hz_;
     38   SpellingId *scis_splid_;
     39 
     40   // The large memory block to store the word list.
     41   char16 *buf_;
     42 
     43   // Starting position of those words whose lengths are i+1, counted in
     44   // char16
     45   size_t start_pos_[kMaxLemmaSize + 1];
     46 
     47   size_t start_id_[kMaxLemmaSize + 1];
     48 
     49   int (*cmp_func_[kMaxLemmaSize])(const void *, const void *);
     50 
     51   bool alloc_resource(size_t buf_size, size_t scim_num);
     52 
     53   void free_resource();
     54 
     55 #ifdef ___BUILD_MODEL___
     56   // Calculate the requsted memory, including the start_pos[] buffer.
     57   size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num);
     58 
     59   void fill_scis(const SingleCharItem *scis, size_t scis_num);
     60 
     61   // Copy the related content to the inner buffer
     62   // It should be called after calculate_size()
     63   void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num);
     64 
     65   // Find the starting position for the buffer of those 2-character Chinese word
     66   // whose first character is the given Chinese character.
     67   char16* find_pos2_startedbyhz(char16 hz_char);
     68 #endif
     69 
     70   // Find the starting position for the buffer of those words whose lengths are
     71   // word_len. The given parameter cmp_func decides how many characters from
     72   // beginning will be used to compare.
     73   char16* find_pos_startedbyhzs(const char16 last_hzs[],
     74                                 size_t word_Len,
     75                                 int (*cmp_func)(const void *, const void *));
     76 
     77  public:
     78 
     79   DictList();
     80   ~DictList();
     81 
     82   bool save_list(FILE *fp);
     83   bool load_list(FILE *fp);
     84 
     85 #ifdef ___BUILD_MODEL___
     86   // Init the list from the LemmaEntry array.
     87   // lemma_arr should have been sorted by the hanzi_str, and have been given
     88   // ids from 1
     89   bool init_list(const SingleCharItem *scis, size_t scis_num,
     90                  const LemmaEntry *lemma_arr, size_t lemma_num);
     91 #endif
     92 
     93   // Get the hanzi string for the given id
     94   uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max);
     95 
     96   void convert_to_hanzis(char16 *str, uint16 str_len);
     97 
     98   void convert_to_scis_ids(char16 *str, uint16 str_len);
     99 
    100   // last_hzs stores the last n Chinese characters history, its length should be
    101   // less or equal than kMaxPredictSize.
    102   // hzs_len specifies the length(<= kMaxPredictSize).
    103   // predict_buf is used to store the result.
    104   // buf_len specifies the buffer length.
    105   // b4_used specifies how many items before predict_buf have been used.
    106   // Returned value is the number of newly added items.
    107   size_t predict(const char16 last_hzs[], uint16 hzs_len,
    108                  NPredictItem *npre_items, size_t npre_max,
    109                  size_t b4_used);
    110 
    111   // If half_splid is a valid half spelling id, return those full spelling
    112   // ids which share this half id.
    113   uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
    114                               uint16 *splids, uint16 max_splids);
    115 
    116   LemmaIdType get_lemma_id(const char16 *str, uint16 str_len);
    117 };
    118 }
    119 
    120 #endif  // PINYINIME_INCLUDE_DICTLIST_H__
    121