1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef PINYINIME_INCLUDE_DICTLIST_H__ 18 #define PINYINIME_INCLUDE_DICTLIST_H__ 19 20 #include <stdlib.h> 21 #include <stdio.h> 22 #include "./dictdef.h" 23 #include "./searchutility.h" 24 #include "./spellingtrie.h" 25 #include "./utf16char.h" 26 27 namespace ime_pinyin { 28 29 class DictList { 30 private: 31 bool initialized_; 32 33 const SpellingTrie *spl_trie_; 34 35 // Number of SingCharItem. The first is blank, because id 0 is invalid. 36 size_t scis_num_; 37 char16 *scis_hz_; 38 SpellingId *scis_splid_; 39 40 // The large memory block to store the word list. 41 char16 *buf_; 42 43 // Starting position of those words whose lengths are i+1, counted in 44 // char16 45 size_t start_pos_[kMaxLemmaSize + 1]; 46 47 size_t start_id_[kMaxLemmaSize + 1]; 48 49 int (*cmp_func_[kMaxLemmaSize])(const void *, const void *); 50 51 bool alloc_resource(size_t buf_size, size_t scim_num); 52 53 void free_resource(); 54 55 #ifdef ___BUILD_MODEL___ 56 // Calculate the requsted memory, including the start_pos[] buffer. 57 size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num); 58 59 void fill_scis(const SingleCharItem *scis, size_t scis_num); 60 61 // Copy the related content to the inner buffer 62 // It should be called after calculate_size() 63 void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num); 64 65 // Find the starting position for the buffer of those 2-character Chinese word 66 // whose first character is the given Chinese character. 67 char16* find_pos2_startedbyhz(char16 hz_char); 68 #endif 69 70 // Find the starting position for the buffer of those words whose lengths are 71 // word_len. The given parameter cmp_func decides how many characters from 72 // beginning will be used to compare. 73 char16* find_pos_startedbyhzs(const char16 last_hzs[], 74 size_t word_Len, 75 int (*cmp_func)(const void *, const void *)); 76 77 public: 78 79 DictList(); 80 ~DictList(); 81 82 bool save_list(FILE *fp); 83 bool load_list(FILE *fp); 84 85 #ifdef ___BUILD_MODEL___ 86 // Init the list from the LemmaEntry array. 87 // lemma_arr should have been sorted by the hanzi_str, and have been given 88 // ids from 1 89 bool init_list(const SingleCharItem *scis, size_t scis_num, 90 const LemmaEntry *lemma_arr, size_t lemma_num); 91 #endif 92 93 // Get the hanzi string for the given id 94 uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max); 95 96 void convert_to_hanzis(char16 *str, uint16 str_len); 97 98 void convert_to_scis_ids(char16 *str, uint16 str_len); 99 100 // last_hzs stores the last n Chinese characters history, its length should be 101 // less or equal than kMaxPredictSize. 102 // hzs_len specifies the length(<= kMaxPredictSize). 103 // predict_buf is used to store the result. 104 // buf_len specifies the buffer length. 105 // b4_used specifies how many items before predict_buf have been used. 106 // Returned value is the number of newly added items. 107 size_t predict(const char16 last_hzs[], uint16 hzs_len, 108 NPredictItem *npre_items, size_t npre_max, 109 size_t b4_used); 110 111 // If half_splid is a valid half spelling id, return those full spelling 112 // ids which share this half id. 113 uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid, 114 uint16 *splids, uint16 max_splids); 115 116 LemmaIdType get_lemma_id(const char16 *str, uint16 str_len); 117 }; 118 } 119 120 #endif // PINYINIME_INCLUDE_DICTLIST_H__ 121