1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * This class defines AtomDictBase class which is the base class for all atom 19 * dictionaries. Atom dictionaries are managed by the decoder class 20 * MatrixSearch. 21 * 22 * When the user appends a new character to the Pinyin string, all enabled atom 23 * dictionaries' extend_dict() will be called at least once to get candidates 24 * ended in this step (the information of starting step is also given in the 25 * parameter). Usually, when extend_dict() is called, a MileStoneHandle object 26 * returned by a previous calling for a earlier step is given to speed up the 27 * look-up process, and a new MileStoneHandle object will be returned if 28 * the extension is successful. 29 * 30 * A returned MileStoneHandle object should keep alive until Function 31 * reset_milestones() is called and this object is noticed to be reset. 32 * 33 * Usually, the atom dictionary can use step information to manage its 34 * MileStoneHandle objects, or it can make the objects in ascendant order to 35 * make the reset easier. 36 * 37 * When the decoder loads the dictionary, it will give a starting lemma id for 38 * this atom dictionary to map a inner id to a global id. Global ids should be 39 * used when an atom dictionary talks to any component outside. 40 */ 41 #ifndef PINYINIME_INCLUDE_ATOMDICTBASE_H__ 42 #define PINYINIME_INCLUDE_ATOMDICTBASE_H__ 43 44 #include <stdlib.h> 45 #include "./dictdef.h" 46 #include "./searchutility.h" 47 48 namespace ime_pinyin { 49 class AtomDictBase { 50 public: 51 virtual ~AtomDictBase() {} 52 53 /** 54 * Load an atom dictionary from a file. 55 * 56 * @param file_name The file name to load dictionary. 57 * @param start_id The starting id used for this atom dictionary. 58 * @param end_id The end id (included) which can be used for this atom 59 * dictionary. User dictionary will always use the last id space, so it can 60 * ignore this paramter. All other atom dictionaries should check this 61 * parameter. 62 * @return True if succeed. 63 */ 64 virtual bool load_dict(const char *file_name, LemmaIdType start_id, 65 LemmaIdType end_id) = 0; 66 67 /** 68 * Close this atom dictionary. 69 * 70 * @return True if succeed. 71 */ 72 virtual bool close_dict() = 0; 73 74 /** 75 * Get the total number of lemmas in this atom dictionary. 76 * 77 * @return The total number of lemmas. 78 */ 79 virtual size_t number_of_lemmas() = 0; 80 81 /** 82 * This function is called by the decoder when user deletes a character from 83 * the input string, or begins a new input string. 84 * 85 * Different atom dictionaries may implement this function in different way. 86 * an atom dictionary can use one of these two parameters (or both) to reset 87 * its corresponding MileStoneHandle objects according its detailed 88 * implementation. 89 * 90 * For example, if an atom dictionary uses step information to manage its 91 * MileStoneHandle objects, parameter from_step can be used to identify which 92 * objects should be reset; otherwise, if another atom dictionary does not 93 * use the detailed step information, it only uses ascendant handles 94 * (according to step. For the same step, earlier call, smaller handle), it 95 * can easily reset those MileStoneHandle which are larger than from_handle. 96 * 97 * The decoder always reset the decoding state by step. So when it begins 98 * resetting, it will call reset_milestones() of its atom dictionaries with 99 * the step information, and the MileStoneHandle objects returned by the 100 * earliest calling of extend_dict() for that step. 101 * 102 * If an atom dictionary does not implement incremental search, this function 103 * can be totally ignored. 104 * 105 * @param from_step From which step(included) the MileStoneHandle 106 * objects should be reset. 107 * @param from_handle The ealiest MileStoneHandle object for step from_step 108 */ 109 virtual void reset_milestones(uint16 from_step, 110 MileStoneHandle from_handle) = 0; 111 112 /** 113 * Used to extend in this dictionary. The handle returned should keep valid 114 * until reset_milestones() is called. 115 * 116 * @param from_handle Its previous returned extended handle without the new 117 * spelling id, it can be used to speed up the extending. 118 * @param dep The paramter used for extending. 119 * @param lpi_items Used to fill in the lemmas matched. 120 * @param lpi_max The length of the buffer 121 * @param lpi_num Used to return the newly added items. 122 * @return The new mile stone for this extending. 0 if fail. 123 */ 124 virtual MileStoneHandle extend_dict(MileStoneHandle from_handle, 125 const DictExtPara *dep, 126 LmaPsbItem *lpi_items, 127 size_t lpi_max, size_t *lpi_num) = 0; 128 129 /** 130 * Get lemma items with scores according to a spelling id stream. 131 * This atom dictionary does not need to sort the returned items. 132 * 133 * @param splid_str The spelling id stream buffer. 134 * @param splid_str_len The length of the spelling id stream buffer. 135 * @param lpi_items Used to return matched lemma items with scores. 136 * @param lpi_max The maximum size of the buffer to return result. 137 * @return The number of matched items which have been filled in to lpi_items. 138 */ 139 virtual size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len, 140 LmaPsbItem *lpi_items, size_t lpi_max) = 0; 141 142 /** 143 * Get a lemma string (The Chinese string) by the given lemma id. 144 * 145 * @param id_lemma The lemma id to get the string. 146 * @param str_buf The buffer to return the Chinese string. 147 * @param str_max The maximum size of the buffer. 148 * @return The length of the string, 0 if fail. 149 */ 150 virtual uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, 151 uint16 str_max) = 0; 152 153 /** 154 * Get the full spelling ids for the given lemma id. 155 * If the given buffer is too short, return 0. 156 * 157 * @param splids Used to return the spelling ids. 158 * @param splids_max The maximum buffer length of splids. 159 * @param arg_valid Used to indicate if the incoming parameters have been 160 * initialized are valid. If it is true, the splids and splids_max are valid 161 * and there may be half ids in splids to be updated to full ids. In this 162 * case, splids_max is the number of valid ids in splids. 163 * @return The number of ids in the buffer. 164 */ 165 virtual uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids, 166 uint16 splids_max, bool arg_valid) = 0; 167 168 /** 169 * Function used for prediction. 170 * No need to sort the newly added items. 171 * 172 * @param last_hzs The last n Chinese chracters(called Hanzi), its length 173 * should be less than or equal to kMaxPredictSize. 174 * @param hzs_len specifies the length(<= kMaxPredictSize) of the history. 175 * @param npre_items Used used to return the result. 176 * @param npre_max The length of the buffer to return result 177 * @param b4_used Number of prediction result (from npre_items[-b4_used]) 178 * from other atom dictionaries. A atom ditionary can just ignore it. 179 * @return The number of prediction result from this atom dictionary. 180 */ 181 virtual size_t predict(const char16 last_hzs[], uint16 hzs_len, 182 NPredictItem *npre_items, size_t npre_max, 183 size_t b4_used) = 0; 184 185 /** 186 * Add a lemma to the dictionary. If the dictionary allows to add new 187 * items and this item does not exist, add it. 188 * 189 * @param lemma_str The Chinese string of the lemma. 190 * @param splids The spelling ids of the lemma. 191 * @param lemma_len The length of the Chinese lemma. 192 * @param count The frequency count for this lemma. 193 */ 194 virtual LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[], 195 uint16 lemma_len, uint16 count) = 0; 196 197 /** 198 * Update a lemma's occuring count. 199 * 200 * @param lemma_id The lemma id to update. 201 * @param delta_count The frequnecy count to ajust. 202 * @param selected Indicate whether this lemma is selected by user and 203 * submitted to target edit box. 204 * @return The id if succeed, 0 if fail. 205 */ 206 virtual LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count, 207 bool selected) = 0; 208 209 /** 210 * Get the lemma id for the given lemma. 211 * 212 * @param lemma_str The Chinese string of the lemma. 213 * @param splids The spelling ids of the lemma. 214 * @param lemma_len The length of the lemma. 215 * @return The matched lemma id, or 0 if fail. 216 */ 217 virtual LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[], 218 uint16 lemma_len) = 0; 219 220 /** 221 * Get the lemma score. 222 * 223 * @param lemma_id The lemma id to get score. 224 * @return The score of the lemma, or 0 if fail. 225 */ 226 virtual LmaScoreType get_lemma_score(LemmaIdType lemma_id) = 0; 227 228 /** 229 * Get the lemma score. 230 * 231 * @param lemma_str The Chinese string of the lemma. 232 * @param splids The spelling ids of the lemma. 233 * @param lemma_len The length of the lemma. 234 * @return The score of the lamm, or 0 if fail. 235 */ 236 virtual LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[], 237 uint16 lemma_len) = 0; 238 239 /** 240 * If the dictionary allowed, remove a lemma from it. 241 * 242 * @param lemma_id The id of the lemma to remove. 243 * @return True if succeed. 244 */ 245 virtual bool remove_lemma(LemmaIdType lemma_id) = 0; 246 247 /** 248 * Get the total occuring count of this atom dictionary. 249 * 250 * @return The total occuring count of this atom dictionary. 251 */ 252 virtual size_t get_total_lemma_count() = 0; 253 254 /** 255 * Set the total occuring count of other atom dictionaries. 256 * 257 * @param count The total occuring count of other atom dictionaies. 258 */ 259 virtual void set_total_lemma_count_of_others(size_t count) = 0; 260 261 /** 262 * Notify this atom dictionary to flush the cached data to persistent storage 263 * if necessary. 264 */ 265 virtual void flush_cache() = 0; 266 }; 267 } 268 269 #endif // PINYINIME_INCLUDE_ATOMDICTBASE_H__ 270