1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ 18 #define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ 19 20 #include <stdlib.h> 21 #include "./spellingtrie.h" 22 23 namespace ime_pinyin { 24 25 // Type used to identify the size of a pool, such as id pool, etc. 26 typedef uint16 PoolPosType; 27 28 // Type used to identify a parsing mile stone in an atom dictionary. 29 typedef uint16 MileStoneHandle; 30 31 // Type used to express a lemma and its probability score. 32 typedef struct { 33 size_t id:(kLemmaIdSize * 8); 34 size_t lma_len:4; 35 uint16 psb; // The score, the lower psb, the higher possibility. 36 // For single character items, we may also need Hanzi. 37 // For multiple characer items, ignore it. 38 char16 hanzi; 39 } LmaPsbItem, *PLmaPsbItem; 40 41 // LmaPsbItem extended with string. 42 typedef struct { 43 LmaPsbItem lpi; 44 char16 str[kMaxLemmaSize + 1]; 45 } LmaPsbStrItem, *PLmaPsbStrItem; 46 47 48 typedef struct { 49 float psb; 50 char16 pre_hzs[kMaxPredictSize]; 51 uint16 his_len; // The length of the history used to do the prediction. 52 } NPredictItem, *PNPredictItem; 53 54 // Parameter structure used to extend in a dictionary. All dictionaries 55 // receives the same DictExtPara and a dictionary specific MileStoneHandle for 56 // extending. 57 // 58 // When the user inputs a new character, AtomDictBase::extend_dict() will be 59 // called at least once for each dictionary. 60 // 61 // For example, when the user inputs "wm", extend_dict() will be called twice, 62 // and the DictExtPara parameter are as follows respectively: 63 // 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1; 64 // splid_end_split = false; id_start = wa(the first id start with 'w'); 65 // id_num = number of ids starting with 'w'. 66 // 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1; 67 // splid_end_split = false; id_start = wa; id_num = number of ids starting with 68 // 'w'. 69 // 70 // For string "women", one of the cases of the DictExtPara parameter is: 71 // splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"), 72 // step_no = 4; splid_end_split = false; id_start = men, id_num = 1. 73 // 74 typedef struct { 75 // Spelling ids for extending, there are splids_extended + 1 ids in the 76 // buffer. 77 // For a normal lemma, there can only be kMaxLemmaSize spelling ids in max, 78 // but for a composing phrase, there can kMaxSearchSteps spelling ids. 79 uint16 splids[kMaxSearchSteps]; 80 81 // Number of ids that have been used before. splids[splids_extended] is the 82 // newly added id for the current extension. 83 uint16 splids_extended; 84 85 // The step span of the extension. It is also the size of the string for 86 // the newly added spelling id. 87 uint16 ext_len; 88 89 // The step number for the current extension. It is also the ending position 90 // in the input Pinyin string for the substring of spelling ids in splids[]. 91 // For example, when the user inputs "women", step_no = 4. 92 // This parameter may useful to manage the MileStoneHandle list for each 93 // step. When the user deletes a character from the string, MileStoneHandle 94 // objects for the the steps after that character should be reset; when the 95 // user begins a new string, all MileStoneHandle objects should be reset. 96 uint16 step_no; 97 98 // Indicate whether the newly added spelling ends with a splitting character 99 bool splid_end_split; 100 101 // If the newly added id is a half id, id_start is the first id of the 102 // corresponding full ids; if the newly added id is a full id, id_start is 103 // that id. 104 uint16 id_start; 105 106 // If the newly added id is a half id, id_num is the number of corresponding 107 // ids; if it is a full id, id_num == 1. 108 uint16 id_num; 109 }DictExtPara, *PDictExtPara; 110 111 bool is_system_lemma(LemmaIdType lma_id); 112 bool is_user_lemma(LemmaIdType lma_id); 113 bool is_composing_lemma(LemmaIdType lma_id); 114 115 int cmp_lpi_with_psb(const void *p1, const void *p2); 116 int cmp_lpi_with_unified_psb(const void *p1, const void *p2); 117 int cmp_lpi_with_id(const void *p1, const void *p2); 118 int cmp_lpi_with_hanzi(const void *p1, const void *p2); 119 120 int cmp_lpsi_with_str(const void *p1, const void *p2); 121 122 int cmp_hanzis_1(const void *p1, const void *p2); 123 int cmp_hanzis_2(const void *p1, const void *p2); 124 int cmp_hanzis_3(const void *p1, const void *p2); 125 int cmp_hanzis_4(const void *p1, const void *p2); 126 int cmp_hanzis_5(const void *p1, const void *p2); 127 int cmp_hanzis_6(const void *p1, const void *p2); 128 int cmp_hanzis_7(const void *p1, const void *p2); 129 int cmp_hanzis_8(const void *p1, const void *p2); 130 131 int cmp_npre_by_score(const void *p1, const void *p2); 132 int cmp_npre_by_hislen_score(const void *p1, const void *p2); 133 int cmp_npre_by_hanzi_score(const void *p1, const void *p2); 134 135 136 size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num); 137 138 size_t align_to_size_t(size_t size); 139 140 } // namespace 141 142 #endif // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__ 143