Home | History | Annotate | Download | only in include
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /**
     18  * This file defines the AtomDictBase class, which is the base class for all
     19  * atom dictionaries. Atom dictionaries are managed by the decoder class
     20  * MatrixSearch.
     21  *
     22  * When the user appends a new character to the Pinyin string, the
     23  * extend_dict() of every enabled atom dictionary will be called at least once
     24  * to get the candidates ending at this step (the starting step is also given
     25  * in the parameter). Usually, when extend_dict() is called, a MileStoneHandle
     26  * object returned by a previous call for an earlier step is given to speed up
     27  * the look-up process, and a new MileStoneHandle object will be returned if
     28  * the extension is successful.
     29  *
     30  * A returned MileStoneHandle object should stay alive until the function
     31  * reset_milestones() is called and this object is indicated to be reset.
     32  *
     33  * Usually, the atom dictionary can use step information to manage its
     34  * MileStoneHandle objects, or it can keep the objects in ascending order to
     35  * make the reset easier.
     36  *
     37  * When the decoder loads the dictionary, it will give a starting lemma id for
     38  * this atom dictionary to map an inner id to a global id. Global ids should be
     39  * used when an atom dictionary talks to any outside component.
     40  */
     41 #ifndef PINYINIME_INCLUDE_ATOMDICTBASE_H__
     42 #define PINYINIME_INCLUDE_ATOMDICTBASE_H__
     43 
     44 #include <stdlib.h>
     45 #include "./dictdef.h"
     46 #include "./searchutility.h"
     47 
     48 namespace ime_pinyin {
     49 class AtomDictBase {
     50  public:
     51   virtual ~AtomDictBase() {}
     52 
     53   /**
     54    * Load an atom dictionary from a file.
     55    *
     56    * @param file_name The file name to load dictionary.
     57    * @param start_id The starting id used for this atom dictionary.
     58    * @param end_id The end id (included) which can be used for this atom
     59    * dictionary. User dictionary will always use the last id space, so it can
     60    * ignore this paramter. All other atom dictionaries should check this
     61    * parameter.
     62    * @return True if succeed.
     63    */
     64   virtual bool load_dict(const char *file_name, LemmaIdType start_id,
     65                          LemmaIdType end_id) = 0;
     66 
     67   /**
     68    * Close this atom dictionary.
     69    *
     70    * @return True if succeed.
     71    */
     72   virtual bool close_dict() = 0;
     73 
     74   /**
     75    * Get the total number of lemmas in this atom dictionary.
     76    *
     77    * @return The total number of lemmas.
     78    */
     79   virtual size_t number_of_lemmas() = 0;
     80 
     81   /**
     82    * This function is called by the decoder when user deletes a character from
     83    * the input string, or begins a new input string.
     84    *
     85    * Different atom dictionaries may implement this function in different way.
     86    * an atom dictionary can use one of these two parameters (or both) to reset
     87    * its corresponding MileStoneHandle objects according its detailed
     88    * implementation.
     89    *
     90    * For example, if an atom dictionary uses step information to manage its
     91    * MileStoneHandle objects, parameter from_step can be used to identify which
     92    * objects should be reset; otherwise, if another atom dictionary does not
     93    * use the detailed step information, it only uses ascendant handles
     94    * (according to step. For the same step, earlier call, smaller handle), it
     95    * can easily reset those MileStoneHandle which are larger than from_handle.
     96    *
     97    * The decoder always reset the decoding state by step. So when it begins
     98    * resetting, it will call reset_milestones() of its atom dictionaries with
     99    * the step information, and the MileStoneHandle objects returned by the
    100    * earliest calling of extend_dict() for that step.
    101    *
    102    * If an atom dictionary does not implement incremental search, this function
    103    * can be totally ignored.
    104    *
    105    * @param from_step From which step(included) the MileStoneHandle
    106    * objects should be reset.
    107    * @param from_handle The ealiest MileStoneHandle object for step from_step
    108    */
    109   virtual void reset_milestones(uint16 from_step,
    110                                 MileStoneHandle from_handle) = 0;
    111 
    112   /**
    113    * Used to extend in this dictionary. The handle returned should keep valid
    114    * until reset_milestones() is called.
    115    *
    116    * @param from_handle Its previous returned extended handle without the new
    117    * spelling id, it can be used to speed up the extending.
    118    * @param dep The paramter used for extending.
    119    * @param lpi_items Used to fill in the lemmas matched.
    120    * @param lpi_max The length of the buffer
    121    * @param lpi_num Used to return the newly added items.
    122    * @return The new mile stone for this extending. 0 if fail.
    123    */
    124   virtual MileStoneHandle extend_dict(MileStoneHandle from_handle,
    125                                       const DictExtPara *dep,
    126                                       LmaPsbItem *lpi_items,
    127                                       size_t lpi_max, size_t *lpi_num) = 0;
    128 
    129   /**
    130    * Get lemma items with scores according to a spelling id stream.
    131    * This atom dictionary does not need to sort the returned items.
    132    *
    133    * @param splid_str The spelling id stream buffer.
    134    * @param splid_str_len The length of the spelling id stream buffer.
    135    * @param lpi_items Used to return matched lemma items with scores.
    136    * @param lpi_max The maximum size of the buffer to return result.
    137    * @return The number of matched items which have been filled in to lpi_items.
    138    */
    139   virtual size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
    140                           LmaPsbItem *lpi_items, size_t lpi_max) = 0;
    141 
    142   /**
    143    * Get a lemma string (The Chinese string) by the given lemma id.
    144    *
    145    * @param id_lemma The lemma id to get the string.
    146    * @param str_buf The buffer to return the Chinese string.
    147    * @param str_max The maximum size of the buffer.
    148    * @return The length of the string, 0 if fail.
    149    */
    150   virtual uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
    151                                uint16 str_max) = 0;
    152 
    153   /**
    154    * Get the full spelling ids for the given lemma id.
    155    * If the given buffer is too short, return 0.
    156    *
    157    * @param splids Used to return the spelling ids.
    158    * @param splids_max The maximum buffer length of splids.
    159    * @param arg_valid Used to indicate if the incoming parameters have been
    160    * initialized are valid. If it is true, the splids and splids_max are valid
    161    * and there may be half ids in splids to be updated to full ids. In this
    162    * case, splids_max is the number of valid ids in splids.
    163    * @return The number of ids in the buffer.
    164    */
    165   virtual uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
    166                                   uint16 splids_max, bool arg_valid) = 0;
    167 
    168   /**
    169    * Function used for prediction.
    170    * No need to sort the newly added items.
    171    *
    172    * @param last_hzs The last n Chinese chracters(called Hanzi), its length
    173    * should be less than or equal to kMaxPredictSize.
    174    * @param hzs_len specifies the length(<= kMaxPredictSize) of the history.
    175    * @param npre_items Used used to return the result.
    176    * @param npre_max The length of the buffer to return result
    177    * @param b4_used Number of prediction result (from npre_items[-b4_used])
    178    * from other atom dictionaries. A atom ditionary can just ignore it.
    179    * @return The number of prediction result from this atom dictionary.
    180    */
    181   virtual size_t predict(const char16 last_hzs[], uint16 hzs_len,
    182                          NPredictItem *npre_items, size_t npre_max,
    183                          size_t b4_used) = 0;
    184 
    185   /**
    186    * Add a lemma to the dictionary. If the dictionary allows to add new
    187    * items and this item does not exist, add it.
    188    *
    189    * @param lemma_str The Chinese string of the lemma.
    190    * @param splids The spelling ids of the lemma.
    191    * @param lemma_len The length of the Chinese lemma.
    192    * @param count The frequency count for this lemma.
    193    */
    194   virtual LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
    195                                 uint16 lemma_len, uint16 count) = 0;
    196 
    197   /**
    198    * Update a lemma's occuring count.
    199    *
    200    * @param lemma_id The lemma id to update.
    201    * @param delta_count The frequnecy count to ajust.
    202    * @param selected Indicate whether this lemma is selected by user and
    203    * submitted to target edit box.
    204    * @return The id if succeed, 0 if fail.
    205    */
    206   virtual LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
    207                                    bool selected) = 0;
    208 
    209   /**
    210    * Get the lemma id for the given lemma.
    211    *
    212    * @param lemma_str The Chinese string of the lemma.
    213    * @param splids The spelling ids of the lemma.
    214    * @param lemma_len The length of the lemma.
    215    * @return The matched lemma id, or 0 if fail.
    216    */
    217   virtual LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
    218                                    uint16 lemma_len) = 0;
    219 
    220   /**
    221    * Get the lemma score.
    222    *
    223    * @param lemma_id The lemma id to get score.
    224    * @return The score of the lemma, or 0 if fail.
    225    */
    226   virtual LmaScoreType get_lemma_score(LemmaIdType lemma_id) = 0;
    227 
    228   /**
    229    * Get the lemma score.
    230    *
    231    * @param lemma_str The Chinese string of the lemma.
    232    * @param splids The spelling ids of the lemma.
    233    * @param lemma_len The length of the lemma.
    234    * @return The score of the lamm, or 0 if fail.
    235    */
    236   virtual LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
    237                                 uint16 lemma_len) = 0;
    238 
    239   /**
    240    * If the dictionary allowed, remove a lemma from it.
    241    *
    242    * @param lemma_id The id of the lemma to remove.
    243    * @return True if succeed.
    244    */
    245   virtual bool remove_lemma(LemmaIdType lemma_id) = 0;
    246 
    247   /**
    248    * Get the total occuring count of this atom dictionary.
    249    *
    250    * @return The total occuring count of this atom dictionary.
    251    */
    252   virtual size_t get_total_lemma_count() = 0;
    253 
    254   /**
    255    * Set the total occuring count of other atom dictionaries.
    256    *
    257    * @param count The total occuring count of other atom dictionaies.
    258    */
    259   virtual void set_total_lemma_count_of_others(size_t count) = 0;
    260 
    261   /**
    262    * Notify this atom dictionary to flush the cached data to persistent storage
    263    * if necessary.
    264    */
    265   virtual void flush_cache() = 0;
    266 };
    267 }
    268 
    269 #endif  // PINYINIME_INCLUDE_ATOMDICTBASE_H__
    270