Home | History | Annotate | Download | only in share
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <stdio.h>
     18 #include <string.h>
     19 #include <assert.h>
     20 #include "../include/dictdef.h"
     21 
     22 #ifdef ___BUILD_MODEL___
     23 #include "../include/spellingtable.h"
     24 #endif
     25 
     26 #include "../include/spellingtrie.h"
     27 
     28 namespace ime_pinyin {
     29 
     30 SpellingTrie* SpellingTrie::instance_ = NULL;
     31 
     32 // z/c/s is for Zh/Ch/Sh
     33 const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
     34     "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
     35 
     36 // Bit 0 : is it a Shengmu char?
     37 // Bit 1 : is it a Yunmu char? (one char is a Yunmu)
     38 // Bit 2 : is it enabled in ShouZiMu(first char) mode?
     39 unsigned char SpellingTrie::char_flags_[] = {
     40   // a    b      c     d     e     f     g
     41   0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
     42   // h    i     j      k     l     m    n
     43   0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
     44   // o    p     q      r     s     t
     45   0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
     46   // u    v     w      x     y     z
     47   0x00, 0x00, 0x01, 0x01, 0x01, 0x01
     48 };
     49 
     50 int compare_spl(const void* p1, const void* p2) {
     51   return strcmp((const char*)(p1), (const char*)(p2));
     52 }
     53 
     54 SpellingTrie::SpellingTrie() {
     55   spelling_buf_ = NULL;
     56   spelling_size_ = 0;
     57   spelling_num_ = 0;
     58   spl_ym_ids_ = NULL;
     59   splstr_queried_ = NULL;
     60   splstr16_queried_ = NULL;
     61   root_ = NULL;
     62   dumb_node_ = NULL;
     63   splitter_node_ = NULL;
     64   instance_ = NULL;
     65   ym_buf_ = NULL;
     66   f2h_ = NULL;
     67 
     68   szm_enable_shm(true);
     69   szm_enable_ym(true);
     70 
     71 #ifdef ___BUILD_MODEL___
     72   node_num_ = 0;
     73 #endif
     74 }
     75 
     76 SpellingTrie::~SpellingTrie() {
     77   if (NULL != spelling_buf_)
     78     delete [] spelling_buf_;
     79 
     80   if (NULL != splstr_queried_)
     81     delete [] splstr_queried_;
     82 
     83   if (NULL != splstr16_queried_)
     84     delete [] splstr16_queried_;
     85 
     86   if (NULL != spl_ym_ids_)
     87     delete [] spl_ym_ids_;
     88 
     89   if (NULL != root_) {
     90     free_son_trie(root_);
     91     delete root_;
     92   }
     93 
     94   if (NULL != dumb_node_) {
     95     delete [] dumb_node_;
     96   }
     97 
     98   if (NULL != splitter_node_) {
     99     delete [] splitter_node_;
    100   }
    101 
    102   if (NULL != instance_) {
    103     delete instance_;
    104     instance_ = NULL;
    105   }
    106 
    107   if (NULL != ym_buf_)
    108     delete [] ym_buf_;
    109 
    110   if (NULL != f2h_)
    111     delete [] f2h_;
    112 }
    113 
    114 bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
    115   if (NULL == splid || 0 == *splid)
    116     return false;
    117 
    118   if (*splid >= kFullSplIdStart)
    119     return true;
    120   if (*splid < kFullSplIdStart) {
    121     char ch = kHalfId2Sc_[*splid];
    122     if (ch > 'Z') {
    123       return true;
    124     } else {
    125       if (szm_is_enabled(ch)) {
    126         return true;
    127       } else if (is_yunmu_char(ch)) {
    128         assert(h2f_num_[*splid] > 0);
    129         *splid = h2f_start_[*splid];
    130         return true;
    131       }
    132     }
    133   }
    134   return false;
    135 }
    136 
    137 bool SpellingTrie::is_half_id(uint16 splid) const {
    138   if (0 == splid || splid >= kFullSplIdStart)
    139     return false;
    140 
    141   return true;
    142 }
    143 
    144 bool SpellingTrie::is_full_id(uint16 splid) const {
    145   if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_)
    146     return false;
    147   return true;
    148 }
    149 
    150 bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
    151   uint16 half_fr_full = full_to_half(full_id);
    152 
    153   if (half_fr_full == half_id)
    154     return true;
    155 
    156   // &~0x20 is used to conver the char to upper case.
    157   // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S.
    158   char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20));
    159   char ch_h = kHalfId2Sc_[half_id];
    160   if (ch_f == ch_h)
    161     return true;
    162 
    163   return false;
    164 }
    165 
    166 bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
    167   if (0 == splid || splid >= kFullSplIdStart)
    168     return false;
    169 
    170   char ch = kHalfId2Sc_[splid];
    171   // If ch >= 'a', that means the half id is one of Zh/Ch/Sh
    172   if (ch >= 'a') {
    173     return false;
    174   }
    175 
    176   return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
    177 }
    178 
    179 bool SpellingTrie::is_shengmu_char(char ch) const {
    180   return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
    181 }
    182 
    183 bool SpellingTrie::is_yunmu_char(char ch) const {
    184   return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
    185 }
    186 
    187 bool SpellingTrie::is_szm_char(char ch) const {
    188   return is_shengmu_char(ch) || is_yunmu_char(ch);
    189 }
    190 
    191 bool SpellingTrie::szm_is_enabled(char ch) const {
    192   return char_flags_[ch - 'A'] & kHalfIdSzmMask;
    193 }
    194 
    195 void SpellingTrie::szm_enable_shm(bool enable) {
    196   if (enable) {
    197     for (char ch = 'A'; ch <= 'Z'; ch++) {
    198       if (is_shengmu_char(ch))
    199         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
    200     }
    201   } else {
    202     for (char ch = 'A'; ch <= 'Z'; ch++) {
    203       if (is_shengmu_char(ch))
    204         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
    205     }
    206   }
    207 }
    208 
    209 void SpellingTrie::szm_enable_ym(bool enable) {
    210   if (enable) {
    211     for (char ch = 'A'; ch <= 'Z'; ch++) {
    212       if (is_yunmu_char(ch))
    213         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
    214     }
    215   } else {
    216     for (char ch = 'A'; ch <= 'Z'; ch++) {
    217       if (is_yunmu_char(ch))
    218         char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
    219     }
    220   }
    221 }
    222 
    223 bool SpellingTrie::is_szm_enabled(char ch) const {
    224   return char_flags_[ch - 'A'] & kHalfIdSzmMask;
    225 }
    226 
    227 const SpellingTrie* SpellingTrie::get_cpinstance() {
    228   return &get_instance();
    229 }
    230 
    231 SpellingTrie& SpellingTrie::get_instance() {
    232   if (NULL == instance_)
    233     instance_ = new SpellingTrie();
    234 
    235   return *instance_;
    236 }
    237 
    238 uint16 SpellingTrie::half2full_num(uint16 half_id) const {
    239   if (NULL == root_ || half_id >= kFullSplIdStart)
    240     return 0;
    241   return h2f_num_[half_id];
    242 }
    243 
    244 uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
    245   if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart)
    246     return 0;
    247 
    248   *spl_id_start = h2f_start_[half_id];
    249   return h2f_num_[half_id];
    250 }
    251 
    252 uint16 SpellingTrie::full_to_half(uint16 full_id) const {
    253   if (NULL == root_ || full_id < kFullSplIdStart ||
    254       full_id > spelling_num_ + kFullSplIdStart)
    255     return 0;
    256 
    257   return f2h_[full_id - kFullSplIdStart];
    258 }
    259 
    260 void SpellingTrie::free_son_trie(SpellingNode* node) {
    261   if (NULL == node)
    262     return;
    263 
    264   for (size_t pos = 0; pos < node->num_of_son; pos++) {
    265     free_son_trie(node->first_son + pos);
    266   }
    267 
    268   if (NULL != node->first_son)
    269     delete [] node->first_son;
    270 }
    271 
    272 bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
    273                              size_t item_num, float score_amplifier,
    274                              unsigned char average_score) {
    275   if (spelling_arr == NULL)
    276     return false;
    277 
    278   memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
    279   memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
    280 
    281   // If the arr is the same as the buf, means this function is called by
    282   // load_table(), the table data are ready; otherwise the array should be
    283   // saved.
    284   if (spelling_arr != spelling_buf_) {
    285     if (NULL != spelling_buf_)
    286       delete [] spelling_buf_;
    287     spelling_buf_ = new char[item_size * item_num];
    288     if (NULL == spelling_buf_)
    289       return false;
    290     memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
    291   }
    292 
    293   spelling_size_ = item_size;
    294   spelling_num_ = item_num;
    295 
    296   score_amplifier_ = score_amplifier;
    297   average_score_ = average_score;
    298 
    299   if (NULL != splstr_queried_)
    300     delete [] splstr_queried_;
    301   splstr_queried_ = new char[spelling_size_];
    302   if (NULL == splstr_queried_)
    303     return false;
    304 
    305   if (NULL != splstr16_queried_)
    306     delete [] splstr16_queried_;
    307   splstr16_queried_ = new char16[spelling_size_];
    308   if (NULL == splstr16_queried_)
    309     return false;
    310 
    311   // First, sort the buf to ensure they are in ascendant order
    312   qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
    313 
    314 #ifdef ___BUILD_MODEL___
    315   node_num_ = 1;
    316 #endif
    317 
    318   root_ = new SpellingNode();
    319   memset(root_, 0, sizeof(SpellingNode));
    320 
    321   dumb_node_ = new SpellingNode();
    322   memset(dumb_node_, 0, sizeof(SpellingNode));
    323   dumb_node_->score = average_score_;
    324 
    325   splitter_node_ = new SpellingNode();
    326   memset(splitter_node_, 0, sizeof(SpellingNode));
    327   splitter_node_->score = average_score_;
    328 
    329   memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
    330 
    331   root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
    332 
    333   // Root's score should be cleared.
    334   root_->score = 0;
    335 
    336   if (NULL == root_->first_son)
    337     return false;
    338 
    339   h2f_start_[0] = h2f_num_[0] = 0;
    340 
    341   if (!build_f2h())
    342     return false;
    343 
    344 #ifdef ___BUILD_MODEL___
    345   if (kPrintDebug0) {
    346     printf("---SpellingTrie Nodes: %d\n", node_num_);
    347   }
    348   return build_ym_info();
    349 #else
    350   return true;
    351 #endif
    352 }
    353 
    354 #ifdef ___BUILD_MODEL___
    355 const char* SpellingTrie::get_ym_str(const char *spl_str) {
    356   bool start_ZCS = false;
    357   if (is_shengmu_char(*spl_str)) {
    358     if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
    359       start_ZCS = true;
    360     spl_str += 1;
    361     if (start_ZCS && 'h' == *spl_str)
    362       spl_str += 1;
    363   }
    364   return spl_str;
    365 }
    366 
    367 bool SpellingTrie::build_ym_info() {
    368   bool sucess;
    369   SpellingTable *spl_table = new SpellingTable();
    370 
    371   sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
    372   assert(sucess);
    373 
    374   for (uint16 pos = 0; pos < spelling_num_; pos++) {
    375     const char *spl_str = spelling_buf_ + spelling_size_ * pos;
    376     spl_str = get_ym_str(spl_str);
    377     if ('\0' != spl_str[0]) {
    378       sucess = spl_table->put_spelling(spl_str, 0);
    379       assert(sucess);
    380     }
    381   }
    382 
    383   size_t ym_item_size;  // '\0' is included
    384   size_t ym_num;
    385   const char* ym_buf;
    386   ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
    387 
    388   if (NULL != ym_buf_)
    389     delete [] ym_buf_;
    390   ym_buf_ = new char[ym_item_size * ym_num];
    391   if (NULL == ym_buf_) {
    392     delete spl_table;
    393     return false;
    394   }
    395 
    396   memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
    397   ym_size_ = ym_item_size;
    398   ym_num_ = ym_num;
    399 
    400   delete spl_table;
    401 
    402   // Generate the maping from the spelling ids to the Yunmu ids.
    403   if (spl_ym_ids_)
    404     delete spl_ym_ids_;
    405   spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
    406   if (NULL == spl_ym_ids_)
    407     return false;
    408 
    409   memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
    410 
    411   for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
    412     const char *str = get_spelling_str(id);
    413 
    414     str = get_ym_str(str);
    415     if ('\0' != str[0]) {
    416       uint8 ym_id = get_ym_id(str);
    417       spl_ym_ids_[id] = ym_id;
    418       assert(ym_id > 0);
    419     } else {
    420       spl_ym_ids_[id] = 0;
    421     }
    422   }
    423   return true;
    424 }
    425 #endif
    426 
    427 SpellingNode* SpellingTrie::construct_spellings_subset(
    428     size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
    429   if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
    430     return NULL;
    431 
    432   SpellingNode *first_son = NULL;
    433   uint16 num_of_son = 0;
    434   unsigned char min_son_score = 255;
    435 
    436   const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
    437   char char_for_node = spelling_last_start[level];
    438   assert(char_for_node >= 'A' && char_for_node <= 'Z' ||
    439          'h' == char_for_node);
    440 
    441   // Scan the array to find how many sons
    442   for (size_t i = item_start + 1; i < item_end; i++) {
    443     const char *spelling_current = spelling_buf_ + spelling_size_ * i;
    444     char char_current = spelling_current[level];
    445     if (char_current != char_for_node) {
    446       num_of_son++;
    447       char_for_node = char_current;
    448     }
    449   }
    450   num_of_son++;
    451 
    452   // Allocate memory
    453 #ifdef ___BUILD_MODEL___
    454   node_num_ += num_of_son;
    455 #endif
    456   first_son = new SpellingNode[num_of_son];
    457   memset(first_son, 0, sizeof(SpellingNode)*num_of_son);
    458 
    459   // Now begin construct tree
    460   size_t son_pos = 0;
    461 
    462   spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
    463   char_for_node = spelling_last_start[level];
    464 
    465   bool spelling_endable = true;
    466   if (spelling_last_start[level + 1] != '\0')
    467     spelling_endable = false;
    468 
    469   size_t item_start_next = item_start;
    470 
    471   for (size_t i = item_start + 1; i < item_end; i++) {
    472     const char *spelling_current = spelling_buf_ + spelling_size_ * i;
    473     char char_current = spelling_current[level];
    474     assert(is_valid_spl_char(char_current));
    475 
    476     if (char_current != char_for_node) {
    477       // Construct a node
    478       SpellingNode *node_current = first_son + son_pos;
    479       node_current->char_this_node = char_for_node;
    480 
    481       // For quick search in the first level
    482       if (0 == level)
    483         level1_sons_[char_for_node - 'A'] = node_current;
    484 
    485       if (spelling_endable) {
    486         node_current->spelling_idx = kFullSplIdStart + item_start_next;
    487       }
    488 
    489       if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
    490         size_t real_start = item_start_next;
    491         if (spelling_last_start[level + 1] == '\0')
    492           real_start++;
    493 
    494         node_current->first_son =
    495             construct_spellings_subset(real_start, i, level + 1,
    496                                        node_current);
    497 
    498         if (real_start == item_start_next + 1) {
    499           uint16 score_this = static_cast<unsigned char>(
    500               spelling_last_start[spelling_size_ - 1]);
    501           if (score_this < node_current->score)
    502             node_current->score = score_this;
    503         }
    504       } else {
    505         node_current->first_son = NULL;
    506         node_current->score = static_cast<unsigned char>(
    507             spelling_last_start[spelling_size_ - 1]);
    508       }
    509 
    510       if (node_current->score < min_son_score)
    511         min_son_score = node_current->score;
    512 
    513       bool is_half = false;
    514       if (level == 0 && is_szm_char(char_for_node)) {
    515         node_current->spelling_idx =
    516           static_cast<uint16>(char_for_node - 'A' + 1);
    517 
    518         if (char_for_node > 'C')
    519           node_current->spelling_idx++;
    520         if (char_for_node > 'S')
    521           node_current->spelling_idx++;
    522 
    523         h2f_num_[node_current->spelling_idx] = i - item_start_next;
    524         is_half = true;
    525       } else if (level == 1 && char_for_node == 'h') {
    526         char ch_level0 = spelling_last_start[0];
    527         uint16 part_id = 0;
    528         if (ch_level0 == 'C')
    529           part_id = 'C' - 'A' + 1 + 1;
    530         else if (ch_level0 == 'S')
    531           part_id = 'S' - 'A' + 1 + 2;
    532         else if (ch_level0 == 'Z')
    533           part_id = 'Z' - 'A' + 1 + 3;
    534         if (0 != part_id) {
    535           node_current->spelling_idx = part_id;
    536           h2f_num_[node_current->spelling_idx] = i - item_start_next;
    537           is_half = true;
    538         }
    539       }
    540 
    541       if (is_half) {
    542         if (h2f_num_[node_current->spelling_idx] > 0)
    543           h2f_start_[node_current->spelling_idx] =
    544             item_start_next + kFullSplIdStart;
    545         else
    546           h2f_start_[node_current->spelling_idx] = 0;
    547       }
    548 
    549       // for next sibling
    550       spelling_last_start = spelling_current;
    551       char_for_node = char_current;
    552       item_start_next = i;
    553       spelling_endable = true;
    554       if (spelling_current[level + 1] != '\0')
    555         spelling_endable = false;
    556 
    557       son_pos++;
    558     }
    559   }
    560 
    561   // the last one
    562   SpellingNode *node_current = first_son + son_pos;
    563   node_current->char_this_node = char_for_node;
    564 
    565   // For quick search in the first level
    566   if (0 == level)
    567     level1_sons_[char_for_node - 'A'] = node_current;
    568 
    569   if (spelling_endable) {
    570     node_current->spelling_idx = kFullSplIdStart + item_start_next;
    571   }
    572 
    573   if (spelling_last_start[level + 1] != '\0' ||
    574       item_end - item_start_next > 1) {
    575     size_t real_start = item_start_next;
    576     if (spelling_last_start[level + 1] == '\0')
    577       real_start++;
    578 
    579     node_current->first_son =
    580         construct_spellings_subset(real_start, item_end, level + 1,
    581                                    node_current);
    582 
    583     if (real_start == item_start_next + 1) {
    584       uint16 score_this = static_cast<unsigned char>(
    585           spelling_last_start[spelling_size_ - 1]);
    586       if (score_this < node_current->score)
    587         node_current->score = score_this;
    588     }
    589   } else {
    590     node_current->first_son = NULL;
    591     node_current->score = static_cast<unsigned char>(
    592         spelling_last_start[spelling_size_ - 1]);
    593   }
    594 
    595   if (node_current->score < min_son_score)
    596     min_son_score = node_current->score;
    597 
    598   assert(son_pos + 1 == num_of_son);
    599 
    600   bool is_half = false;
    601   if (level == 0 && szm_is_enabled(char_for_node)) {
    602     node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);
    603 
    604     if (char_for_node > 'C')
    605       node_current->spelling_idx++;
    606     if (char_for_node > 'S')
    607       node_current->spelling_idx++;
    608 
    609     h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
    610     is_half = true;
    611   } else if (level == 1 && char_for_node == 'h') {
    612     char ch_level0 = spelling_last_start[0];
    613     uint16 part_id = 0;
    614     if (ch_level0 == 'C')
    615       part_id = 'C' - 'A' + 1 + 1;
    616     else if (ch_level0 == 'S')
    617       part_id = 'S' - 'A' + 1 + 2;
    618     else if (ch_level0 == 'Z')
    619       part_id = 'Z' - 'A' + 1 + 3;
    620     if (0 != part_id) {
    621       node_current->spelling_idx = part_id;
    622       h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
    623       is_half = true;
    624     }
    625   }
    626   if (is_half) {
    627     if (h2f_num_[node_current->spelling_idx] > 0)
    628       h2f_start_[node_current->spelling_idx] =
    629         item_start_next + kFullSplIdStart;
    630     else
    631       h2f_start_[node_current->spelling_idx] = 0;
    632   }
    633 
    634   parent->num_of_son = num_of_son;
    635   parent->score = min_son_score;
    636   return first_son;
    637 }
    638 
    639 bool SpellingTrie::save_spl_trie(FILE *fp) {
    640   if (NULL == fp || NULL == spelling_buf_)
    641     return false;
    642 
    643   if (fwrite(&spelling_size_, sizeof(size_t), 1, fp) != 1)
    644     return false;
    645 
    646   if (fwrite(&spelling_num_, sizeof(size_t), 1, fp) != 1)
    647     return false;
    648 
    649   if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
    650     return false;
    651 
    652   if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
    653     return false;
    654 
    655   if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
    656              spelling_num_, fp) != spelling_num_)
    657     return false;
    658 
    659   return true;
    660 }
    661 
    662 bool SpellingTrie::load_spl_trie(FILE *fp) {
    663   if (NULL == fp)
    664     return false;
    665 
    666   if (fread(&spelling_size_, sizeof(size_t), 1, fp) != 1)
    667     return false;
    668 
    669   if (fread(&spelling_num_, sizeof(size_t), 1, fp) != 1)
    670     return false;
    671 
    672   if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
    673     return false;
    674 
    675   if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
    676     return false;
    677 
    678   if (NULL != spelling_buf_)
    679     delete [] spelling_buf_;
    680 
    681   spelling_buf_ = new char[spelling_size_ * spelling_num_];
    682   if (NULL == spelling_buf_)
    683     return false;
    684 
    685   if (fread(spelling_buf_, sizeof(char) * spelling_size_,
    686             spelling_num_, fp) != spelling_num_)
    687     return false;
    688 
    689   return construct(spelling_buf_, spelling_size_, spelling_num_,
    690                    score_amplifier_, average_score_);
    691 }
    692 
    693 bool SpellingTrie::build_f2h() {
    694   if (NULL != f2h_)
    695     delete [] f2h_;
    696   f2h_ = new uint16[spelling_num_];
    697   if (NULL == f2h_)
    698     return false;
    699 
    700   for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
    701     for (uint16 fid = h2f_start_[hid];
    702          fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
    703       f2h_[fid - kFullSplIdStart] = hid;
    704   }
    705 
    706   return true;
    707 }
    708 
    709 size_t SpellingTrie::get_spelling_num() {
    710   return spelling_num_;
    711 }
    712 
    713 uint8 SpellingTrie::get_ym_id(const char *ym_str) {
    714   if (NULL == ym_str || NULL == ym_buf_)
    715     return 0;
    716 
    717   for (uint8 pos = 0; pos < ym_num_; pos++)
    718     if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0)
    719       return pos + 1;
    720 
    721   return 0;
    722 }
    723 
    724 const char* SpellingTrie::get_spelling_str(uint16 splid) {
    725   splstr_queried_[0] = '\0';
    726 
    727   if (splid >= kFullSplIdStart) {
    728     splid -= kFullSplIdStart;
    729     snprintf(splstr_queried_, spelling_size_, "%s",
    730              spelling_buf_ + splid * spelling_size_);
    731   } else {
    732     if (splid == 'C' - 'A' + 1 + 1) {
    733       snprintf(splstr_queried_, spelling_size_, "%s", "Ch");
    734     } else if (splid == 'S' - 'A' + 1 + 2) {
    735       snprintf(splstr_queried_, spelling_size_, "%s", "Sh");
    736     } else if (splid == 'Z' - 'A' + 1 + 3) {
    737       snprintf(splstr_queried_, spelling_size_, "%s", "Zh");
    738     } else {
    739       if (splid > 'C' - 'A' + 1)
    740         splid--;
    741       if (splid > 'S' - 'A' + 1)
    742         splid--;
    743       splstr_queried_[0] = 'A' + splid - 1;
    744       splstr_queried_[1] = '\0';
    745     }
    746   }
    747   return splstr_queried_;
    748 }
    749 
    750 const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
    751   splstr16_queried_[0] = '\0';
    752 
    753   if (splid >= kFullSplIdStart) {
    754     splid -= kFullSplIdStart;
    755     for (size_t pos = 0; pos < spelling_size_; pos++) {
    756       splstr16_queried_[pos] = static_cast<char16>
    757           (spelling_buf_[splid * spelling_size_ + pos]);
    758     }
    759   } else {
    760     if (splid == 'C' - 'A' + 1 + 1) {
    761       splstr16_queried_[0] = static_cast<char16>('C');
    762       splstr16_queried_[1] = static_cast<char16>('h');
    763       splstr16_queried_[2] = static_cast<char16>('\0');
    764     } else if (splid == 'S' - 'A' + 1 + 2) {
    765       splstr16_queried_[0] = static_cast<char16>('S');
    766       splstr16_queried_[1] = static_cast<char16>('h');
    767       splstr16_queried_[2] = static_cast<char16>('\0');
    768     } else if (splid == 'Z' - 'A' + 1 + 3) {
    769       splstr16_queried_[0] = static_cast<char16>('Z');
    770       splstr16_queried_[1] = static_cast<char16>('h');
    771       splstr16_queried_[2] = static_cast<char16>('\0');
    772     } else {
    773       if (splid > 'C' - 'A' + 1)
    774         splid--;
    775       if (splid > 'S' - 'A' + 1)
    776         splid--;
    777       splstr16_queried_[0] = 'A' + splid - 1;
    778       splstr16_queried_[1] = '\0';
    779     }
    780   }
    781   return splstr16_queried_;
    782 }
    783 
    784 size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
    785                                         size_t splstr16_len) {
    786   if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;
    787 
    788   if (splid >= kFullSplIdStart) {
    789     splid -= kFullSplIdStart;
    790     for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) {
    791       splstr16[pos] = static_cast<char16>
    792           (spelling_buf_[splid * spelling_size_ + pos]);
    793       if (static_cast<char16>('\0') == splstr16[pos]) {
    794         return pos;
    795       }
    796     }
    797   } else {
    798     if (splid == 'C' - 'A' + 1 + 1) {
    799       splstr16[0] = static_cast<char16>('C');
    800       splstr16[1] = static_cast<char16>('h');
    801       splstr16[2] = static_cast<char16>('\0');
    802       return 2;
    803     } else if (splid == 'S' - 'A' + 1 + 2) {
    804       splstr16[0] = static_cast<char16>('S');
    805       splstr16[1] = static_cast<char16>('h');
    806       splstr16[2] = static_cast<char16>('\0');
    807       return 2;
    808     } else if (splid == 'Z' - 'A' + 1 + 3) {
    809       splstr16[0] = static_cast<char16>('Z');
    810       splstr16[1] = static_cast<char16>('h');
    811       splstr16[2] = static_cast<char16>('\0');
    812       return 2;
    813     } else {
    814       if (splid > 'C' - 'A' + 1)
    815         splid--;
    816       if (splid > 'S' - 'A' + 1)
    817         splid--;
    818       splstr16[0] = 'A' + splid - 1;
    819       splstr16[1] = '\0';
    820       return 1;
    821     }
    822   }
    823 
    824   // Not reachable.
    825   return 0;
    826 }
    827 
    828 }  // namespace ime_pinyin
    829