1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <stdio.h> 18 #include <string.h> 19 #include <assert.h> 20 #include "../include/dictdef.h" 21 22 #ifdef ___BUILD_MODEL___ 23 #include "../include/spellingtable.h" 24 #endif 25 26 #include "../include/spellingtrie.h" 27 28 namespace ime_pinyin { 29 30 SpellingTrie* SpellingTrie::instance_ = NULL; 31 32 // z/c/s is for Zh/Ch/Sh 33 const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] = 34 "0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz"; 35 36 // Bit 0 : is it a Shengmu char? 37 // Bit 1 : is it a Yunmu char? (one char is a Yunmu) 38 // Bit 2 : is it enabled in ShouZiMu(first char) mode? 39 unsigned char SpellingTrie::char_flags_[] = { 40 // a b c d e f g 41 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 42 // h i j k l m n 43 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 44 // o p q r s t 45 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, 46 // u v w x y z 47 0x00, 0x00, 0x01, 0x01, 0x01, 0x01 48 }; 49 50 int compare_spl(const void* p1, const void* p2) { 51 return strcmp((const char*)(p1), (const char*)(p2)); 52 } 53 54 SpellingTrie::SpellingTrie() { 55 spelling_buf_ = NULL; 56 spelling_size_ = 0; 57 spelling_num_ = 0; 58 spl_ym_ids_ = NULL; 59 splstr_queried_ = NULL; 60 splstr16_queried_ = NULL; 61 root_ = NULL; 62 dumb_node_ = NULL; 63 splitter_node_ = NULL; 64 instance_ = NULL; 65 ym_buf_ = NULL; 66 f2h_ = NULL; 67 68 szm_enable_shm(true); 69 szm_enable_ym(true); 70 71 #ifdef ___BUILD_MODEL___ 72 node_num_ = 0; 73 #endif 74 } 75 76 SpellingTrie::~SpellingTrie() { 77 if (NULL != spelling_buf_) 78 delete [] spelling_buf_; 79 80 if (NULL != splstr_queried_) 81 delete [] splstr_queried_; 82 83 if (NULL != splstr16_queried_) 84 delete [] splstr16_queried_; 85 86 if (NULL != spl_ym_ids_) 87 delete [] spl_ym_ids_; 88 89 if (NULL != root_) { 90 free_son_trie(root_); 91 delete root_; 92 } 93 94 if (NULL != dumb_node_) { 95 delete [] dumb_node_; 96 } 97 98 if (NULL != splitter_node_) { 99 delete [] splitter_node_; 100 } 101 102 if (NULL != instance_) { 103 delete instance_; 104 instance_ = NULL; 105 } 106 107 if (NULL != ym_buf_) 108 delete [] ym_buf_; 109 110 if (NULL != f2h_) 111 delete [] f2h_; 112 } 113 114 bool SpellingTrie::if_valid_id_update(uint16 *splid) const { 115 if (NULL == splid || 0 == *splid) 116 return false; 117 118 if (*splid >= kFullSplIdStart) 119 return true; 120 if (*splid < kFullSplIdStart) { 121 char ch = kHalfId2Sc_[*splid]; 122 if (ch > 'Z') { 123 return true; 124 } else { 125 if (szm_is_enabled(ch)) { 126 return true; 127 } else if (is_yunmu_char(ch)) { 128 assert(h2f_num_[*splid] > 0); 129 *splid = h2f_start_[*splid]; 130 return true; 131 } 132 } 133 } 134 return false; 135 } 136 137 bool SpellingTrie::is_half_id(uint16 splid) const { 138 if (0 == splid || splid >= kFullSplIdStart) 139 return false; 140 141 return true; 142 } 143 144 bool SpellingTrie::is_full_id(uint16 splid) const { 145 if (splid < kFullSplIdStart || splid >= kFullSplIdStart + spelling_num_) 146 return false; 147 return true; 148 } 149 150 bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const { 151 uint16 half_fr_full = full_to_half(full_id); 152 153 if (half_fr_full == half_id) 154 return true; 155 156 // &~0x20 is used to conver the char to upper case. 157 // So that Zh/Ch/Sh(whose char is z/c/s) can be matched with Z/C/S. 158 char ch_f = (kHalfId2Sc_[half_fr_full] & (~0x20)); 159 char ch_h = kHalfId2Sc_[half_id]; 160 if (ch_f == ch_h) 161 return true; 162 163 return false; 164 } 165 166 bool SpellingTrie::is_half_id_yunmu(uint16 splid) const { 167 if (0 == splid || splid >= kFullSplIdStart) 168 return false; 169 170 char ch = kHalfId2Sc_[splid]; 171 // If ch >= 'a', that means the half id is one of Zh/Ch/Sh 172 if (ch >= 'a') { 173 return false; 174 } 175 176 return char_flags_[ch - 'A'] & kHalfIdYunmuMask; 177 } 178 179 bool SpellingTrie::is_shengmu_char(char ch) const { 180 return char_flags_[ch - 'A'] & kHalfIdShengmuMask; 181 } 182 183 bool SpellingTrie::is_yunmu_char(char ch) const { 184 return char_flags_[ch - 'A'] & kHalfIdYunmuMask; 185 } 186 187 bool SpellingTrie::is_szm_char(char ch) const { 188 return is_shengmu_char(ch) || is_yunmu_char(ch); 189 } 190 191 bool SpellingTrie::szm_is_enabled(char ch) const { 192 return char_flags_[ch - 'A'] & kHalfIdSzmMask; 193 } 194 195 void SpellingTrie::szm_enable_shm(bool enable) { 196 if (enable) { 197 for (char ch = 'A'; ch <= 'Z'; ch++) { 198 if (is_shengmu_char(ch)) 199 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask; 200 } 201 } else { 202 for (char ch = 'A'; ch <= 'Z'; ch++) { 203 if (is_shengmu_char(ch)) 204 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff); 205 } 206 } 207 } 208 209 void SpellingTrie::szm_enable_ym(bool enable) { 210 if (enable) { 211 for (char ch = 'A'; ch <= 'Z'; ch++) { 212 if (is_yunmu_char(ch)) 213 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask; 214 } 215 } else { 216 for (char ch = 'A'; ch <= 'Z'; ch++) { 217 if (is_yunmu_char(ch)) 218 char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff); 219 } 220 } 221 } 222 223 bool SpellingTrie::is_szm_enabled(char ch) const { 224 return char_flags_[ch - 'A'] & kHalfIdSzmMask; 225 } 226 227 const SpellingTrie* SpellingTrie::get_cpinstance() { 228 return &get_instance(); 229 } 230 231 SpellingTrie& SpellingTrie::get_instance() { 232 if (NULL == instance_) 233 instance_ = new SpellingTrie(); 234 235 return *instance_; 236 } 237 238 uint16 SpellingTrie::half2full_num(uint16 half_id) const { 239 if (NULL == root_ || half_id >= kFullSplIdStart) 240 return 0; 241 return h2f_num_[half_id]; 242 } 243 244 uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const { 245 if (NULL == spl_id_start || NULL == root_ || half_id >= kFullSplIdStart) 246 return 0; 247 248 *spl_id_start = h2f_start_[half_id]; 249 return h2f_num_[half_id]; 250 } 251 252 uint16 SpellingTrie::full_to_half(uint16 full_id) const { 253 if (NULL == root_ || full_id < kFullSplIdStart || 254 full_id > spelling_num_ + kFullSplIdStart) 255 return 0; 256 257 return f2h_[full_id - kFullSplIdStart]; 258 } 259 260 void SpellingTrie::free_son_trie(SpellingNode* node) { 261 if (NULL == node) 262 return; 263 264 for (size_t pos = 0; pos < node->num_of_son; pos++) { 265 free_son_trie(node->first_son + pos); 266 } 267 268 if (NULL != node->first_son) 269 delete [] node->first_son; 270 } 271 272 bool SpellingTrie::construct(const char* spelling_arr, size_t item_size, 273 size_t item_num, float score_amplifier, 274 unsigned char average_score) { 275 if (spelling_arr == NULL) 276 return false; 277 278 memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart); 279 memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart); 280 281 // If the arr is the same as the buf, means this function is called by 282 // load_table(), the table data are ready; otherwise the array should be 283 // saved. 284 if (spelling_arr != spelling_buf_) { 285 if (NULL != spelling_buf_) 286 delete [] spelling_buf_; 287 spelling_buf_ = new char[item_size * item_num]; 288 if (NULL == spelling_buf_) 289 return false; 290 memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num); 291 } 292 293 spelling_size_ = item_size; 294 spelling_num_ = item_num; 295 296 score_amplifier_ = score_amplifier; 297 average_score_ = average_score; 298 299 if (NULL != splstr_queried_) 300 delete [] splstr_queried_; 301 splstr_queried_ = new char[spelling_size_]; 302 if (NULL == splstr_queried_) 303 return false; 304 305 if (NULL != splstr16_queried_) 306 delete [] splstr16_queried_; 307 splstr16_queried_ = new char16[spelling_size_]; 308 if (NULL == splstr16_queried_) 309 return false; 310 311 // First, sort the buf to ensure they are in ascendant order 312 qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl); 313 314 #ifdef ___BUILD_MODEL___ 315 node_num_ = 1; 316 #endif 317 318 root_ = new SpellingNode(); 319 memset(root_, 0, sizeof(SpellingNode)); 320 321 dumb_node_ = new SpellingNode(); 322 memset(dumb_node_, 0, sizeof(SpellingNode)); 323 dumb_node_->score = average_score_; 324 325 splitter_node_ = new SpellingNode(); 326 memset(splitter_node_, 0, sizeof(SpellingNode)); 327 splitter_node_->score = average_score_; 328 329 memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum); 330 331 root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_); 332 333 // Root's score should be cleared. 334 root_->score = 0; 335 336 if (NULL == root_->first_son) 337 return false; 338 339 h2f_start_[0] = h2f_num_[0] = 0; 340 341 if (!build_f2h()) 342 return false; 343 344 #ifdef ___BUILD_MODEL___ 345 if (kPrintDebug0) { 346 printf("---SpellingTrie Nodes: %d\n", node_num_); 347 } 348 return build_ym_info(); 349 #else 350 return true; 351 #endif 352 } 353 354 #ifdef ___BUILD_MODEL___ 355 const char* SpellingTrie::get_ym_str(const char *spl_str) { 356 bool start_ZCS = false; 357 if (is_shengmu_char(*spl_str)) { 358 if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str) 359 start_ZCS = true; 360 spl_str += 1; 361 if (start_ZCS && 'h' == *spl_str) 362 spl_str += 1; 363 } 364 return spl_str; 365 } 366 367 bool SpellingTrie::build_ym_info() { 368 bool sucess; 369 SpellingTable *spl_table = new SpellingTable(); 370 371 sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false); 372 assert(sucess); 373 374 for (uint16 pos = 0; pos < spelling_num_; pos++) { 375 const char *spl_str = spelling_buf_ + spelling_size_ * pos; 376 spl_str = get_ym_str(spl_str); 377 if ('\0' != spl_str[0]) { 378 sucess = spl_table->put_spelling(spl_str, 0); 379 assert(sucess); 380 } 381 } 382 383 size_t ym_item_size; // '\0' is included 384 size_t ym_num; 385 const char* ym_buf; 386 ym_buf = spl_table->arrange(&ym_item_size, &ym_num); 387 388 if (NULL != ym_buf_) 389 delete [] ym_buf_; 390 ym_buf_ = new char[ym_item_size * ym_num]; 391 if (NULL == ym_buf_) { 392 delete spl_table; 393 return false; 394 } 395 396 memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num); 397 ym_size_ = ym_item_size; 398 ym_num_ = ym_num; 399 400 delete spl_table; 401 402 // Generate the maping from the spelling ids to the Yunmu ids. 403 if (spl_ym_ids_) 404 delete spl_ym_ids_; 405 spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart]; 406 if (NULL == spl_ym_ids_) 407 return false; 408 409 memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart)); 410 411 for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) { 412 const char *str = get_spelling_str(id); 413 414 str = get_ym_str(str); 415 if ('\0' != str[0]) { 416 uint8 ym_id = get_ym_id(str); 417 spl_ym_ids_[id] = ym_id; 418 assert(ym_id > 0); 419 } else { 420 spl_ym_ids_[id] = 0; 421 } 422 } 423 return true; 424 } 425 #endif 426 427 SpellingNode* SpellingTrie::construct_spellings_subset( 428 size_t item_start, size_t item_end, size_t level, SpellingNode* parent) { 429 if (level >= spelling_size_ || item_end <= item_start || NULL == parent) 430 return NULL; 431 432 SpellingNode *first_son = NULL; 433 uint16 num_of_son = 0; 434 unsigned char min_son_score = 255; 435 436 const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start; 437 char char_for_node = spelling_last_start[level]; 438 assert(char_for_node >= 'A' && char_for_node <= 'Z' || 439 'h' == char_for_node); 440 441 // Scan the array to find how many sons 442 for (size_t i = item_start + 1; i < item_end; i++) { 443 const char *spelling_current = spelling_buf_ + spelling_size_ * i; 444 char char_current = spelling_current[level]; 445 if (char_current != char_for_node) { 446 num_of_son++; 447 char_for_node = char_current; 448 } 449 } 450 num_of_son++; 451 452 // Allocate memory 453 #ifdef ___BUILD_MODEL___ 454 node_num_ += num_of_son; 455 #endif 456 first_son = new SpellingNode[num_of_son]; 457 memset(first_son, 0, sizeof(SpellingNode)*num_of_son); 458 459 // Now begin construct tree 460 size_t son_pos = 0; 461 462 spelling_last_start = spelling_buf_ + spelling_size_ * item_start; 463 char_for_node = spelling_last_start[level]; 464 465 bool spelling_endable = true; 466 if (spelling_last_start[level + 1] != '\0') 467 spelling_endable = false; 468 469 size_t item_start_next = item_start; 470 471 for (size_t i = item_start + 1; i < item_end; i++) { 472 const char *spelling_current = spelling_buf_ + spelling_size_ * i; 473 char char_current = spelling_current[level]; 474 assert(is_valid_spl_char(char_current)); 475 476 if (char_current != char_for_node) { 477 // Construct a node 478 SpellingNode *node_current = first_son + son_pos; 479 node_current->char_this_node = char_for_node; 480 481 // For quick search in the first level 482 if (0 == level) 483 level1_sons_[char_for_node - 'A'] = node_current; 484 485 if (spelling_endable) { 486 node_current->spelling_idx = kFullSplIdStart + item_start_next; 487 } 488 489 if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) { 490 size_t real_start = item_start_next; 491 if (spelling_last_start[level + 1] == '\0') 492 real_start++; 493 494 node_current->first_son = 495 construct_spellings_subset(real_start, i, level + 1, 496 node_current); 497 498 if (real_start == item_start_next + 1) { 499 uint16 score_this = static_cast<unsigned char>( 500 spelling_last_start[spelling_size_ - 1]); 501 if (score_this < node_current->score) 502 node_current->score = score_this; 503 } 504 } else { 505 node_current->first_son = NULL; 506 node_current->score = static_cast<unsigned char>( 507 spelling_last_start[spelling_size_ - 1]); 508 } 509 510 if (node_current->score < min_son_score) 511 min_son_score = node_current->score; 512 513 bool is_half = false; 514 if (level == 0 && is_szm_char(char_for_node)) { 515 node_current->spelling_idx = 516 static_cast<uint16>(char_for_node - 'A' + 1); 517 518 if (char_for_node > 'C') 519 node_current->spelling_idx++; 520 if (char_for_node > 'S') 521 node_current->spelling_idx++; 522 523 h2f_num_[node_current->spelling_idx] = i - item_start_next; 524 is_half = true; 525 } else if (level == 1 && char_for_node == 'h') { 526 char ch_level0 = spelling_last_start[0]; 527 uint16 part_id = 0; 528 if (ch_level0 == 'C') 529 part_id = 'C' - 'A' + 1 + 1; 530 else if (ch_level0 == 'S') 531 part_id = 'S' - 'A' + 1 + 2; 532 else if (ch_level0 == 'Z') 533 part_id = 'Z' - 'A' + 1 + 3; 534 if (0 != part_id) { 535 node_current->spelling_idx = part_id; 536 h2f_num_[node_current->spelling_idx] = i - item_start_next; 537 is_half = true; 538 } 539 } 540 541 if (is_half) { 542 if (h2f_num_[node_current->spelling_idx] > 0) 543 h2f_start_[node_current->spelling_idx] = 544 item_start_next + kFullSplIdStart; 545 else 546 h2f_start_[node_current->spelling_idx] = 0; 547 } 548 549 // for next sibling 550 spelling_last_start = spelling_current; 551 char_for_node = char_current; 552 item_start_next = i; 553 spelling_endable = true; 554 if (spelling_current[level + 1] != '\0') 555 spelling_endable = false; 556 557 son_pos++; 558 } 559 } 560 561 // the last one 562 SpellingNode *node_current = first_son + son_pos; 563 node_current->char_this_node = char_for_node; 564 565 // For quick search in the first level 566 if (0 == level) 567 level1_sons_[char_for_node - 'A'] = node_current; 568 569 if (spelling_endable) { 570 node_current->spelling_idx = kFullSplIdStart + item_start_next; 571 } 572 573 if (spelling_last_start[level + 1] != '\0' || 574 item_end - item_start_next > 1) { 575 size_t real_start = item_start_next; 576 if (spelling_last_start[level + 1] == '\0') 577 real_start++; 578 579 node_current->first_son = 580 construct_spellings_subset(real_start, item_end, level + 1, 581 node_current); 582 583 if (real_start == item_start_next + 1) { 584 uint16 score_this = static_cast<unsigned char>( 585 spelling_last_start[spelling_size_ - 1]); 586 if (score_this < node_current->score) 587 node_current->score = score_this; 588 } 589 } else { 590 node_current->first_son = NULL; 591 node_current->score = static_cast<unsigned char>( 592 spelling_last_start[spelling_size_ - 1]); 593 } 594 595 if (node_current->score < min_son_score) 596 min_son_score = node_current->score; 597 598 assert(son_pos + 1 == num_of_son); 599 600 bool is_half = false; 601 if (level == 0 && szm_is_enabled(char_for_node)) { 602 node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1); 603 604 if (char_for_node > 'C') 605 node_current->spelling_idx++; 606 if (char_for_node > 'S') 607 node_current->spelling_idx++; 608 609 h2f_num_[node_current->spelling_idx] = item_end - item_start_next; 610 is_half = true; 611 } else if (level == 1 && char_for_node == 'h') { 612 char ch_level0 = spelling_last_start[0]; 613 uint16 part_id = 0; 614 if (ch_level0 == 'C') 615 part_id = 'C' - 'A' + 1 + 1; 616 else if (ch_level0 == 'S') 617 part_id = 'S' - 'A' + 1 + 2; 618 else if (ch_level0 == 'Z') 619 part_id = 'Z' - 'A' + 1 + 3; 620 if (0 != part_id) { 621 node_current->spelling_idx = part_id; 622 h2f_num_[node_current->spelling_idx] = item_end - item_start_next; 623 is_half = true; 624 } 625 } 626 if (is_half) { 627 if (h2f_num_[node_current->spelling_idx] > 0) 628 h2f_start_[node_current->spelling_idx] = 629 item_start_next + kFullSplIdStart; 630 else 631 h2f_start_[node_current->spelling_idx] = 0; 632 } 633 634 parent->num_of_son = num_of_son; 635 parent->score = min_son_score; 636 return first_son; 637 } 638 639 bool SpellingTrie::save_spl_trie(FILE *fp) { 640 if (NULL == fp || NULL == spelling_buf_) 641 return false; 642 643 if (fwrite(&spelling_size_, sizeof(size_t), 1, fp) != 1) 644 return false; 645 646 if (fwrite(&spelling_num_, sizeof(size_t), 1, fp) != 1) 647 return false; 648 649 if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1) 650 return false; 651 652 if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1) 653 return false; 654 655 if (fwrite(spelling_buf_, sizeof(char) * spelling_size_, 656 spelling_num_, fp) != spelling_num_) 657 return false; 658 659 return true; 660 } 661 662 bool SpellingTrie::load_spl_trie(FILE *fp) { 663 if (NULL == fp) 664 return false; 665 666 if (fread(&spelling_size_, sizeof(size_t), 1, fp) != 1) 667 return false; 668 669 if (fread(&spelling_num_, sizeof(size_t), 1, fp) != 1) 670 return false; 671 672 if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1) 673 return false; 674 675 if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1) 676 return false; 677 678 if (NULL != spelling_buf_) 679 delete [] spelling_buf_; 680 681 spelling_buf_ = new char[spelling_size_ * spelling_num_]; 682 if (NULL == spelling_buf_) 683 return false; 684 685 if (fread(spelling_buf_, sizeof(char) * spelling_size_, 686 spelling_num_, fp) != spelling_num_) 687 return false; 688 689 return construct(spelling_buf_, spelling_size_, spelling_num_, 690 score_amplifier_, average_score_); 691 } 692 693 bool SpellingTrie::build_f2h() { 694 if (NULL != f2h_) 695 delete [] f2h_; 696 f2h_ = new uint16[spelling_num_]; 697 if (NULL == f2h_) 698 return false; 699 700 for (uint16 hid = 0; hid < kFullSplIdStart; hid++) { 701 for (uint16 fid = h2f_start_[hid]; 702 fid < h2f_start_[hid] + h2f_num_[hid]; fid++) 703 f2h_[fid - kFullSplIdStart] = hid; 704 } 705 706 return true; 707 } 708 709 size_t SpellingTrie::get_spelling_num() { 710 return spelling_num_; 711 } 712 713 uint8 SpellingTrie::get_ym_id(const char *ym_str) { 714 if (NULL == ym_str || NULL == ym_buf_) 715 return 0; 716 717 for (uint8 pos = 0; pos < ym_num_; pos++) 718 if (strcmp(ym_buf_ + ym_size_ * pos, ym_str) == 0) 719 return pos + 1; 720 721 return 0; 722 } 723 724 const char* SpellingTrie::get_spelling_str(uint16 splid) { 725 splstr_queried_[0] = '\0'; 726 727 if (splid >= kFullSplIdStart) { 728 splid -= kFullSplIdStart; 729 snprintf(splstr_queried_, spelling_size_, "%s", 730 spelling_buf_ + splid * spelling_size_); 731 } else { 732 if (splid == 'C' - 'A' + 1 + 1) { 733 snprintf(splstr_queried_, spelling_size_, "%s", "Ch"); 734 } else if (splid == 'S' - 'A' + 1 + 2) { 735 snprintf(splstr_queried_, spelling_size_, "%s", "Sh"); 736 } else if (splid == 'Z' - 'A' + 1 + 3) { 737 snprintf(splstr_queried_, spelling_size_, "%s", "Zh"); 738 } else { 739 if (splid > 'C' - 'A' + 1) 740 splid--; 741 if (splid > 'S' - 'A' + 1) 742 splid--; 743 splstr_queried_[0] = 'A' + splid - 1; 744 splstr_queried_[1] = '\0'; 745 } 746 } 747 return splstr_queried_; 748 } 749 750 const char16* SpellingTrie::get_spelling_str16(uint16 splid) { 751 splstr16_queried_[0] = '\0'; 752 753 if (splid >= kFullSplIdStart) { 754 splid -= kFullSplIdStart; 755 for (size_t pos = 0; pos < spelling_size_; pos++) { 756 splstr16_queried_[pos] = static_cast<char16> 757 (spelling_buf_[splid * spelling_size_ + pos]); 758 } 759 } else { 760 if (splid == 'C' - 'A' + 1 + 1) { 761 splstr16_queried_[0] = static_cast<char16>('C'); 762 splstr16_queried_[1] = static_cast<char16>('h'); 763 splstr16_queried_[2] = static_cast<char16>('\0'); 764 } else if (splid == 'S' - 'A' + 1 + 2) { 765 splstr16_queried_[0] = static_cast<char16>('S'); 766 splstr16_queried_[1] = static_cast<char16>('h'); 767 splstr16_queried_[2] = static_cast<char16>('\0'); 768 } else if (splid == 'Z' - 'A' + 1 + 3) { 769 splstr16_queried_[0] = static_cast<char16>('Z'); 770 splstr16_queried_[1] = static_cast<char16>('h'); 771 splstr16_queried_[2] = static_cast<char16>('\0'); 772 } else { 773 if (splid > 'C' - 'A' + 1) 774 splid--; 775 if (splid > 'S' - 'A' + 1) 776 splid--; 777 splstr16_queried_[0] = 'A' + splid - 1; 778 splstr16_queried_[1] = '\0'; 779 } 780 } 781 return splstr16_queried_; 782 } 783 784 size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16, 785 size_t splstr16_len) { 786 if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0; 787 788 if (splid >= kFullSplIdStart) { 789 splid -= kFullSplIdStart; 790 for (size_t pos = 0; pos <= kMaxPinyinSize; pos++) { 791 splstr16[pos] = static_cast<char16> 792 (spelling_buf_[splid * spelling_size_ + pos]); 793 if (static_cast<char16>('\0') == splstr16[pos]) { 794 return pos; 795 } 796 } 797 } else { 798 if (splid == 'C' - 'A' + 1 + 1) { 799 splstr16[0] = static_cast<char16>('C'); 800 splstr16[1] = static_cast<char16>('h'); 801 splstr16[2] = static_cast<char16>('\0'); 802 return 2; 803 } else if (splid == 'S' - 'A' + 1 + 2) { 804 splstr16[0] = static_cast<char16>('S'); 805 splstr16[1] = static_cast<char16>('h'); 806 splstr16[2] = static_cast<char16>('\0'); 807 return 2; 808 } else if (splid == 'Z' - 'A' + 1 + 3) { 809 splstr16[0] = static_cast<char16>('Z'); 810 splstr16[1] = static_cast<char16>('h'); 811 splstr16[2] = static_cast<char16>('\0'); 812 return 2; 813 } else { 814 if (splid > 'C' - 'A' + 1) 815 splid--; 816 if (splid > 'S' - 'A' + 1) 817 splid--; 818 splstr16[0] = 'A' + splid - 1; 819 splstr16[1] = '\0'; 820 return 1; 821 } 822 } 823 824 // Not reachable. 825 return 0; 826 } 827 828 } // namespace ime_pinyin 829