1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 package com.android.providers.contacts; 17 18 import com.android.internal.util.HanziToPinyin; 19 import com.android.internal.util.HanziToPinyin.Token; 20 21 import android.content.ContentValues; 22 import android.provider.ContactsContract.FullNameStyle; 23 import android.provider.ContactsContract.PhoneticNameStyle; 24 import android.provider.ContactsContract.CommonDataKinds.StructuredName; 25 import android.text.TextUtils; 26 27 import java.lang.Character.UnicodeBlock; 28 import java.util.ArrayList; 29 import java.util.HashSet; 30 import java.util.Locale; 31 import java.util.StringTokenizer; 32 33 /** 34 * The purpose of this class is to split a full name into given names and last 35 * name. The logic only supports having a single last name. If the full name has 36 * multiple last names the output will be incorrect. 37 * <p> 38 * Core algorithm: 39 * <ol> 40 * <li>Remove the suffixes (III, Ph.D., M.D.).</li> 41 * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li> 42 * <li>Assign the last remaining token as the last name.</li> 43 * <li>If the previous word to the last name is one from LASTNAME_PREFIXES, use 44 * this word also as the last name.</li> 45 * <li>Assign the rest of the words as the "given names".</li> 46 * </ol> 47 */ 48 public class NameSplitter { 49 50 public static final int MAX_TOKENS = 10; 51 52 private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase(); 53 private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase(); 54 55 // This includes simplified and traditional Chinese 56 private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase(); 57 58 private final HashSet<String> mPrefixesSet; 59 private final HashSet<String> mSuffixesSet; 60 private final int mMaxSuffixLength; 61 private final HashSet<String> mLastNamePrefixesSet; 62 private final HashSet<String> mConjuctions; 63 private final Locale mLocale; 64 private final String mLanguage; 65 66 public static class Name { 67 public String prefix; 68 public String givenNames; 69 public String middleName; 70 public String familyName; 71 public String suffix; 72 73 public int fullNameStyle; 74 75 public String phoneticFamilyName; 76 public String phoneticMiddleName; 77 public String phoneticGivenName; 78 79 public int phoneticNameStyle; 80 81 public Name() { 82 } 83 84 public Name(String prefix, String givenNames, String middleName, String familyName, 85 String suffix) { 86 this.prefix = prefix; 87 this.givenNames = givenNames; 88 this.middleName = middleName; 89 this.familyName = familyName; 90 this.suffix = suffix; 91 } 92 93 public String getPrefix() { 94 return prefix; 95 } 96 97 public String getGivenNames() { 98 return givenNames; 99 } 100 101 public String getMiddleName() { 102 return middleName; 103 } 104 105 public String getFamilyName() { 106 return familyName; 107 } 108 109 public String getSuffix() { 110 return suffix; 111 } 112 113 public int getFullNameStyle() { 114 return fullNameStyle; 115 } 116 117 public String getPhoneticFamilyName() { 118 return phoneticFamilyName; 119 } 120 121 public String getPhoneticMiddleName() { 122 return phoneticMiddleName; 123 } 124 125 public String getPhoneticGivenName() { 126 return phoneticGivenName; 127 } 128 129 public int getPhoneticNameStyle() { 130 return phoneticNameStyle; 131 } 132 133 public void fromValues(ContentValues values) { 134 prefix = values.getAsString(StructuredName.PREFIX); 135 givenNames = values.getAsString(StructuredName.GIVEN_NAME); 136 middleName = values.getAsString(StructuredName.MIDDLE_NAME); 137 familyName = values.getAsString(StructuredName.FAMILY_NAME); 138 suffix = values.getAsString(StructuredName.SUFFIX); 139 140 Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE); 141 fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer; 142 143 phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME); 144 phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME); 145 phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME); 146 147 integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE); 148 phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer; 149 } 150 151 public void toValues(ContentValues values) { 152 putValueIfPresent(values, StructuredName.PREFIX, prefix); 153 putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames); 154 putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName); 155 putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName); 156 putValueIfPresent(values, StructuredName.SUFFIX, suffix); 157 values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle); 158 putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName); 159 putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName); 160 putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName); 161 values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle); 162 } 163 164 private void putValueIfPresent(ContentValues values, String name, String value) { 165 if (value != null) { 166 values.put(name, value); 167 } 168 } 169 170 public void clear() { 171 prefix = null; 172 givenNames = null; 173 middleName = null; 174 familyName = null; 175 suffix = null; 176 fullNameStyle = FullNameStyle.UNDEFINED; 177 phoneticFamilyName = null; 178 phoneticMiddleName = null; 179 phoneticGivenName = null; 180 phoneticNameStyle = PhoneticNameStyle.UNDEFINED; 181 } 182 183 public boolean isEmpty() { 184 return TextUtils.isEmpty(givenNames) 185 && TextUtils.isEmpty(middleName) 186 && TextUtils.isEmpty(familyName) 187 && TextUtils.isEmpty(suffix) 188 && TextUtils.isEmpty(phoneticFamilyName) 189 && TextUtils.isEmpty(phoneticMiddleName) 190 && TextUtils.isEmpty(phoneticGivenName); 191 } 192 193 @Override 194 public String toString() { 195 return "[given: " + givenNames + " middle: " + middleName + " family: " + familyName 196 + " ph/given: " + phoneticGivenName + " ph/middle: " + phoneticMiddleName 197 + " ph/family: " + phoneticFamilyName + "]"; 198 } 199 200 } 201 202 private static class NameTokenizer extends StringTokenizer { 203 private final String[] mTokens; 204 private int mDotBitmask; 205 private int mCommaBitmask; 206 private int mStartPointer; 207 private int mEndPointer; 208 209 public NameTokenizer(String fullName) { 210 super(fullName, " .,", true); 211 212 mTokens = new String[MAX_TOKENS]; 213 214 // Iterate over tokens, skipping over empty ones and marking tokens that 215 // are followed by dots. 216 while (hasMoreTokens() && mEndPointer < MAX_TOKENS) { 217 final String token = nextToken(); 218 if (token.length() > 0) { 219 final char c = token.charAt(0); 220 if (c == ' ') { 221 continue; 222 } 223 } 224 225 if (mEndPointer > 0 && token.charAt(0) == '.') { 226 mDotBitmask |= (1 << (mEndPointer - 1)); 227 } else if (mEndPointer > 0 && token.charAt(0) == ',') { 228 mCommaBitmask |= (1 << (mEndPointer - 1)); 229 } else { 230 mTokens[mEndPointer] = token; 231 mEndPointer++; 232 } 233 } 234 } 235 236 /** 237 * Returns true if the token is followed by a dot in the original full name. 238 */ 239 public boolean hasDot(int index) { 240 return (mDotBitmask & (1 << index)) != 0; 241 } 242 243 /** 244 * Returns true if the token is followed by a comma in the original full name. 245 */ 246 public boolean hasComma(int index) { 247 return (mCommaBitmask & (1 << index)) != 0; 248 } 249 } 250 251 /** 252 * Constructor. 253 * 254 * @param commonPrefixes comma-separated list of common prefixes, 255 * e.g. "Mr, Ms, Mrs" 256 * @param commonLastNamePrefixes comma-separated list of common last name prefixes, 257 * e.g. "d', st, st., von" 258 * @param commonSuffixes comma-separated list of common suffixes, 259 * e.g. "Jr, M.D., MD, D.D.S." 260 * @param commonConjunctions comma-separated list of common conjuctions, 261 * e.g. "AND, Or" 262 */ 263 public NameSplitter(String commonPrefixes, String commonLastNamePrefixes, 264 String commonSuffixes, String commonConjunctions, Locale locale) { 265 // TODO: refactor this to use <string-array> resources 266 mPrefixesSet = convertToSet(commonPrefixes); 267 mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes); 268 mSuffixesSet = convertToSet(commonSuffixes); 269 mConjuctions = convertToSet(commonConjunctions); 270 mLocale = locale != null ? locale : Locale.getDefault(); 271 mLanguage = mLocale.getLanguage().toLowerCase(); 272 273 int maxLength = 0; 274 for (String suffix : mSuffixesSet) { 275 if (suffix.length() > maxLength) { 276 maxLength = suffix.length(); 277 } 278 } 279 280 mMaxSuffixLength = maxLength; 281 } 282 283 /** 284 * Converts a comma-separated list of Strings to a set of Strings. Trims strings 285 * and converts them to upper case. 286 */ 287 private static HashSet<String> convertToSet(String strings) { 288 HashSet<String> set = new HashSet<String>(); 289 if (strings != null) { 290 String[] split = strings.split(","); 291 for (int i = 0; i < split.length; i++) { 292 set.add(split[i].trim().toUpperCase()); 293 } 294 } 295 return set; 296 } 297 298 /** 299 * Parses a full name and returns components as a list of tokens. 300 */ 301 public int tokenize(String[] tokens, String fullName) { 302 if (fullName == null) { 303 return 0; 304 } 305 306 NameTokenizer tokenizer = new NameTokenizer(fullName); 307 308 if (tokenizer.mStartPointer == tokenizer.mEndPointer) { 309 return 0; 310 } 311 312 String firstToken = tokenizer.mTokens[tokenizer.mStartPointer]; 313 if (mPrefixesSet.contains(firstToken.toUpperCase())) { 314 tokenizer.mStartPointer++; 315 } 316 int count = 0; 317 for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) { 318 tokens[count++] = tokenizer.mTokens[i]; 319 } 320 321 return count; 322 } 323 324 325 /** 326 * Parses a full name and returns parsed components in the Name object. 327 */ 328 public void split(Name name, String fullName) { 329 if (fullName == null) { 330 return; 331 } 332 333 int fullNameStyle = guessFullNameStyle(fullName); 334 if (fullNameStyle == FullNameStyle.CJK) { 335 fullNameStyle = getAdjustedFullNameStyle(fullNameStyle); 336 } 337 338 name.fullNameStyle = fullNameStyle; 339 340 switch (fullNameStyle) { 341 case FullNameStyle.CHINESE: 342 splitChineseName(name, fullName); 343 break; 344 345 case FullNameStyle.JAPANESE: 346 case FullNameStyle.KOREAN: 347 splitJapaneseOrKoreanName(name, fullName); 348 break; 349 350 default: 351 splitWesternName(name, fullName); 352 } 353 } 354 355 /** 356 * Splits a full name composed according to the Western tradition: 357 * <pre> 358 * [prefix] given name(s) [[middle name] family name] [, suffix] 359 * [prefix] family name, given name [middle name] [,suffix] 360 * </pre> 361 */ 362 private void splitWesternName(Name name, String fullName) { 363 NameTokenizer tokens = new NameTokenizer(fullName); 364 parsePrefix(name, tokens); 365 366 // If the name consists of just one or two tokens, treat them as first/last name, 367 // not as suffix. Example: John Ma; Ma is last name, not "M.A.". 368 if (tokens.mEndPointer > 2) { 369 parseSuffix(name, tokens); 370 } 371 372 if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) { 373 name.givenNames = tokens.mTokens[tokens.mStartPointer]; 374 } else { 375 parseLastName(name, tokens); 376 parseMiddleName(name, tokens); 377 parseGivenNames(name, tokens); 378 } 379 } 380 381 /** 382 * Splits a full name composed according to the Chinese tradition: 383 * <pre> 384 * [family name [middle name]] given name 385 * </pre> 386 */ 387 private void splitChineseName(Name name, String fullName) { 388 StringTokenizer tokenizer = new StringTokenizer(fullName); 389 while (tokenizer.hasMoreTokens()) { 390 String token = tokenizer.nextToken(); 391 if (name.givenNames == null) { 392 name.givenNames = token; 393 } else if (name.familyName == null) { 394 name.familyName = name.givenNames; 395 name.givenNames = token; 396 } else if (name.middleName == null) { 397 name.middleName = name.givenNames; 398 name.givenNames = token; 399 } else { 400 name.middleName = name.middleName + name.givenNames; 401 name.givenNames = token; 402 } 403 } 404 405 // If a single word parse that word up. 406 if (name.givenNames != null && name.familyName == null && name.middleName == null) { 407 int length = fullName.length(); 408 if (length == 2) { 409 name.familyName = fullName.substring(0, 1); 410 name.givenNames = fullName.substring(1); 411 } else if (length == 3) { 412 name.familyName = fullName.substring(0, 1); 413 name.middleName = fullName.substring(1, 2); 414 name.givenNames = fullName.substring(2); 415 } else if (length == 4) { 416 name.familyName = fullName.substring(0, 2); 417 name.middleName = fullName.substring(2, 3); 418 name.givenNames = fullName.substring(3); 419 } 420 421 } 422 } 423 424 /** 425 * Splits a full name composed according to the Japanese tradition: 426 * <pre> 427 * [family name] given name(s) 428 * </pre> 429 */ 430 private void splitJapaneseOrKoreanName(Name name, String fullName) { 431 StringTokenizer tokenizer = new StringTokenizer(fullName); 432 while (tokenizer.hasMoreTokens()) { 433 String token = tokenizer.nextToken(); 434 if (name.givenNames == null) { 435 name.givenNames = token; 436 } else if (name.familyName == null) { 437 name.familyName = name.givenNames; 438 name.givenNames = token; 439 } else { 440 name.givenNames += " " + token; 441 } 442 } 443 } 444 445 /** 446 * Concatenates components of a name according to the rules dictated by the name style. 447 * 448 * @param givenNameFirst is ignored for CJK display name styles 449 */ 450 public String join(Name name, boolean givenNameFirst) { 451 switch (name.fullNameStyle) { 452 case FullNameStyle.CJK: 453 case FullNameStyle.CHINESE: 454 case FullNameStyle.KOREAN: 455 return join(name.familyName, name.middleName, name.givenNames, name.suffix, 456 false, false, false); 457 458 case FullNameStyle.JAPANESE: 459 return join(name.familyName, name.middleName, name.givenNames, name.suffix, 460 true, false, false); 461 462 default: 463 if (givenNameFirst) { 464 return join(name.givenNames, name.middleName, name.familyName, name.suffix, 465 true, false, true); 466 } else { 467 return join(name.familyName, name.givenNames, name.middleName, name.suffix, 468 true, true, true); 469 } 470 } 471 } 472 473 /** 474 * Concatenates components of the phonetic name following the CJK tradition: 475 * family name + middle name + given name(s). 476 */ 477 public String joinPhoneticName(Name name) { 478 return join(name.phoneticFamilyName, name.phoneticMiddleName, 479 name.phoneticGivenName, null, true, false, false); 480 } 481 482 /** 483 * Concatenates parts of a full name inserting spaces and commas as specified. 484 */ 485 private String join(String part1, String part2, String part3, String suffix, 486 boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) { 487 boolean hasPart1 = !TextUtils.isEmpty(part1); 488 boolean hasPart2 = !TextUtils.isEmpty(part2); 489 boolean hasPart3 = !TextUtils.isEmpty(part3); 490 boolean hasSuffix = !TextUtils.isEmpty(suffix); 491 492 boolean isSingleWord = true; 493 String singleWord = null; 494 if (hasPart1) { 495 singleWord = part1; 496 } 497 498 if (hasPart2) { 499 if (singleWord != null) { 500 isSingleWord = false; 501 } else { 502 singleWord = part2; 503 } 504 } 505 506 if (hasPart3) { 507 if (singleWord != null) { 508 isSingleWord = false; 509 } else { 510 singleWord = part3; 511 } 512 } 513 514 if (hasSuffix) { 515 if (singleWord != null) { 516 isSingleWord = false; 517 } else { 518 singleWord = normalizedSuffix(suffix); 519 } 520 } 521 522 if (isSingleWord) { 523 return singleWord; 524 } 525 526 StringBuilder sb = new StringBuilder(); 527 if (hasPart1) { 528 sb.append(part1); 529 } 530 531 if (hasPart2) { 532 if (hasPart1) { 533 if (useCommaAfterPart1) { 534 sb.append(','); 535 } 536 if (useSpace) { 537 sb.append(' '); 538 } 539 } 540 sb.append(part2); 541 } 542 543 if (hasPart3) { 544 if (hasPart1 || hasPart2) { 545 if (useSpace) { 546 sb.append(' '); 547 } 548 } 549 sb.append(part3); 550 } 551 552 if (hasSuffix) { 553 if (hasPart1 || hasPart2 || hasPart3) { 554 if (useCommaAfterPart3) { 555 sb.append(','); 556 } 557 if (useSpace) { 558 sb.append(' '); 559 } 560 } 561 sb.append(normalizedSuffix(suffix)); 562 } 563 564 return sb.toString(); 565 } 566 567 /** 568 * Puts a dot after the supplied suffix if that is the accepted form of the suffix, 569 * e.g. "Jr." and "Sr.", but not "I", "II" and "III". 570 */ 571 private String normalizedSuffix(String suffix) { 572 int length = suffix.length(); 573 if (length == 0 || suffix.charAt(length - 1) == '.') { 574 return suffix; 575 } 576 577 String withDot = suffix + '.'; 578 if (mSuffixesSet.contains(withDot.toUpperCase())) { 579 return withDot; 580 } else { 581 return suffix; 582 } 583 } 584 585 /** 586 * If the supplied name style is undefined, returns a default based on the language, 587 * otherwise returns the supplied name style itself. 588 * 589 * @param nameStyle See {@link FullNameStyle}. 590 */ 591 public int getAdjustedFullNameStyle(int nameStyle) { 592 if (nameStyle == FullNameStyle.UNDEFINED) { 593 if (JAPANESE_LANGUAGE.equals(mLanguage)) { 594 return FullNameStyle.JAPANESE; 595 } else if (KOREAN_LANGUAGE.equals(mLanguage)) { 596 return FullNameStyle.KOREAN; 597 } else if (CHINESE_LANGUAGE.equals(mLanguage)) { 598 return FullNameStyle.CHINESE; 599 } else { 600 return FullNameStyle.WESTERN; 601 } 602 } else if (nameStyle == FullNameStyle.CJK) { 603 if (JAPANESE_LANGUAGE.equals(mLanguage)) { 604 return FullNameStyle.JAPANESE; 605 } else if (KOREAN_LANGUAGE.equals(mLanguage)) { 606 return FullNameStyle.KOREAN; 607 } else { 608 return FullNameStyle.CHINESE; 609 } 610 } 611 return nameStyle; 612 } 613 614 /** 615 * Parses the first word from the name if it is a prefix. 616 */ 617 private void parsePrefix(Name name, NameTokenizer tokens) { 618 if (tokens.mStartPointer == tokens.mEndPointer) { 619 return; 620 } 621 622 String firstToken = tokens.mTokens[tokens.mStartPointer]; 623 if (mPrefixesSet.contains(firstToken.toUpperCase())) { 624 name.prefix = firstToken; 625 tokens.mStartPointer++; 626 } 627 } 628 629 /** 630 * Parses the last word(s) from the name if it is a suffix. 631 */ 632 private void parseSuffix(Name name, NameTokenizer tokens) { 633 if (tokens.mStartPointer == tokens.mEndPointer) { 634 return; 635 } 636 637 String lastToken = tokens.mTokens[tokens.mEndPointer - 1]; 638 if (lastToken.length() > mMaxSuffixLength) { 639 return; 640 } 641 642 String normalized = lastToken.toUpperCase(); 643 if (mSuffixesSet.contains(normalized)) { 644 name.suffix = lastToken; 645 tokens.mEndPointer--; 646 return; 647 } 648 649 if (tokens.hasDot(tokens.mEndPointer - 1)) { 650 lastToken += '.'; 651 } 652 normalized += "."; 653 654 // Take care of suffixes like M.D. and D.D.S. 655 int pos = tokens.mEndPointer - 1; 656 while (normalized.length() <= mMaxSuffixLength) { 657 658 if (mSuffixesSet.contains(normalized)) { 659 name.suffix = lastToken; 660 tokens.mEndPointer = pos; 661 return; 662 } 663 664 if (pos == tokens.mStartPointer) { 665 break; 666 } 667 668 pos--; 669 if (tokens.hasDot(pos)) { 670 lastToken = tokens.mTokens[pos] + "." + lastToken; 671 } else { 672 lastToken = tokens.mTokens[pos] + " " + lastToken; 673 } 674 675 normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized; 676 } 677 } 678 679 private void parseLastName(Name name, NameTokenizer tokens) { 680 if (tokens.mStartPointer == tokens.mEndPointer) { 681 return; 682 } 683 684 // If the first word is followed by a comma, assume that it's the family name 685 if (tokens.hasComma(tokens.mStartPointer)) { 686 name.familyName = tokens.mTokens[tokens.mStartPointer]; 687 tokens.mStartPointer++; 688 return; 689 } 690 691 // If the second word is followed by a comma and the first word 692 // is a last name prefix as in "de Sade" and "von Cliburn", treat 693 // the first two words as the family name. 694 if (tokens.mStartPointer + 1 < tokens.mEndPointer 695 && tokens.hasComma(tokens.mStartPointer + 1) 696 && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) { 697 String familyNamePrefix = tokens.mTokens[tokens.mStartPointer]; 698 if (tokens.hasDot(tokens.mStartPointer)) { 699 familyNamePrefix += '.'; 700 } 701 name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1]; 702 tokens.mStartPointer += 2; 703 return; 704 } 705 706 // Finally, assume that the last word is the last name 707 name.familyName = tokens.mTokens[tokens.mEndPointer - 1]; 708 tokens.mEndPointer--; 709 710 // Take care of last names like "de Sade" and "von Cliburn" 711 if ((tokens.mEndPointer - tokens.mStartPointer) > 0) { 712 String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1]; 713 if (isFamilyNamePrefix(lastNamePrefix)) { 714 if (tokens.hasDot(tokens.mEndPointer - 1)) { 715 lastNamePrefix += '.'; 716 } 717 name.familyName = lastNamePrefix + " " + name.familyName; 718 tokens.mEndPointer--; 719 } 720 } 721 } 722 723 /** 724 * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de" 725 */ 726 private boolean isFamilyNamePrefix(String word) { 727 final String normalized = word.toUpperCase(); 728 729 return mLastNamePrefixesSet.contains(normalized) 730 || mLastNamePrefixesSet.contains(normalized + "."); 731 } 732 733 734 private void parseMiddleName(Name name, NameTokenizer tokens) { 735 if (tokens.mStartPointer == tokens.mEndPointer) { 736 return; 737 } 738 739 if ((tokens.mEndPointer - tokens.mStartPointer) > 1) { 740 if ((tokens.mEndPointer - tokens.mStartPointer) == 2 741 || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2]. 742 toUpperCase())) { 743 name.middleName = tokens.mTokens[tokens.mEndPointer - 1]; 744 if (tokens.hasDot(tokens.mEndPointer - 1)) { 745 name.middleName += '.'; 746 } 747 tokens.mEndPointer--; 748 } 749 } 750 } 751 752 private void parseGivenNames(Name name, NameTokenizer tokens) { 753 if (tokens.mStartPointer == tokens.mEndPointer) { 754 return; 755 } 756 757 if ((tokens.mEndPointer - tokens.mStartPointer) == 1) { 758 name.givenNames = tokens.mTokens[tokens.mStartPointer]; 759 } else { 760 StringBuilder sb = new StringBuilder(); 761 for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) { 762 if (i != tokens.mStartPointer) { 763 sb.append(' '); 764 } 765 sb.append(tokens.mTokens[i]); 766 if (tokens.hasDot(i)) { 767 sb.append('.'); 768 } 769 } 770 name.givenNames = sb.toString(); 771 } 772 } 773 774 /** 775 * Makes the best guess at the expected full name style based on the character set 776 * used in the supplied name. If the phonetic name is also supplied, tries to 777 * differentiate between Chinese, Japanese and Korean based on the alphabet used 778 * for the phonetic name. 779 */ 780 public void guessNameStyle(Name name) { 781 guessFullNameStyle(name); 782 guessPhoneticNameStyle(name); 783 name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle, 784 name.phoneticNameStyle); 785 } 786 787 /** 788 * Updates the display name style according to the phonetic name style if we 789 * were unsure about display name style based on the name components, but 790 * phonetic name makes it more definitive. 791 */ 792 public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) { 793 if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) { 794 if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) { 795 if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) { 796 return FullNameStyle.JAPANESE; 797 } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) { 798 return FullNameStyle.KOREAN; 799 } 800 if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) { 801 return FullNameStyle.CHINESE; 802 } 803 } 804 } 805 return nameStyle; 806 } 807 808 /** 809 * Makes the best guess at the expected full name style based on the character set 810 * used in the supplied name. 811 */ 812 private void guessFullNameStyle(NameSplitter.Name name) { 813 if (name.fullNameStyle != FullNameStyle.UNDEFINED) { 814 return; 815 } 816 817 int bestGuess = guessFullNameStyle(name.givenNames); 818 // A mix of Hanzi and latin chars are common in China, so we have to go through all names 819 // if the name is not JANPANESE or KOREAN. 820 if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK 821 && bestGuess != FullNameStyle.WESTERN) { 822 name.fullNameStyle = bestGuess; 823 return; 824 } 825 826 int guess = guessFullNameStyle(name.familyName); 827 if (guess != FullNameStyle.UNDEFINED) { 828 if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { 829 name.fullNameStyle = guess; 830 return; 831 } 832 bestGuess = guess; 833 } 834 835 guess = guessFullNameStyle(name.middleName); 836 if (guess != FullNameStyle.UNDEFINED) { 837 if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) { 838 name.fullNameStyle = guess; 839 return; 840 } 841 bestGuess = guess; 842 } 843 844 name.fullNameStyle = bestGuess; 845 } 846 847 public int guessFullNameStyle(String name) { 848 if (name == null) { 849 return FullNameStyle.UNDEFINED; 850 } 851 852 int nameStyle = FullNameStyle.UNDEFINED; 853 int length = name.length(); 854 int offset = 0; 855 while (offset < length) { 856 int codePoint = Character.codePointAt(name, offset); 857 if (Character.isLetter(codePoint)) { 858 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); 859 860 if (!isLatinUnicodeBlock(unicodeBlock)) { 861 862 if (isCJKUnicodeBlock(unicodeBlock)) { 863 // We don't know if this is Chinese, Japanese or Korean - 864 // trying to figure out by looking at other characters in the name 865 return guessCJKNameStyle(name, offset + Character.charCount(codePoint)); 866 } 867 868 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 869 return FullNameStyle.JAPANESE; 870 } 871 872 if (isKoreanUnicodeBlock(unicodeBlock)) { 873 return FullNameStyle.KOREAN; 874 } 875 } 876 nameStyle = FullNameStyle.WESTERN; 877 } 878 offset += Character.charCount(codePoint); 879 } 880 return nameStyle; 881 } 882 883 private int guessCJKNameStyle(String name, int offset) { 884 int length = name.length(); 885 while (offset < length) { 886 int codePoint = Character.codePointAt(name, offset); 887 if (Character.isLetter(codePoint)) { 888 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); 889 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 890 return FullNameStyle.JAPANESE; 891 } 892 if (isKoreanUnicodeBlock(unicodeBlock)) { 893 return FullNameStyle.KOREAN; 894 } 895 } 896 offset += Character.charCount(codePoint); 897 } 898 899 return FullNameStyle.CJK; 900 } 901 902 private void guessPhoneticNameStyle(NameSplitter.Name name) { 903 if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) { 904 return; 905 } 906 907 int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName); 908 if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) { 909 name.phoneticNameStyle = bestGuess; 910 return; 911 } 912 913 int guess = guessPhoneticNameStyle(name.phoneticGivenName); 914 if (guess != FullNameStyle.UNDEFINED) { 915 if (guess != FullNameStyle.CJK) { 916 name.phoneticNameStyle = guess; 917 return; 918 } 919 bestGuess = guess; 920 } 921 922 guess = guessPhoneticNameStyle(name.phoneticMiddleName); 923 if (guess != FullNameStyle.UNDEFINED) { 924 if (guess != FullNameStyle.CJK) { 925 name.phoneticNameStyle = guess; 926 return; 927 } 928 bestGuess = guess; 929 } 930 } 931 932 public int guessPhoneticNameStyle(String name) { 933 if (name == null) { 934 return PhoneticNameStyle.UNDEFINED; 935 } 936 937 int nameStyle = PhoneticNameStyle.UNDEFINED; 938 int length = name.length(); 939 int offset = 0; 940 while (offset < length) { 941 int codePoint = Character.codePointAt(name, offset); 942 if (Character.isLetter(codePoint)) { 943 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint); 944 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) { 945 return PhoneticNameStyle.JAPANESE; 946 } 947 if (isKoreanUnicodeBlock(unicodeBlock)) { 948 return PhoneticNameStyle.KOREAN; 949 } 950 if (isLatinUnicodeBlock(unicodeBlock)) { 951 return PhoneticNameStyle.PINYIN; 952 } 953 } 954 offset += Character.charCount(codePoint); 955 } 956 957 return nameStyle; 958 } 959 960 private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) { 961 return unicodeBlock == UnicodeBlock.BASIC_LATIN || 962 unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT || 963 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A || 964 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B || 965 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL; 966 } 967 968 private static boolean isCJKUnicodeBlock(UnicodeBlock block) { 969 return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 970 || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 971 || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 972 || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION 973 || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT 974 || block == UnicodeBlock.CJK_COMPATIBILITY 975 || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS 976 || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS 977 || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT; 978 } 979 980 private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) { 981 return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES || 982 unicodeBlock == UnicodeBlock.HANGUL_JAMO || 983 unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO; 984 } 985 986 private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) { 987 return unicodeBlock == UnicodeBlock.KATAKANA || 988 unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS || 989 unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS || 990 unicodeBlock == UnicodeBlock.HIRAGANA; 991 } 992 } 993