Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 package com.android.providers.contacts;
     17 
     18 import android.content.ContentValues;
     19 import android.provider.ContactsContract.CommonDataKinds.StructuredName;
     20 import android.provider.ContactsContract.FullNameStyle;
     21 import android.provider.ContactsContract.PhoneticNameStyle;
     22 import android.text.TextUtils;
     23 
     24 import com.android.providers.contacts.util.NeededForTesting;
     25 
     26 import java.lang.Character.UnicodeBlock;
     27 import java.util.HashSet;
     28 import java.util.Locale;
     29 import java.util.StringTokenizer;
     30 
     31 /**
     32  * The purpose of this class is to split a full name into given names and last
     33  * name. The logic only supports having a single last name. If the full name has
     34  * multiple last names the output will be incorrect.
     35  * <p>
     36  * Core algorithm:
     37  * <ol>
     38  * <li>Remove the suffixes (III, Ph.D., M.D.).</li>
     39  * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li>
     40  * <li>Assign the last remaining token as the last name.</li>
     41  * <li>If the previous word to the last name is one from LASTNAME_PREFIXES, use
     42  * this word also as the last name.</li>
     43  * <li>Assign the rest of the words as the "given names".</li>
     44  * </ol>
     45  */
     46 public class NameSplitter {
     47 
    /**
     * Upper bound on the number of tokens kept from one full name. NameTokenizer sizes its
     * token array with this value and stops reading once that many tokens are stored.
     */
    public static final int MAX_TOKENS = 10;

    // Lower-cased language codes of the CJK locales; compared against mLanguage when a
    // name style has to be derived from the locale.
    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
    private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();

    // This includes simplified and traditional Chinese
    private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();

    // The four sets below are built by convertToSet() from the comma-separated constructor
    // arguments, so every entry is trimmed and upper-cased.
    private final HashSet<String> mPrefixesSet;
    private final HashSet<String> mSuffixesSet;
    // Length of the longest entry in mSuffixesSet (0 when the set is empty).
    private final int mMaxSuffixLength;
    private final HashSet<String> mLastNamePrefixesSet;
    private final HashSet<String> mConjuctions;
    // Locale supplied to the constructor, or the default locale when null was supplied.
    private final Locale mLocale;
    // Lower-cased language code of mLocale.
    private final String mLanguage;
     63 
    /**
     * Two-character Korean family names. splitKoreanName() checks whether an unseparated
     * name starts with one of these before falling back to a one-character family name.
     * http://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EB%B3%B5%EC%84%B1
     */
    private static final String[] KOREAN_TWO_CHARCTER_FAMILY_NAMES = {
        "\uAC15\uC804", // Gang Jeon
        "\uB0A8\uAD81", // Nam Goong
        "\uB3C5\uACE0", // Dok Go
        "\uB3D9\uBC29", // Dong Bang
        "\uB9DD\uC808", // Mang Jeol
        "\uC0AC\uACF5", // Sa Gong
        "\uC11C\uBB38", // Seo Moon
        "\uC120\uC6B0", // Seon Woo
        "\uC18C\uBD09", // So Bong
        "\uC5B4\uAE08", // Uh Geum
        "\uC7A5\uACE1", // Jang Gok
        "\uC81C\uAC08", // Je Gal
        "\uD669\uBCF4"  // Hwang Bo
    };
     83 
     84     public static class Name {
     85         public String prefix;
     86         public String givenNames;
     87         public String middleName;
     88         public String familyName;
     89         public String suffix;
     90 
     91         public int fullNameStyle;
     92 
     93         public String phoneticFamilyName;
     94         public String phoneticMiddleName;
     95         public String phoneticGivenName;
     96 
     97         public int phoneticNameStyle;
     98 
     99         public Name() {
    100         }
    101 
    102         public Name(String prefix, String givenNames, String middleName, String familyName,
    103                 String suffix) {
    104             this.prefix = prefix;
    105             this.givenNames = givenNames;
    106             this.middleName = middleName;
    107             this.familyName = familyName;
    108             this.suffix = suffix;
    109         }
    110 
    111         @NeededForTesting
    112         public String getPrefix() {
    113             return prefix;
    114         }
    115 
    116         public String getGivenNames() {
    117             return givenNames;
    118         }
    119 
    120         public String getMiddleName() {
    121             return middleName;
    122         }
    123 
    124         public String getFamilyName() {
    125             return familyName;
    126         }
    127 
    128         @NeededForTesting
    129         public String getSuffix() {
    130             return suffix;
    131         }
    132 
    133         public int getFullNameStyle() {
    134             return fullNameStyle;
    135         }
    136 
    137         public String getPhoneticFamilyName() {
    138             return phoneticFamilyName;
    139         }
    140 
    141         public String getPhoneticMiddleName() {
    142             return phoneticMiddleName;
    143         }
    144 
    145         public String getPhoneticGivenName() {
    146             return phoneticGivenName;
    147         }
    148 
    149         public int getPhoneticNameStyle() {
    150             return phoneticNameStyle;
    151         }
    152 
    153         public void fromValues(ContentValues values) {
    154             prefix = values.getAsString(StructuredName.PREFIX);
    155             givenNames = values.getAsString(StructuredName.GIVEN_NAME);
    156             middleName = values.getAsString(StructuredName.MIDDLE_NAME);
    157             familyName = values.getAsString(StructuredName.FAMILY_NAME);
    158             suffix = values.getAsString(StructuredName.SUFFIX);
    159 
    160             Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
    161             fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;
    162 
    163             phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
    164             phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
    165             phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);
    166 
    167             integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
    168             phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
    169         }
    170 
    171         public void toValues(ContentValues values) {
    172             putValueIfPresent(values, StructuredName.PREFIX, prefix);
    173             putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
    174             putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
    175             putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
    176             putValueIfPresent(values, StructuredName.SUFFIX, suffix);
    177             values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
    178             putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
    179             putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
    180             putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
    181             values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
    182         }
    183 
    184         private void putValueIfPresent(ContentValues values, String name, String value) {
    185             if (value != null) {
    186                 values.put(name, value);
    187             }
    188         }
    189 
    190         public void clear() {
    191             prefix = null;
    192             givenNames = null;
    193             middleName = null;
    194             familyName = null;
    195             suffix = null;
    196             fullNameStyle = FullNameStyle.UNDEFINED;
    197             phoneticFamilyName = null;
    198             phoneticMiddleName = null;
    199             phoneticGivenName = null;
    200             phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
    201         }
    202 
    203         public boolean isEmpty() {
    204             return TextUtils.isEmpty(givenNames)
    205                     && TextUtils.isEmpty(middleName)
    206                     && TextUtils.isEmpty(familyName)
    207                     && TextUtils.isEmpty(suffix)
    208                     && TextUtils.isEmpty(phoneticFamilyName)
    209                     && TextUtils.isEmpty(phoneticMiddleName)
    210                     && TextUtils.isEmpty(phoneticGivenName);
    211         }
    212 
    213         @Override
    214         public String toString() {
    215             return "[prefix: " + prefix + " given: " + givenNames + " middle: " + middleName
    216                     + " family: " + familyName + " suffix: " + suffix + " ph/given: "
    217                     + phoneticGivenName + " ph/middle: " + phoneticMiddleName + " ph/family: "
    218                     + phoneticFamilyName + "]";
    219         }
    220     }
    221 
    222     private static class NameTokenizer extends StringTokenizer {
    223         private final String[] mTokens;
    224         private int mDotBitmask;
    225         private int mCommaBitmask;
    226         private int mStartPointer;
    227         private int mEndPointer;
    228 
    229         public NameTokenizer(String fullName) {
    230             super(fullName, " .,", true);
    231 
    232             mTokens = new String[MAX_TOKENS];
    233 
    234             // Iterate over tokens, skipping over empty ones and marking tokens that
    235             // are followed by dots.
    236             while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
    237                 final String token = nextToken();
    238                 if (token.length() > 0) {
    239                     final char c = token.charAt(0);
    240                     if (c == ' ') {
    241                         continue;
    242                     }
    243                 }
    244 
    245                 if (mEndPointer > 0 && token.charAt(0) == '.') {
    246                     mDotBitmask |= (1 << (mEndPointer - 1));
    247                 } else if (mEndPointer > 0 && token.charAt(0) == ',') {
    248                     mCommaBitmask |= (1 << (mEndPointer - 1));
    249                 } else {
    250                     mTokens[mEndPointer] = token;
    251                     mEndPointer++;
    252                 }
    253             }
    254         }
    255 
    256         /**
    257          * Returns true if the token is followed by a dot in the original full name.
    258          */
    259         public boolean hasDot(int index) {
    260             return (mDotBitmask & (1 << index)) != 0;
    261         }
    262 
    263         /**
    264          * Returns true if the token is followed by a comma in the original full name.
    265          */
    266         public boolean hasComma(int index) {
    267             return (mCommaBitmask & (1 << index)) != 0;
    268         }
    269     }
    270 
    271     /**
    272      * Constructor.
    273      *
    274      * @param commonPrefixes comma-separated list of common prefixes,
    275      *            e.g. "Mr, Ms, Mrs"
    276      * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
    277      *            e.g. "d', st, st., von"
    278      * @param commonSuffixes comma-separated list of common suffixes,
    279      *            e.g. "Jr, M.D., MD, D.D.S."
    280      * @param commonConjunctions comma-separated list of common conjuctions,
    281      *            e.g. "AND, Or"
    282      */
    283     public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
    284             String commonSuffixes, String commonConjunctions, Locale locale) {
    285         // TODO: refactor this to use <string-array> resources
    286         mPrefixesSet = convertToSet(commonPrefixes);
    287         mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
    288         mSuffixesSet = convertToSet(commonSuffixes);
    289         mConjuctions = convertToSet(commonConjunctions);
    290         mLocale = locale != null ? locale : Locale.getDefault();
    291         mLanguage = mLocale.getLanguage().toLowerCase();
    292 
    293         int maxLength = 0;
    294         for (String suffix : mSuffixesSet) {
    295             if (suffix.length() > maxLength) {
    296                 maxLength = suffix.length();
    297             }
    298         }
    299 
    300         mMaxSuffixLength = maxLength;
    301     }
    302 
    303     /**
    304      * Converts a comma-separated list of Strings to a set of Strings. Trims strings
    305      * and converts them to upper case.
    306      */
    307     private static HashSet<String> convertToSet(String strings) {
    308         HashSet<String> set = new HashSet<String>();
    309         if (strings != null) {
    310             String[] split = strings.split(",");
    311             for (int i = 0; i < split.length; i++) {
    312                 set.add(split[i].trim().toUpperCase());
    313             }
    314         }
    315         return set;
    316     }
    317 
    318     /**
    319      * Parses a full name and returns components as a list of tokens.
    320      */
    321     public int tokenize(String[] tokens, String fullName) {
    322         if (fullName == null) {
    323             return 0;
    324         }
    325 
    326         NameTokenizer tokenizer = new NameTokenizer(fullName);
    327 
    328         if (tokenizer.mStartPointer == tokenizer.mEndPointer) {
    329             return 0;
    330         }
    331 
    332         String firstToken = tokenizer.mTokens[tokenizer.mStartPointer];
    333         if (mPrefixesSet.contains(firstToken.toUpperCase())) {
    334            tokenizer.mStartPointer++;
    335         }
    336         int count = 0;
    337         for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) {
    338             tokens[count++] = tokenizer.mTokens[i];
    339         }
    340 
    341         return count;
    342     }
    343 
    344 
    345     /**
    346      * Parses a full name and returns parsed components in the Name object.
    347      */
    348     public void split(Name name, String fullName) {
    349         if (fullName == null) {
    350             return;
    351         }
    352 
    353         int fullNameStyle = guessFullNameStyle(fullName);
    354         if (fullNameStyle == FullNameStyle.CJK) {
    355             fullNameStyle = getAdjustedFullNameStyle(fullNameStyle);
    356         }
    357 
    358         split(name, fullName, fullNameStyle);
    359     }
    360 
    361     /**
    362      * Parses a full name and returns parsed components in the Name object
    363      * with a given fullNameStyle.
    364      */
    365     public void split(Name name, String fullName, int fullNameStyle) {
    366         if (fullName == null) {
    367             return;
    368         }
    369 
    370         name.fullNameStyle = fullNameStyle;
    371 
    372         switch (fullNameStyle) {
    373             case FullNameStyle.CHINESE:
    374                 splitChineseName(name, fullName);
    375                 break;
    376 
    377             case FullNameStyle.JAPANESE:
    378                 splitJapaneseName(name, fullName);
    379                 break;
    380 
    381             case FullNameStyle.KOREAN:
    382                 splitKoreanName(name, fullName);
    383                 break;
    384 
    385             default:
    386                 splitWesternName(name, fullName);
    387         }
    388     }
    389 
    390     /**
    391      * Splits a full name composed according to the Western tradition:
    392      * <pre>
    393      *   [prefix] given name(s) [[middle name] family name] [, suffix]
    394      *   [prefix] family name, given name [middle name] [,suffix]
    395      * </pre>
    396      */
    397     private void splitWesternName(Name name, String fullName) {
    398         NameTokenizer tokens = new NameTokenizer(fullName);
    399         parsePrefix(name, tokens);
    400 
    401         // If the name consists of just one or two tokens, treat them as first/last name,
    402         // not as suffix.  Example: John Ma; Ma is last name, not "M.A.".
    403         if (tokens.mEndPointer > 2) {
    404             parseSuffix(name, tokens);
    405         }
    406 
    407         if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) {
    408             name.givenNames = tokens.mTokens[tokens.mStartPointer];
    409         } else {
    410             parseLastName(name, tokens);
    411             parseMiddleName(name, tokens);
    412             parseGivenNames(name, tokens);
    413         }
    414     }
    415 
    416     /**
    417      * Splits a full name composed according to the Chinese tradition:
    418      * <pre>
    419      *   [family name [middle name]] given name
    420      * </pre>
    421      */
    422     private void splitChineseName(Name name, String fullName) {
    423         StringTokenizer tokenizer = new StringTokenizer(fullName);
    424         while (tokenizer.hasMoreTokens()) {
    425             String token = tokenizer.nextToken();
    426             if (name.givenNames == null) {
    427                 name.givenNames = token;
    428             } else if (name.familyName == null) {
    429                 name.familyName = name.givenNames;
    430                 name.givenNames = token;
    431             } else if (name.middleName == null) {
    432                 name.middleName = name.givenNames;
    433                 name.givenNames = token;
    434             } else {
    435                 name.middleName = name.middleName + name.givenNames;
    436                 name.givenNames = token;
    437             }
    438         }
    439 
    440         // If a single word parse that word up.
    441         if (name.givenNames != null && name.familyName == null && name.middleName == null) {
    442             int length = fullName.length();
    443             if (length == 2) {
    444                 name.familyName = fullName.substring(0, 1);
    445                 name.givenNames = fullName.substring(1);
    446             } else if (length == 3) {
    447                 name.familyName = fullName.substring(0, 1);
    448                 name.middleName = fullName.substring(1, 2);
    449                 name.givenNames = fullName.substring(2);
    450             } else if (length == 4) {
    451                 name.familyName = fullName.substring(0, 2);
    452                 name.middleName = fullName.substring(2, 3);
    453                 name.givenNames = fullName.substring(3);
    454             }
    455 
    456         }
    457     }
    458 
    459     /**
    460      * Splits a full name composed according to the Japanese tradition:
    461      * <pre>
    462      *   [family name] given name(s)
    463      * </pre>
    464      */
    465     private void splitJapaneseName(Name name, String fullName) {
    466         StringTokenizer tokenizer = new StringTokenizer(fullName);
    467         while (tokenizer.hasMoreTokens()) {
    468             String token = tokenizer.nextToken();
    469             if (name.givenNames == null) {
    470                 name.givenNames = token;
    471             } else if (name.familyName == null) {
    472                 name.familyName = name.givenNames;
    473                 name.givenNames = token;
    474             } else {
    475                 name.givenNames += " " + token;
    476             }
    477         }
    478     }
    479 
    480     /**
    481      * Splits a full name composed according to the Korean tradition:
    482      * <pre>
    483      *   [family name] given name(s)
    484      * </pre>
    485      */
    486     private void splitKoreanName(Name name, String fullName) {
    487         StringTokenizer tokenizer = new StringTokenizer(fullName);
    488         if (tokenizer.countTokens() > 1) {
    489             // Each name can be identified by separators.
    490             while (tokenizer.hasMoreTokens()) {
    491                 String token = tokenizer.nextToken();
    492                 if (name.givenNames == null) {
    493                     name.givenNames = token;
    494                 } else if (name.familyName == null) {
    495                     name.familyName = name.givenNames;
    496                     name.givenNames = token;
    497                 } else {
    498                     name.givenNames += " " + token;
    499                 }
    500             }
    501         } else {
    502             // There is no separator. Try to guess family name.
    503             // The length of most family names is 1.
    504             int familyNameLength = 1;
    505 
    506             // Compare with 2-length family names.
    507             for (String twoLengthFamilyName : KOREAN_TWO_CHARCTER_FAMILY_NAMES) {
    508                 if (fullName.startsWith(twoLengthFamilyName)) {
    509                     familyNameLength = 2;
    510                     break;
    511                 }
    512             }
    513 
    514             name.familyName = fullName.substring(0, familyNameLength);
    515             if (fullName.length() > familyNameLength) {
    516                 name.givenNames = fullName.substring(familyNameLength);
    517             }
    518         }
    519     }
    520 
    521     /**
    522      * Concatenates components of a name according to the rules dictated by the name style.
    523      *
    524      * @param givenNameFirst is ignored for CJK display name styles
    525      */
    526     public String join(Name name, boolean givenNameFirst, boolean includePrefix) {
    527         String prefix = includePrefix ? name.prefix : null;
    528         switch (name.fullNameStyle) {
    529             case FullNameStyle.CJK:
    530             case FullNameStyle.CHINESE:
    531             case FullNameStyle.KOREAN:
    532                 return join(prefix, name.familyName, name.middleName, name.givenNames,
    533                         name.suffix, false, false, false);
    534 
    535             case FullNameStyle.JAPANESE:
    536                 return join(prefix, name.familyName, name.middleName, name.givenNames,
    537                         name.suffix, true, false, false);
    538 
    539             default:
    540                 if (givenNameFirst) {
    541                     return join(prefix, name.givenNames, name.middleName, name.familyName,
    542                             name.suffix, true, false, true);
    543                 } else {
    544                     return join(prefix, name.familyName, name.givenNames, name.middleName,
    545                             name.suffix, true, true, true);
    546                 }
    547         }
    548     }
    549 
    550     /**
    551      * Concatenates components of the phonetic name following the CJK tradition:
    552      * family name + middle name + given name(s).
    553      */
    554     public String joinPhoneticName(Name name) {
    555         return join(null, name.phoneticFamilyName,
    556                 name.phoneticMiddleName, name.phoneticGivenName, null, true, false, false);
    557     }
    558 
    559     /**
    560      * Concatenates parts of a full name inserting spaces and commas as specified.
    561      */
    562     private String join(String prefix, String part1, String part2, String part3, String suffix,
    563             boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) {
    564         prefix = prefix == null ? null: prefix.trim();
    565         part1 = part1 == null ? null: part1.trim();
    566         part2 = part2 == null ? null: part2.trim();
    567         part3 = part3 == null ? null: part3.trim();
    568         suffix = suffix == null ? null: suffix.trim();
    569 
    570         boolean hasPrefix = !TextUtils.isEmpty(prefix);
    571         boolean hasPart1 = !TextUtils.isEmpty(part1);
    572         boolean hasPart2 = !TextUtils.isEmpty(part2);
    573         boolean hasPart3 = !TextUtils.isEmpty(part3);
    574         boolean hasSuffix = !TextUtils.isEmpty(suffix);
    575 
    576         boolean isSingleWord = true;
    577         String singleWord = null;
    578 
    579         if (hasPrefix) {
    580             singleWord = prefix;
    581         }
    582 
    583         if (hasPart1) {
    584             if (singleWord != null) {
    585                 isSingleWord = false;
    586             } else {
    587                 singleWord = part1;
    588             }
    589         }
    590 
    591         if (hasPart2) {
    592             if (singleWord != null) {
    593                 isSingleWord = false;
    594             } else {
    595                 singleWord = part2;
    596             }
    597         }
    598 
    599         if (hasPart3) {
    600             if (singleWord != null) {
    601                 isSingleWord = false;
    602             } else {
    603                 singleWord = part3;
    604             }
    605         }
    606 
    607         if (hasSuffix) {
    608             if (singleWord != null) {
    609                 isSingleWord = false;
    610             } else {
    611                 singleWord = normalizedSuffix(suffix);
    612             }
    613         }
    614 
    615         if (isSingleWord) {
    616             return singleWord;
    617         }
    618 
    619         StringBuilder sb = new StringBuilder();
    620 
    621         if (hasPrefix) {
    622             sb.append(prefix);
    623         }
    624 
    625         if (hasPart1) {
    626             if (hasPrefix) {
    627                 sb.append(' ');
    628             }
    629             sb.append(part1);
    630         }
    631 
    632         if (hasPart2) {
    633             if (hasPrefix || hasPart1) {
    634                 if (useCommaAfterPart1) {
    635                     sb.append(',');
    636                 }
    637                 if (useSpace) {
    638                     sb.append(' ');
    639                 }
    640             }
    641             sb.append(part2);
    642         }
    643 
    644         if (hasPart3) {
    645             if (hasPrefix || hasPart1 || hasPart2) {
    646                 if (useSpace) {
    647                     sb.append(' ');
    648                 }
    649             }
    650             sb.append(part3);
    651         }
    652 
    653         if (hasSuffix) {
    654             if (hasPrefix || hasPart1 || hasPart2 || hasPart3) {
    655                 if (useCommaAfterPart3) {
    656                     sb.append(',');
    657                 }
    658                 if (useSpace) {
    659                     sb.append(' ');
    660                 }
    661             }
    662             sb.append(normalizedSuffix(suffix));
    663         }
    664 
    665         return sb.toString();
    666     }
    667 
    668     /**
    669      * Puts a dot after the supplied suffix if that is the accepted form of the suffix,
    670      * e.g. "Jr." and "Sr.", but not "I", "II" and "III".
    671      */
    672     private String normalizedSuffix(String suffix) {
    673         int length = suffix.length();
    674         if (length == 0 || suffix.charAt(length - 1) == '.') {
    675             return suffix;
    676         }
    677 
    678         String withDot = suffix + '.';
    679         if (mSuffixesSet.contains(withDot.toUpperCase())) {
    680             return withDot;
    681         } else {
    682             return suffix;
    683         }
    684     }
    685 
    686     /**
    687      * If the supplied name style is undefined, returns a default based on the language,
    688      * otherwise returns the supplied name style itself.
    689      *
    690      * @param nameStyle See {@link FullNameStyle}.
    691      */
    692     public int getAdjustedFullNameStyle(int nameStyle) {
    693         if (nameStyle == FullNameStyle.UNDEFINED) {
    694             if (JAPANESE_LANGUAGE.equals(mLanguage)) {
    695                 return FullNameStyle.JAPANESE;
    696             } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
    697                 return FullNameStyle.KOREAN;
    698             } else if (CHINESE_LANGUAGE.equals(mLanguage)) {
    699                 return FullNameStyle.CHINESE;
    700             } else {
    701                 return FullNameStyle.WESTERN;
    702             }
    703         } else if (nameStyle == FullNameStyle.CJK) {
    704             if (JAPANESE_LANGUAGE.equals(mLanguage)) {
    705                 return FullNameStyle.JAPANESE;
    706             } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
    707                 return FullNameStyle.KOREAN;
    708             } else {
    709                 return FullNameStyle.CHINESE;
    710             }
    711         }
    712         return nameStyle;
    713     }
    714 
    715     /**
    716      * Parses the first word from the name if it is a prefix.
    717      */
    718     private void parsePrefix(Name name, NameTokenizer tokens) {
    719         if (tokens.mStartPointer == tokens.mEndPointer) {
    720             return;
    721         }
    722 
    723         String firstToken = tokens.mTokens[tokens.mStartPointer];
    724         if (mPrefixesSet.contains(firstToken.toUpperCase())) {
    725             if (tokens.hasDot(tokens.mStartPointer)) {
    726                 firstToken += '.';
    727             }
    728             name.prefix = firstToken;
    729             tokens.mStartPointer++;
    730         }
    731     }
    732 
    /**
     * Parses the last word(s) from the name if it is a suffix.
     * <p>
     * Three forms are tried, in this order:
     * <ol>
     * <li>An explicit comma-separated suffix: when more than two tokens remain and the
     * second-to-last token is followed by a comma, the last token is taken as the suffix
     * without consulting {@code mSuffixesSet}.</li>
     * <li>A single-token suffix: the upper-cased last token is looked up in
     * {@code mSuffixesSet}.</li>
     * <li>A dotted multi-token suffix (e.g. M.D., D.D.S.): preceding tokens are prepended one
     * at a time, joined with dots, and each candidate is looked up in
     * {@code mSuffixesSet}.</li>
     * </ol>
     * On success {@code name.suffix} is set and {@code tokens.mEndPointer} is moved back so
     * that the consumed tokens are no longer part of the remaining range.
     */
    private void parseSuffix(Name name, NameTokenizer tokens) {
        // Nothing left to parse.
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        String lastToken = tokens.mTokens[tokens.mEndPointer - 1];

        // Take care of an explicit comma-separated suffix
        if (tokens.mEndPointer - tokens.mStartPointer > 2
                && tokens.hasComma(tokens.mEndPointer - 2)) {
            // Re-attach the trailing dot reported for the last token.
            if (tokens.hasDot(tokens.mEndPointer - 1)) {
                lastToken += '.';
            }
            name.suffix = lastToken;
            tokens.mEndPointer--;
            return;
        }

        // A token longer than the longest known suffix cannot match; skip the lookups.
        if (lastToken.length() > mMaxSuffixLength) {
            return;
        }

        // Single-token suffix: look up the upper-cased token as-is (no dot appended).
        String normalized = lastToken.toUpperCase();
        if (mSuffixesSet.contains(normalized)) {
            name.suffix = lastToken;
            tokens.mEndPointer--;
            return;
        }

        // From here on two strings are built in parallel: lastToken keeps the text that
        // will be stored as the suffix, normalized is the lookup key. The key always
        // gets a trailing dot, the stored text only if the token actually had one.
        if (tokens.hasDot(tokens.mEndPointer - 1)) {
            lastToken += '.';
        }
        normalized += ".";

        // Take care of suffixes like M.D. and D.D.S.
        // pos is the index of the earliest token currently included in the candidate.
        int pos = tokens.mEndPointer - 1;
        while (normalized.length() <= mMaxSuffixLength) {

            if (mSuffixesSet.contains(normalized)) {
                name.suffix = lastToken;
                // Drop every token from pos onwards from the remaining range.
                tokens.mEndPointer = pos;
                return;
            }

            // No more tokens to prepend.
            if (pos == tokens.mStartPointer) {
                break;
            }

            // Extend the candidate by one more token to the left. The stored text uses
            // a dot or a space as separator depending on hasDot for that token...
            pos--;
            if (tokens.hasDot(pos)) {
                lastToken = tokens.mTokens[pos] + "." + lastToken;
            } else {
                lastToken = tokens.mTokens[pos] + " " + lastToken;
            }

            // ...while the lookup key is always joined with dots.
            normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
        }
    }
    794 
    795     private void parseLastName(Name name, NameTokenizer tokens) {
    796         if (tokens.mStartPointer == tokens.mEndPointer) {
    797             return;
    798         }
    799 
    800         // If the first word is followed by a comma, assume that it's the family name
    801         if (tokens.hasComma(tokens.mStartPointer)) {
    802            name.familyName = tokens.mTokens[tokens.mStartPointer];
    803            tokens.mStartPointer++;
    804            return;
    805         }
    806 
    807         // If the second word is followed by a comma and the first word
    808         // is a last name prefix as in "de Sade" and "von Cliburn", treat
    809         // the first two words as the family name.
    810         if (tokens.mStartPointer + 1 < tokens.mEndPointer
    811                 && tokens.hasComma(tokens.mStartPointer + 1)
    812                 && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) {
    813             String familyNamePrefix = tokens.mTokens[tokens.mStartPointer];
    814             if (tokens.hasDot(tokens.mStartPointer)) {
    815                 familyNamePrefix += '.';
    816             }
    817             name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1];
    818             tokens.mStartPointer += 2;
    819             return;
    820         }
    821 
    822         // Finally, assume that the last word is the last name
    823         name.familyName = tokens.mTokens[tokens.mEndPointer - 1];
    824         tokens.mEndPointer--;
    825 
    826         // Take care of last names like "de Sade" and "von Cliburn"
    827         if ((tokens.mEndPointer - tokens.mStartPointer) > 0) {
    828             String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1];
    829             if (isFamilyNamePrefix(lastNamePrefix)) {
    830                 if (tokens.hasDot(tokens.mEndPointer - 1)) {
    831                     lastNamePrefix += '.';
    832                 }
    833                 name.familyName = lastNamePrefix + " " + name.familyName;
    834                 tokens.mEndPointer--;
    835             }
    836         }
    837     }
    838 
    839     /**
    840      * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de"
    841      */
    842     private boolean isFamilyNamePrefix(String word) {
    843         final String normalized = word.toUpperCase();
    844 
    845         return mLastNamePrefixesSet.contains(normalized)
    846                 || mLastNamePrefixesSet.contains(normalized + ".");
    847     }
    848 
    849 
    850     private void parseMiddleName(Name name, NameTokenizer tokens) {
    851         if (tokens.mStartPointer == tokens.mEndPointer) {
    852             return;
    853         }
    854 
    855         if ((tokens.mEndPointer - tokens.mStartPointer) > 1) {
    856             if ((tokens.mEndPointer - tokens.mStartPointer) == 2
    857                     || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2].
    858                             toUpperCase())) {
    859                 name.middleName = tokens.mTokens[tokens.mEndPointer - 1];
    860                 if (tokens.hasDot(tokens.mEndPointer - 1)) {
    861                     name.middleName += '.';
    862                 }
    863                 tokens.mEndPointer--;
    864             }
    865         }
    866     }
    867 
    868     private void parseGivenNames(Name name, NameTokenizer tokens) {
    869         if (tokens.mStartPointer == tokens.mEndPointer) {
    870             return;
    871         }
    872 
    873         if ((tokens.mEndPointer - tokens.mStartPointer) == 1) {
    874             name.givenNames = tokens.mTokens[tokens.mStartPointer];
    875         } else {
    876             StringBuilder sb = new StringBuilder();
    877             for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
    878                 if (i != tokens.mStartPointer) {
    879                     sb.append(' ');
    880                 }
    881                 sb.append(tokens.mTokens[i]);
    882                 if (tokens.hasDot(i)) {
    883                     sb.append('.');
    884                 }
    885             }
    886             name.givenNames = sb.toString();
    887         }
    888     }
    889 
    890     /**
    891      * Makes the best guess at the expected full name style based on the character set
    892      * used in the supplied name.  If the phonetic name is also supplied, tries to
    893      * differentiate between Chinese, Japanese and Korean based on the alphabet used
    894      * for the phonetic name.
    895      */
    896     public void guessNameStyle(Name name) {
    897         guessFullNameStyle(name);
    898         guessPhoneticNameStyle(name);
    899         name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle,
    900                 name.phoneticNameStyle);
    901     }
    902 
    903     /**
    904      * Updates the display name style according to the phonetic name style if we
    905      * were unsure about display name style based on the name components, but
    906      * phonetic name makes it more definitive.
    907      */
    908     public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) {
    909         if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
    910             if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) {
    911                 if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) {
    912                     return FullNameStyle.JAPANESE;
    913                 } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) {
    914                     return FullNameStyle.KOREAN;
    915                 }
    916                 if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) {
    917                     return FullNameStyle.CHINESE;
    918                 }
    919             }
    920         }
    921         return nameStyle;
    922     }
    923 
    924     /**
    925      * Makes the best guess at the expected full name style based on the character set
    926      * used in the supplied name.
    927      */
    928     private void guessFullNameStyle(NameSplitter.Name name) {
    929         if (name.fullNameStyle != FullNameStyle.UNDEFINED) {
    930             return;
    931         }
    932 
    933         int bestGuess = guessFullNameStyle(name.givenNames);
    934         // A mix of Hanzi and latin chars are common in China, so we have to go through all names
    935         // if the name is not JANPANESE or KOREAN.
    936         if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK
    937                 && bestGuess != FullNameStyle.WESTERN) {
    938             name.fullNameStyle = bestGuess;
    939             return;
    940         }
    941 
    942         int guess = guessFullNameStyle(name.familyName);
    943         if (guess != FullNameStyle.UNDEFINED) {
    944             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    945                 name.fullNameStyle = guess;
    946                 return;
    947             }
    948             bestGuess = guess;
    949         }
    950 
    951         guess = guessFullNameStyle(name.middleName);
    952         if (guess != FullNameStyle.UNDEFINED) {
    953             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    954                 name.fullNameStyle = guess;
    955                 return;
    956             }
    957             bestGuess = guess;
    958         }
    959 
    960         guess = guessFullNameStyle(name.prefix);
    961         if (guess != FullNameStyle.UNDEFINED) {
    962             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    963                 name.fullNameStyle = guess;
    964                 return;
    965             }
    966             bestGuess = guess;
    967         }
    968 
    969         guess = guessFullNameStyle(name.suffix);
    970         if (guess != FullNameStyle.UNDEFINED) {
    971             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    972                 name.fullNameStyle = guess;
    973                 return;
    974             }
    975             bestGuess = guess;
    976         }
    977 
    978         name.fullNameStyle = bestGuess;
    979     }
    980 
    981     public int guessFullNameStyle(String name) {
    982         if (name == null) {
    983             return FullNameStyle.UNDEFINED;
    984         }
    985 
    986         int nameStyle = FullNameStyle.UNDEFINED;
    987         int length = name.length();
    988         int offset = 0;
    989         while (offset < length) {
    990             int codePoint = Character.codePointAt(name, offset);
    991             if (Character.isLetter(codePoint)) {
    992                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
    993 
    994                 if (!isLatinUnicodeBlock(unicodeBlock)) {
    995 
    996                     if (isCJKUnicodeBlock(unicodeBlock)) {
    997                         // We don't know if this is Chinese, Japanese or Korean -
    998                         // trying to figure out by looking at other characters in the name
    999                         return guessCJKNameStyle(name, offset + Character.charCount(codePoint));
   1000                     }
   1001 
   1002                     if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
   1003                         return FullNameStyle.JAPANESE;
   1004                     }
   1005 
   1006                     if (isKoreanUnicodeBlock(unicodeBlock)) {
   1007                         return FullNameStyle.KOREAN;
   1008                     }
   1009                 }
   1010                 nameStyle = FullNameStyle.WESTERN;
   1011             }
   1012             offset += Character.charCount(codePoint);
   1013         }
   1014         return nameStyle;
   1015     }
   1016 
   1017     private int guessCJKNameStyle(String name, int offset) {
   1018         int length = name.length();
   1019         while (offset < length) {
   1020             int codePoint = Character.codePointAt(name, offset);
   1021             if (Character.isLetter(codePoint)) {
   1022                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
   1023                 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
   1024                     return FullNameStyle.JAPANESE;
   1025                 }
   1026                 if (isKoreanUnicodeBlock(unicodeBlock)) {
   1027                     return FullNameStyle.KOREAN;
   1028                 }
   1029             }
   1030             offset += Character.charCount(codePoint);
   1031         }
   1032 
   1033         return FullNameStyle.CJK;
   1034     }
   1035 
   1036     private void guessPhoneticNameStyle(NameSplitter.Name name) {
   1037         if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
   1038             return;
   1039         }
   1040 
   1041         int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName);
   1042         if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) {
   1043             name.phoneticNameStyle = bestGuess;
   1044             return;
   1045         }
   1046 
   1047         int guess = guessPhoneticNameStyle(name.phoneticGivenName);
   1048         if (guess != FullNameStyle.UNDEFINED) {
   1049             if (guess != FullNameStyle.CJK) {
   1050                 name.phoneticNameStyle = guess;
   1051                 return;
   1052             }
   1053             bestGuess = guess;
   1054         }
   1055 
   1056         guess = guessPhoneticNameStyle(name.phoneticMiddleName);
   1057         if (guess != FullNameStyle.UNDEFINED) {
   1058             if (guess != FullNameStyle.CJK) {
   1059                 name.phoneticNameStyle = guess;
   1060                 return;
   1061             }
   1062             bestGuess = guess;
   1063         }
   1064     }
   1065 
   1066     public int guessPhoneticNameStyle(String name) {
   1067         if (name == null) {
   1068             return PhoneticNameStyle.UNDEFINED;
   1069         }
   1070 
   1071         int nameStyle = PhoneticNameStyle.UNDEFINED;
   1072         int length = name.length();
   1073         int offset = 0;
   1074         while (offset < length) {
   1075             int codePoint = Character.codePointAt(name, offset);
   1076             if (Character.isLetter(codePoint)) {
   1077                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
   1078                 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
   1079                     return PhoneticNameStyle.JAPANESE;
   1080                 }
   1081                 if (isKoreanUnicodeBlock(unicodeBlock)) {
   1082                     return PhoneticNameStyle.KOREAN;
   1083                 }
   1084                 if (isLatinUnicodeBlock(unicodeBlock)) {
   1085                     return PhoneticNameStyle.PINYIN;
   1086                 }
   1087             }
   1088             offset += Character.charCount(codePoint);
   1089         }
   1090 
   1091         return nameStyle;
   1092     }
   1093 
   1094     private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
   1095         return unicodeBlock == UnicodeBlock.BASIC_LATIN ||
   1096                 unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT ||
   1097                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A ||
   1098                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B ||
   1099                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
   1100     }
   1101 
   1102     private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
   1103         return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
   1104                 || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
   1105                 || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
   1106                 || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
   1107                 || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT
   1108                 || block == UnicodeBlock.CJK_COMPATIBILITY
   1109                 || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
   1110                 || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
   1111                 || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
   1112     }
   1113 
   1114     private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) {
   1115         return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES ||
   1116                 unicodeBlock == UnicodeBlock.HANGUL_JAMO ||
   1117                 unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
   1118     }
   1119 
   1120     private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
   1121         return unicodeBlock == UnicodeBlock.KATAKANA ||
   1122                 unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS ||
   1123                 unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS ||
   1124                 unicodeBlock == UnicodeBlock.HIRAGANA;
   1125     }
   1126 }
   1127