Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 package com.android.providers.contacts;
     17 
     18 import android.content.ContentValues;
     19 import android.provider.ContactsContract.FullNameStyle;
     20 import android.provider.ContactsContract.PhoneticNameStyle;
     21 import android.provider.ContactsContract.CommonDataKinds.StructuredName;
     22 import android.text.TextUtils;
     23 
     24 import java.lang.Character.UnicodeBlock;
     25 import java.util.HashSet;
     26 import java.util.Locale;
     27 import java.util.StringTokenizer;
     28 
     29 /**
     30  * The purpose of this class is to split a full name into given names and last
     31  * name. The logic only supports having a single last name. If the full name has
     32  * multiple last names the output will be incorrect.
     33  * <p>
     34  * Core algorithm:
     35  * <ol>
     36  * <li>Remove the suffixes (III, Ph.D., M.D.).</li>
     37  * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li>
     38  * <li>Assign the last remaining token as the last name.</li>
     39  * <li>If the previous word to the last name is one from LASTNAME_PREFIXES, use
     40  * this word also as the last name.</li>
     41  * <li>Assign the rest of the words as the "given names".</li>
     42  * </ol>
     43  */
     44 public class NameSplitter {
     45 
    /**
     * Maximum number of name tokens retained by the tokenizer; once this many tokens
     * have been collected the rest of the input is not examined.
     */
    public static final int MAX_TOKENS = 10;

    // Lower-cased language codes that are compared against mLanguage when a default
    // name style has to be chosen.
    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
    private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();

    // This includes simplified and traditional Chinese
    private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();

    // The sets below hold the trimmed, upper-cased entries of the comma-separated
    // lists handed to the constructor.
    private final HashSet<String> mPrefixesSet;
    private final HashSet<String> mSuffixesSet;
    // Length of the longest entry in mSuffixesSet; lets suffix parsing give up early.
    private final int mMaxSuffixLength;
    private final HashSet<String> mLastNamePrefixesSet;
    private final HashSet<String> mConjuctions;
    // Locale supplied to the constructor, or the default locale if none was supplied.
    private final Locale mLocale;
    // Lower-cased language code of mLocale.
    private final String mLanguage;
     61 
    /**
     * Two-character long Korean family names. Used when a Korean name contains no
     * separator, to decide whether the family name spans one or two characters.
     * http://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EB%B3%B5%EC%84%B1
     */
    private static final String[] KOREAN_TWO_CHARCTER_FAMILY_NAMES = {
        "\uAC15\uC804", // Gang Jeon
        "\uB0A8\uAD81", // Nam Goong
        "\uB3C5\uACE0", // Dok Go
        "\uB3D9\uBC29", // Dong Bang
        "\uB9DD\uC808", // Mang Jeol
        "\uC0AC\uACF5", // Sa Gong
        "\uC11C\uBB38", // Seo Moon
        "\uC120\uC6B0", // Seon Woo
        "\uC18C\uBD09", // So Bong
        "\uC5B4\uAE08", // Uh Geum
        "\uC7A5\uACE1", // Jang Gok
        "\uC81C\uAC08", // Je Gal
        "\uD669\uBCF4"  // Hwang Bo
    };
     81 
     82     public static class Name {
     83         public String prefix;
     84         public String givenNames;
     85         public String middleName;
     86         public String familyName;
     87         public String suffix;
     88 
     89         public int fullNameStyle;
     90 
     91         public String phoneticFamilyName;
     92         public String phoneticMiddleName;
     93         public String phoneticGivenName;
     94 
     95         public int phoneticNameStyle;
     96 
     97         public Name() {
     98         }
     99 
    100         public Name(String prefix, String givenNames, String middleName, String familyName,
    101                 String suffix) {
    102             this.prefix = prefix;
    103             this.givenNames = givenNames;
    104             this.middleName = middleName;
    105             this.familyName = familyName;
    106             this.suffix = suffix;
    107         }
    108 
    109         public String getPrefix() {
    110             return prefix;
    111         }
    112 
    113         public String getGivenNames() {
    114             return givenNames;
    115         }
    116 
    117         public String getMiddleName() {
    118             return middleName;
    119         }
    120 
    121         public String getFamilyName() {
    122             return familyName;
    123         }
    124 
    125         public String getSuffix() {
    126             return suffix;
    127         }
    128 
    129         public int getFullNameStyle() {
    130             return fullNameStyle;
    131         }
    132 
    133         public String getPhoneticFamilyName() {
    134             return phoneticFamilyName;
    135         }
    136 
    137         public String getPhoneticMiddleName() {
    138             return phoneticMiddleName;
    139         }
    140 
    141         public String getPhoneticGivenName() {
    142             return phoneticGivenName;
    143         }
    144 
    145         public int getPhoneticNameStyle() {
    146             return phoneticNameStyle;
    147         }
    148 
    149         public void fromValues(ContentValues values) {
    150             prefix = values.getAsString(StructuredName.PREFIX);
    151             givenNames = values.getAsString(StructuredName.GIVEN_NAME);
    152             middleName = values.getAsString(StructuredName.MIDDLE_NAME);
    153             familyName = values.getAsString(StructuredName.FAMILY_NAME);
    154             suffix = values.getAsString(StructuredName.SUFFIX);
    155 
    156             Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
    157             fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;
    158 
    159             phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
    160             phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
    161             phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);
    162 
    163             integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
    164             phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
    165         }
    166 
    167         public void toValues(ContentValues values) {
    168             putValueIfPresent(values, StructuredName.PREFIX, prefix);
    169             putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
    170             putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
    171             putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
    172             putValueIfPresent(values, StructuredName.SUFFIX, suffix);
    173             values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
    174             putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
    175             putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
    176             putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
    177             values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
    178         }
    179 
    180         private void putValueIfPresent(ContentValues values, String name, String value) {
    181             if (value != null) {
    182                 values.put(name, value);
    183             }
    184         }
    185 
    186         public void clear() {
    187             prefix = null;
    188             givenNames = null;
    189             middleName = null;
    190             familyName = null;
    191             suffix = null;
    192             fullNameStyle = FullNameStyle.UNDEFINED;
    193             phoneticFamilyName = null;
    194             phoneticMiddleName = null;
    195             phoneticGivenName = null;
    196             phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
    197         }
    198 
    199         public boolean isEmpty() {
    200             return TextUtils.isEmpty(givenNames)
    201                     && TextUtils.isEmpty(middleName)
    202                     && TextUtils.isEmpty(familyName)
    203                     && TextUtils.isEmpty(suffix)
    204                     && TextUtils.isEmpty(phoneticFamilyName)
    205                     && TextUtils.isEmpty(phoneticMiddleName)
    206                     && TextUtils.isEmpty(phoneticGivenName);
    207         }
    208 
    209         @Override
    210         public String toString() {
    211             return "[prefix: " + prefix + " given: " + givenNames + " middle: " + middleName
    212                     + " family: " + familyName + " suffix: " + suffix + " ph/given: "
    213                     + phoneticGivenName + " ph/middle: " + phoneticMiddleName + " ph/family: "
    214                     + phoneticFamilyName + "]";
    215         }
    216     }
    217 
    218     private static class NameTokenizer extends StringTokenizer {
    219         private final String[] mTokens;
    220         private int mDotBitmask;
    221         private int mCommaBitmask;
    222         private int mStartPointer;
    223         private int mEndPointer;
    224 
    225         public NameTokenizer(String fullName) {
    226             super(fullName, " .,", true);
    227 
    228             mTokens = new String[MAX_TOKENS];
    229 
    230             // Iterate over tokens, skipping over empty ones and marking tokens that
    231             // are followed by dots.
    232             while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
    233                 final String token = nextToken();
    234                 if (token.length() > 0) {
    235                     final char c = token.charAt(0);
    236                     if (c == ' ') {
    237                         continue;
    238                     }
    239                 }
    240 
    241                 if (mEndPointer > 0 && token.charAt(0) == '.') {
    242                     mDotBitmask |= (1 << (mEndPointer - 1));
    243                 } else if (mEndPointer > 0 && token.charAt(0) == ',') {
    244                     mCommaBitmask |= (1 << (mEndPointer - 1));
    245                 } else {
    246                     mTokens[mEndPointer] = token;
    247                     mEndPointer++;
    248                 }
    249             }
    250         }
    251 
    252         /**
    253          * Returns true if the token is followed by a dot in the original full name.
    254          */
    255         public boolean hasDot(int index) {
    256             return (mDotBitmask & (1 << index)) != 0;
    257         }
    258 
    259         /**
    260          * Returns true if the token is followed by a comma in the original full name.
    261          */
    262         public boolean hasComma(int index) {
    263             return (mCommaBitmask & (1 << index)) != 0;
    264         }
    265     }
    266 
    267     /**
    268      * Constructor.
    269      *
    270      * @param commonPrefixes comma-separated list of common prefixes,
    271      *            e.g. "Mr, Ms, Mrs"
    272      * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
    273      *            e.g. "d', st, st., von"
    274      * @param commonSuffixes comma-separated list of common suffixes,
    275      *            e.g. "Jr, M.D., MD, D.D.S."
    276      * @param commonConjunctions comma-separated list of common conjuctions,
    277      *            e.g. "AND, Or"
    278      */
    279     public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
    280             String commonSuffixes, String commonConjunctions, Locale locale) {
    281         // TODO: refactor this to use <string-array> resources
    282         mPrefixesSet = convertToSet(commonPrefixes);
    283         mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
    284         mSuffixesSet = convertToSet(commonSuffixes);
    285         mConjuctions = convertToSet(commonConjunctions);
    286         mLocale = locale != null ? locale : Locale.getDefault();
    287         mLanguage = mLocale.getLanguage().toLowerCase();
    288 
    289         int maxLength = 0;
    290         for (String suffix : mSuffixesSet) {
    291             if (suffix.length() > maxLength) {
    292                 maxLength = suffix.length();
    293             }
    294         }
    295 
    296         mMaxSuffixLength = maxLength;
    297     }
    298 
    299     /**
    300      * Converts a comma-separated list of Strings to a set of Strings. Trims strings
    301      * and converts them to upper case.
    302      */
    303     private static HashSet<String> convertToSet(String strings) {
    304         HashSet<String> set = new HashSet<String>();
    305         if (strings != null) {
    306             String[] split = strings.split(",");
    307             for (int i = 0; i < split.length; i++) {
    308                 set.add(split[i].trim().toUpperCase());
    309             }
    310         }
    311         return set;
    312     }
    313 
    314     /**
    315      * Parses a full name and returns components as a list of tokens.
    316      */
    317     public int tokenize(String[] tokens, String fullName) {
    318         if (fullName == null) {
    319             return 0;
    320         }
    321 
    322         NameTokenizer tokenizer = new NameTokenizer(fullName);
    323 
    324         if (tokenizer.mStartPointer == tokenizer.mEndPointer) {
    325             return 0;
    326         }
    327 
    328         String firstToken = tokenizer.mTokens[tokenizer.mStartPointer];
    329         if (mPrefixesSet.contains(firstToken.toUpperCase())) {
    330            tokenizer.mStartPointer++;
    331         }
    332         int count = 0;
    333         for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) {
    334             tokens[count++] = tokenizer.mTokens[i];
    335         }
    336 
    337         return count;
    338     }
    339 
    340 
    341     /**
    342      * Parses a full name and returns parsed components in the Name object.
    343      */
    344     public void split(Name name, String fullName) {
    345         if (fullName == null) {
    346             return;
    347         }
    348 
    349         int fullNameStyle = guessFullNameStyle(fullName);
    350         if (fullNameStyle == FullNameStyle.CJK) {
    351             fullNameStyle = getAdjustedFullNameStyle(fullNameStyle);
    352         }
    353 
    354         split(name, fullName, fullNameStyle);
    355     }
    356 
    357     /**
    358      * Parses a full name and returns parsed components in the Name object
    359      * with a given fullNameStyle.
    360      */
    361     public void split(Name name, String fullName, int fullNameStyle) {
    362         if (fullName == null) {
    363             return;
    364         }
    365 
    366         name.fullNameStyle = fullNameStyle;
    367 
    368         switch (fullNameStyle) {
    369             case FullNameStyle.CHINESE:
    370                 splitChineseName(name, fullName);
    371                 break;
    372 
    373             case FullNameStyle.JAPANESE:
    374                 splitJapaneseName(name, fullName);
    375                 break;
    376 
    377             case FullNameStyle.KOREAN:
    378                 splitKoreanName(name, fullName);
    379                 break;
    380 
    381             default:
    382                 splitWesternName(name, fullName);
    383         }
    384     }
    385 
    386     /**
    387      * Splits a full name composed according to the Western tradition:
    388      * <pre>
    389      *   [prefix] given name(s) [[middle name] family name] [, suffix]
    390      *   [prefix] family name, given name [middle name] [,suffix]
    391      * </pre>
    392      */
    393     private void splitWesternName(Name name, String fullName) {
    394         NameTokenizer tokens = new NameTokenizer(fullName);
    395         parsePrefix(name, tokens);
    396 
    397         // If the name consists of just one or two tokens, treat them as first/last name,
    398         // not as suffix.  Example: John Ma; Ma is last name, not "M.A.".
    399         if (tokens.mEndPointer > 2) {
    400             parseSuffix(name, tokens);
    401         }
    402 
    403         if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) {
    404             name.givenNames = tokens.mTokens[tokens.mStartPointer];
    405         } else {
    406             parseLastName(name, tokens);
    407             parseMiddleName(name, tokens);
    408             parseGivenNames(name, tokens);
    409         }
    410     }
    411 
    412     /**
    413      * Splits a full name composed according to the Chinese tradition:
    414      * <pre>
    415      *   [family name [middle name]] given name
    416      * </pre>
    417      */
    418     private void splitChineseName(Name name, String fullName) {
    419         StringTokenizer tokenizer = new StringTokenizer(fullName);
    420         while (tokenizer.hasMoreTokens()) {
    421             String token = tokenizer.nextToken();
    422             if (name.givenNames == null) {
    423                 name.givenNames = token;
    424             } else if (name.familyName == null) {
    425                 name.familyName = name.givenNames;
    426                 name.givenNames = token;
    427             } else if (name.middleName == null) {
    428                 name.middleName = name.givenNames;
    429                 name.givenNames = token;
    430             } else {
    431                 name.middleName = name.middleName + name.givenNames;
    432                 name.givenNames = token;
    433             }
    434         }
    435 
    436         // If a single word parse that word up.
    437         if (name.givenNames != null && name.familyName == null && name.middleName == null) {
    438             int length = fullName.length();
    439             if (length == 2) {
    440                 name.familyName = fullName.substring(0, 1);
    441                 name.givenNames = fullName.substring(1);
    442             } else if (length == 3) {
    443                 name.familyName = fullName.substring(0, 1);
    444                 name.middleName = fullName.substring(1, 2);
    445                 name.givenNames = fullName.substring(2);
    446             } else if (length == 4) {
    447                 name.familyName = fullName.substring(0, 2);
    448                 name.middleName = fullName.substring(2, 3);
    449                 name.givenNames = fullName.substring(3);
    450             }
    451 
    452         }
    453     }
    454 
    455     /**
    456      * Splits a full name composed according to the Japanese tradition:
    457      * <pre>
    458      *   [family name] given name(s)
    459      * </pre>
    460      */
    461     private void splitJapaneseName(Name name, String fullName) {
    462         StringTokenizer tokenizer = new StringTokenizer(fullName);
    463         while (tokenizer.hasMoreTokens()) {
    464             String token = tokenizer.nextToken();
    465             if (name.givenNames == null) {
    466                 name.givenNames = token;
    467             } else if (name.familyName == null) {
    468                 name.familyName = name.givenNames;
    469                 name.givenNames = token;
    470             } else {
    471                 name.givenNames += " " + token;
    472             }
    473         }
    474     }
    475 
    476     /**
    477      * Splits a full name composed according to the Korean tradition:
    478      * <pre>
    479      *   [family name] given name(s)
    480      * </pre>
    481      */
    482     private void splitKoreanName(Name name, String fullName) {
    483         StringTokenizer tokenizer = new StringTokenizer(fullName);
    484         if (tokenizer.countTokens() > 1) {
    485             // Each name can be identified by separators.
    486             while (tokenizer.hasMoreTokens()) {
    487                 String token = tokenizer.nextToken();
    488                 if (name.givenNames == null) {
    489                     name.givenNames = token;
    490                 } else if (name.familyName == null) {
    491                     name.familyName = name.givenNames;
    492                     name.givenNames = token;
    493                 } else {
    494                     name.givenNames += " " + token;
    495                 }
    496             }
    497         } else {
    498             // There is no separator. Try to guess family name.
    499             // The length of most family names is 1.
    500             int familyNameLength = 1;
    501 
    502             // Compare with 2-length family names.
    503             for (String twoLengthFamilyName : KOREAN_TWO_CHARCTER_FAMILY_NAMES) {
    504                 if (fullName.startsWith(twoLengthFamilyName)) {
    505                     familyNameLength = 2;
    506                     break;
    507                 }
    508             }
    509 
    510             name.familyName = fullName.substring(0, familyNameLength);
    511             if (fullName.length() > familyNameLength) {
    512                 name.givenNames = fullName.substring(familyNameLength);
    513             }
    514         }
    515     }
    516 
    517     /**
    518      * Concatenates components of a name according to the rules dictated by the name style.
    519      *
    520      * @param givenNameFirst is ignored for CJK display name styles
    521      */
    522     public String join(Name name, boolean givenNameFirst, boolean includePrefix) {
    523         String prefix = includePrefix ? name.prefix : null;
    524         switch (name.fullNameStyle) {
    525             case FullNameStyle.CJK:
    526             case FullNameStyle.CHINESE:
    527             case FullNameStyle.KOREAN:
    528                 return join(prefix, name.familyName, name.middleName, name.givenNames,
    529                         name.suffix, false, false, false);
    530 
    531             case FullNameStyle.JAPANESE:
    532                 return join(prefix, name.familyName, name.middleName, name.givenNames,
    533                         name.suffix, true, false, false);
    534 
    535             default:
    536                 if (givenNameFirst) {
    537                     return join(prefix, name.givenNames, name.middleName, name.familyName,
    538                             name.suffix, true, false, true);
    539                 } else {
    540                     return join(prefix, name.familyName, name.givenNames, name.middleName,
    541                             name.suffix, true, true, true);
    542                 }
    543         }
    544     }
    545 
    546     /**
    547      * Concatenates components of the phonetic name following the CJK tradition:
    548      * family name + middle name + given name(s).
    549      */
    550     public String joinPhoneticName(Name name) {
    551         return join(null, name.phoneticFamilyName,
    552                 name.phoneticMiddleName, name.phoneticGivenName, null, true, false, false);
    553     }
    554 
    555     /**
    556      * Concatenates parts of a full name inserting spaces and commas as specified.
    557      */
    558     private String join(String prefix, String part1, String part2, String part3, String suffix,
    559             boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) {
    560         prefix = prefix == null ? null: prefix.trim();
    561         part1 = part1 == null ? null: part1.trim();
    562         part2 = part2 == null ? null: part2.trim();
    563         part3 = part3 == null ? null: part3.trim();
    564         suffix = suffix == null ? null: suffix.trim();
    565 
    566         boolean hasPrefix = !TextUtils.isEmpty(prefix);
    567         boolean hasPart1 = !TextUtils.isEmpty(part1);
    568         boolean hasPart2 = !TextUtils.isEmpty(part2);
    569         boolean hasPart3 = !TextUtils.isEmpty(part3);
    570         boolean hasSuffix = !TextUtils.isEmpty(suffix);
    571 
    572         boolean isSingleWord = true;
    573         String singleWord = null;
    574 
    575         if (hasPrefix) {
    576             singleWord = prefix;
    577         }
    578 
    579         if (hasPart1) {
    580             if (singleWord != null) {
    581                 isSingleWord = false;
    582             } else {
    583                 singleWord = part1;
    584             }
    585         }
    586 
    587         if (hasPart2) {
    588             if (singleWord != null) {
    589                 isSingleWord = false;
    590             } else {
    591                 singleWord = part2;
    592             }
    593         }
    594 
    595         if (hasPart3) {
    596             if (singleWord != null) {
    597                 isSingleWord = false;
    598             } else {
    599                 singleWord = part3;
    600             }
    601         }
    602 
    603         if (hasSuffix) {
    604             if (singleWord != null) {
    605                 isSingleWord = false;
    606             } else {
    607                 singleWord = normalizedSuffix(suffix);
    608             }
    609         }
    610 
    611         if (isSingleWord) {
    612             return singleWord;
    613         }
    614 
    615         StringBuilder sb = new StringBuilder();
    616 
    617         if (hasPrefix) {
    618             sb.append(prefix);
    619         }
    620 
    621         if (hasPart1) {
    622             if (hasPrefix) {
    623                 sb.append(' ');
    624             }
    625             sb.append(part1);
    626         }
    627 
    628         if (hasPart2) {
    629             if (hasPrefix || hasPart1) {
    630                 if (useCommaAfterPart1) {
    631                     sb.append(',');
    632                 }
    633                 if (useSpace) {
    634                     sb.append(' ');
    635                 }
    636             }
    637             sb.append(part2);
    638         }
    639 
    640         if (hasPart3) {
    641             if (hasPrefix || hasPart1 || hasPart2) {
    642                 if (useSpace) {
    643                     sb.append(' ');
    644                 }
    645             }
    646             sb.append(part3);
    647         }
    648 
    649         if (hasSuffix) {
    650             if (hasPrefix || hasPart1 || hasPart2 || hasPart3) {
    651                 if (useCommaAfterPart3) {
    652                     sb.append(',');
    653                 }
    654                 if (useSpace) {
    655                     sb.append(' ');
    656                 }
    657             }
    658             sb.append(normalizedSuffix(suffix));
    659         }
    660 
    661         return sb.toString();
    662     }
    663 
    664     /**
    665      * Puts a dot after the supplied suffix if that is the accepted form of the suffix,
    666      * e.g. "Jr." and "Sr.", but not "I", "II" and "III".
    667      */
    668     private String normalizedSuffix(String suffix) {
    669         int length = suffix.length();
    670         if (length == 0 || suffix.charAt(length - 1) == '.') {
    671             return suffix;
    672         }
    673 
    674         String withDot = suffix + '.';
    675         if (mSuffixesSet.contains(withDot.toUpperCase())) {
    676             return withDot;
    677         } else {
    678             return suffix;
    679         }
    680     }
    681 
    682     /**
    683      * If the supplied name style is undefined, returns a default based on the language,
    684      * otherwise returns the supplied name style itself.
    685      *
    686      * @param nameStyle See {@link FullNameStyle}.
    687      */
    688     public int getAdjustedFullNameStyle(int nameStyle) {
    689         if (nameStyle == FullNameStyle.UNDEFINED) {
    690             if (JAPANESE_LANGUAGE.equals(mLanguage)) {
    691                 return FullNameStyle.JAPANESE;
    692             } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
    693                 return FullNameStyle.KOREAN;
    694             } else if (CHINESE_LANGUAGE.equals(mLanguage)) {
    695                 return FullNameStyle.CHINESE;
    696             } else {
    697                 return FullNameStyle.WESTERN;
    698             }
    699         } else if (nameStyle == FullNameStyle.CJK) {
    700             if (JAPANESE_LANGUAGE.equals(mLanguage)) {
    701                 return FullNameStyle.JAPANESE;
    702             } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
    703                 return FullNameStyle.KOREAN;
    704             } else {
    705                 return FullNameStyle.CHINESE;
    706             }
    707         }
    708         return nameStyle;
    709     }
    710 
    711     /**
    712      * Parses the first word from the name if it is a prefix.
    713      */
    714     private void parsePrefix(Name name, NameTokenizer tokens) {
    715         if (tokens.mStartPointer == tokens.mEndPointer) {
    716             return;
    717         }
    718 
    719         String firstToken = tokens.mTokens[tokens.mStartPointer];
    720         if (mPrefixesSet.contains(firstToken.toUpperCase())) {
    721             if (tokens.hasDot(tokens.mStartPointer)) {
    722                 firstToken += '.';
    723             }
    724             name.prefix = firstToken;
    725             tokens.mStartPointer++;
    726         }
    727     }
    728 
    /**
     * Parses the last word(s) from the name if it is a suffix.
     * <p>
     * Three forms are recognized, in this order:
     * <ol>
     * <li>an explicit suffix: more than two words remain and the word before the last
     * one is followed by a comma; the last word is taken as the suffix whatever it
     * is;</li>
     * <li>a last word that, upper-cased, is a known suffix;</li>
     * <li>a dotted suffix spread over several words (M.D., D.D.S.), found by joining
     * trailing words with dots until a known suffix matches or the candidate grows
     * longer than the longest known suffix.</li>
     * </ol>
     * On a match the suffix is stored in {@code name.suffix} and the consumed words are
     * removed from the end of {@code tokens}; otherwise both are left unchanged.
     */
    private void parseSuffix(Name name, NameTokenizer tokens) {
        if (tokens.mStartPointer == tokens.mEndPointer) {
            return;
        }

        String lastToken = tokens.mTokens[tokens.mEndPointer - 1];

        // Take care of an explicit comma-separated suffix
        if (tokens.mEndPointer - tokens.mStartPointer > 2
                && tokens.hasComma(tokens.mEndPointer - 2)) {
            // Restore the dot that the tokenizer stripped from the word.
            if (tokens.hasDot(tokens.mEndPointer - 1)) {
                lastToken += '.';
            }
            name.suffix = lastToken;
            tokens.mEndPointer--;
            return;
        }

        // A word longer than every known suffix cannot be one.
        if (lastToken.length() > mMaxSuffixLength) {
            return;
        }

        // Single-word suffix written without dots, e.g. "Jr" or "MD".
        String normalized = lastToken.toUpperCase();
        if (mSuffixesSet.contains(normalized)) {
            name.suffix = lastToken;
            tokens.mEndPointer--;
            return;
        }

        // lastToken accumulates the suffix as it was written; normalized accumulates
        // the upper-cased, dot-joined form that is looked up in the suffix set.
        if (tokens.hasDot(tokens.mEndPointer - 1)) {
            lastToken += '.';
        }
        normalized += ".";

        // Take care of suffixes like M.D. and D.D.S.
        int pos = tokens.mEndPointer - 1;
        while (normalized.length() <= mMaxSuffixLength) {

            if (mSuffixesSet.contains(normalized)) {
                name.suffix = lastToken;
                // Drop every word that became part of the suffix.
                tokens.mEndPointer = pos;
                return;
            }

            // No more words to prepend.
            if (pos == tokens.mStartPointer) {
                break;
            }

            // Prepend the preceding word, keeping the separator the user typed.
            pos--;
            if (tokens.hasDot(pos)) {
                lastToken = tokens.mTokens[pos] + "." + lastToken;
            } else {
                lastToken = tokens.mTokens[pos] + " " + lastToken;
            }

            normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
        }
    }
    790 
    791     private void parseLastName(Name name, NameTokenizer tokens) {
    792         if (tokens.mStartPointer == tokens.mEndPointer) {
    793             return;
    794         }
    795 
    796         // If the first word is followed by a comma, assume that it's the family name
    797         if (tokens.hasComma(tokens.mStartPointer)) {
    798            name.familyName = tokens.mTokens[tokens.mStartPointer];
    799            tokens.mStartPointer++;
    800            return;
    801         }
    802 
    803         // If the second word is followed by a comma and the first word
    804         // is a last name prefix as in "de Sade" and "von Cliburn", treat
    805         // the first two words as the family name.
    806         if (tokens.mStartPointer + 1 < tokens.mEndPointer
    807                 && tokens.hasComma(tokens.mStartPointer + 1)
    808                 && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) {
    809             String familyNamePrefix = tokens.mTokens[tokens.mStartPointer];
    810             if (tokens.hasDot(tokens.mStartPointer)) {
    811                 familyNamePrefix += '.';
    812             }
    813             name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1];
    814             tokens.mStartPointer += 2;
    815             return;
    816         }
    817 
    818         // Finally, assume that the last word is the last name
    819         name.familyName = tokens.mTokens[tokens.mEndPointer - 1];
    820         tokens.mEndPointer--;
    821 
    822         // Take care of last names like "de Sade" and "von Cliburn"
    823         if ((tokens.mEndPointer - tokens.mStartPointer) > 0) {
    824             String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1];
    825             if (isFamilyNamePrefix(lastNamePrefix)) {
    826                 if (tokens.hasDot(tokens.mEndPointer - 1)) {
    827                     lastNamePrefix += '.';
    828                 }
    829                 name.familyName = lastNamePrefix + " " + name.familyName;
    830                 tokens.mEndPointer--;
    831             }
    832         }
    833     }
    834 
    835     /**
    836      * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de"
    837      */
    838     private boolean isFamilyNamePrefix(String word) {
    839         final String normalized = word.toUpperCase();
    840 
    841         return mLastNamePrefixesSet.contains(normalized)
    842                 || mLastNamePrefixesSet.contains(normalized + ".");
    843     }
    844 
    845 
    846     private void parseMiddleName(Name name, NameTokenizer tokens) {
    847         if (tokens.mStartPointer == tokens.mEndPointer) {
    848             return;
    849         }
    850 
    851         if ((tokens.mEndPointer - tokens.mStartPointer) > 1) {
    852             if ((tokens.mEndPointer - tokens.mStartPointer) == 2
    853                     || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2].
    854                             toUpperCase())) {
    855                 name.middleName = tokens.mTokens[tokens.mEndPointer - 1];
    856                 if (tokens.hasDot(tokens.mEndPointer - 1)) {
    857                     name.middleName += '.';
    858                 }
    859                 tokens.mEndPointer--;
    860             }
    861         }
    862     }
    863 
    864     private void parseGivenNames(Name name, NameTokenizer tokens) {
    865         if (tokens.mStartPointer == tokens.mEndPointer) {
    866             return;
    867         }
    868 
    869         if ((tokens.mEndPointer - tokens.mStartPointer) == 1) {
    870             name.givenNames = tokens.mTokens[tokens.mStartPointer];
    871         } else {
    872             StringBuilder sb = new StringBuilder();
    873             for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
    874                 if (i != tokens.mStartPointer) {
    875                     sb.append(' ');
    876                 }
    877                 sb.append(tokens.mTokens[i]);
    878                 if (tokens.hasDot(i)) {
    879                     sb.append('.');
    880                 }
    881             }
    882             name.givenNames = sb.toString();
    883         }
    884     }
    885 
    886     /**
    887      * Makes the best guess at the expected full name style based on the character set
    888      * used in the supplied name.  If the phonetic name is also supplied, tries to
    889      * differentiate between Chinese, Japanese and Korean based on the alphabet used
    890      * for the phonetic name.
    891      */
    892     public void guessNameStyle(Name name) {
    893         guessFullNameStyle(name);
    894         guessPhoneticNameStyle(name);
    895         name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle,
    896                 name.phoneticNameStyle);
    897     }
    898 
    899     /**
    900      * Updates the display name style according to the phonetic name style if we
    901      * were unsure about display name style based on the name components, but
    902      * phonetic name makes it more definitive.
    903      */
    904     public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) {
    905         if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
    906             if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) {
    907                 if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) {
    908                     return FullNameStyle.JAPANESE;
    909                 } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) {
    910                     return FullNameStyle.KOREAN;
    911                 }
    912                 if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) {
    913                     return FullNameStyle.CHINESE;
    914                 }
    915             }
    916         }
    917         return nameStyle;
    918     }
    919 
    920     /**
    921      * Makes the best guess at the expected full name style based on the character set
    922      * used in the supplied name.
    923      */
    924     private void guessFullNameStyle(NameSplitter.Name name) {
    925         if (name.fullNameStyle != FullNameStyle.UNDEFINED) {
    926             return;
    927         }
    928 
    929         int bestGuess = guessFullNameStyle(name.givenNames);
    930         // A mix of Hanzi and latin chars are common in China, so we have to go through all names
    931         // if the name is not JANPANESE or KOREAN.
    932         if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK
    933                 && bestGuess != FullNameStyle.WESTERN) {
    934             name.fullNameStyle = bestGuess;
    935             return;
    936         }
    937 
    938         int guess = guessFullNameStyle(name.familyName);
    939         if (guess != FullNameStyle.UNDEFINED) {
    940             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    941                 name.fullNameStyle = guess;
    942                 return;
    943             }
    944             bestGuess = guess;
    945         }
    946 
    947         guess = guessFullNameStyle(name.middleName);
    948         if (guess != FullNameStyle.UNDEFINED) {
    949             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    950                 name.fullNameStyle = guess;
    951                 return;
    952             }
    953             bestGuess = guess;
    954         }
    955 
    956         guess = guessFullNameStyle(name.prefix);
    957         if (guess != FullNameStyle.UNDEFINED) {
    958             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    959                 name.fullNameStyle = guess;
    960                 return;
    961             }
    962             bestGuess = guess;
    963         }
    964 
    965         guess = guessFullNameStyle(name.suffix);
    966         if (guess != FullNameStyle.UNDEFINED) {
    967             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    968                 name.fullNameStyle = guess;
    969                 return;
    970             }
    971             bestGuess = guess;
    972         }
    973 
    974         name.fullNameStyle = bestGuess;
    975     }
    976 
    977     public int guessFullNameStyle(String name) {
    978         if (name == null) {
    979             return FullNameStyle.UNDEFINED;
    980         }
    981 
    982         int nameStyle = FullNameStyle.UNDEFINED;
    983         int length = name.length();
    984         int offset = 0;
    985         while (offset < length) {
    986             int codePoint = Character.codePointAt(name, offset);
    987             if (Character.isLetter(codePoint)) {
    988                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
    989 
    990                 if (!isLatinUnicodeBlock(unicodeBlock)) {
    991 
    992                     if (isCJKUnicodeBlock(unicodeBlock)) {
    993                         // We don't know if this is Chinese, Japanese or Korean -
    994                         // trying to figure out by looking at other characters in the name
    995                         return guessCJKNameStyle(name, offset + Character.charCount(codePoint));
    996                     }
    997 
    998                     if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
    999                         return FullNameStyle.JAPANESE;
   1000                     }
   1001 
   1002                     if (isKoreanUnicodeBlock(unicodeBlock)) {
   1003                         return FullNameStyle.KOREAN;
   1004                     }
   1005                 }
   1006                 nameStyle = FullNameStyle.WESTERN;
   1007             }
   1008             offset += Character.charCount(codePoint);
   1009         }
   1010         return nameStyle;
   1011     }
   1012 
   1013     private int guessCJKNameStyle(String name, int offset) {
   1014         int length = name.length();
   1015         while (offset < length) {
   1016             int codePoint = Character.codePointAt(name, offset);
   1017             if (Character.isLetter(codePoint)) {
   1018                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
   1019                 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
   1020                     return FullNameStyle.JAPANESE;
   1021                 }
   1022                 if (isKoreanUnicodeBlock(unicodeBlock)) {
   1023                     return FullNameStyle.KOREAN;
   1024                 }
   1025             }
   1026             offset += Character.charCount(codePoint);
   1027         }
   1028 
   1029         return FullNameStyle.CJK;
   1030     }
   1031 
   1032     private void guessPhoneticNameStyle(NameSplitter.Name name) {
   1033         if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
   1034             return;
   1035         }
   1036 
   1037         int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName);
   1038         if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) {
   1039             name.phoneticNameStyle = bestGuess;
   1040             return;
   1041         }
   1042 
   1043         int guess = guessPhoneticNameStyle(name.phoneticGivenName);
   1044         if (guess != FullNameStyle.UNDEFINED) {
   1045             if (guess != FullNameStyle.CJK) {
   1046                 name.phoneticNameStyle = guess;
   1047                 return;
   1048             }
   1049             bestGuess = guess;
   1050         }
   1051 
   1052         guess = guessPhoneticNameStyle(name.phoneticMiddleName);
   1053         if (guess != FullNameStyle.UNDEFINED) {
   1054             if (guess != FullNameStyle.CJK) {
   1055                 name.phoneticNameStyle = guess;
   1056                 return;
   1057             }
   1058             bestGuess = guess;
   1059         }
   1060     }
   1061 
   1062     public int guessPhoneticNameStyle(String name) {
   1063         if (name == null) {
   1064             return PhoneticNameStyle.UNDEFINED;
   1065         }
   1066 
   1067         int nameStyle = PhoneticNameStyle.UNDEFINED;
   1068         int length = name.length();
   1069         int offset = 0;
   1070         while (offset < length) {
   1071             int codePoint = Character.codePointAt(name, offset);
   1072             if (Character.isLetter(codePoint)) {
   1073                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
   1074                 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
   1075                     return PhoneticNameStyle.JAPANESE;
   1076                 }
   1077                 if (isKoreanUnicodeBlock(unicodeBlock)) {
   1078                     return PhoneticNameStyle.KOREAN;
   1079                 }
   1080                 if (isLatinUnicodeBlock(unicodeBlock)) {
   1081                     return PhoneticNameStyle.PINYIN;
   1082                 }
   1083             }
   1084             offset += Character.charCount(codePoint);
   1085         }
   1086 
   1087         return nameStyle;
   1088     }
   1089 
   1090     private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
   1091         return unicodeBlock == UnicodeBlock.BASIC_LATIN ||
   1092                 unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT ||
   1093                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A ||
   1094                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B ||
   1095                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
   1096     }
   1097 
   1098     private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
   1099         return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
   1100                 || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
   1101                 || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
   1102                 || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
   1103                 || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT
   1104                 || block == UnicodeBlock.CJK_COMPATIBILITY
   1105                 || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
   1106                 || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
   1107                 || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
   1108     }
   1109 
   1110     private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) {
   1111         return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES ||
   1112                 unicodeBlock == UnicodeBlock.HANGUL_JAMO ||
   1113                 unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
   1114     }
   1115 
   1116     private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
   1117         return unicodeBlock == UnicodeBlock.KATAKANA ||
   1118                 unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS ||
   1119                 unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS ||
   1120                 unicodeBlock == UnicodeBlock.HIRAGANA;
   1121     }
   1122 }
   1123