Home | History | Annotate | Download | only in contacts
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 package com.android.providers.contacts;
     17 
     18 import com.android.internal.util.HanziToPinyin;
     19 import com.android.internal.util.HanziToPinyin.Token;
     20 
     21 import android.content.ContentValues;
     22 import android.provider.ContactsContract.FullNameStyle;
     23 import android.provider.ContactsContract.PhoneticNameStyle;
     24 import android.provider.ContactsContract.CommonDataKinds.StructuredName;
     25 import android.text.TextUtils;
     26 
     27 import java.lang.Character.UnicodeBlock;
     28 import java.util.ArrayList;
     29 import java.util.HashSet;
     30 import java.util.Locale;
     31 import java.util.StringTokenizer;
     32 
     33 /**
     34  * The purpose of this class is to split a full name into given names and last
     35  * name. The logic only supports having a single last name. If the full name has
     36  * multiple last names the output will be incorrect.
     37  * <p>
     38  * Core algorithm:
     39  * <ol>
     40  * <li>Remove the suffixes (III, Ph.D., M.D.).</li>
     41  * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li>
     42  * <li>Assign the last remaining token as the last name.</li>
     43  * <li>If the previous word to the last name is one from LASTNAME_PREFIXES, use
     44  * this word also as the last name.</li>
     45  * <li>Assign the rest of the words as the "given names".</li>
     46  * </ol>
     47  */
     48 public class NameSplitter {
     49 
    /** Upper bound on the number of tokens {@link NameTokenizer} retains from one full name. */
    public static final int MAX_TOKENS = 10;

    // Lower-cased language codes used to pick a CJK name style from the splitter's locale.
    private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
    private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();

    // This includes simplified and traditional Chinese
    private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();

    // Lookup sets built by convertToSet(): every entry is trimmed and upper-cased.
    private final HashSet<String> mPrefixesSet;
    private final HashSet<String> mSuffixesSet;
    // Length of the longest entry in mSuffixesSet; lets suffix matching give up early.
    private final int mMaxSuffixLength;
    private final HashSet<String> mLastNamePrefixesSet;
    private final HashSet<String> mConjuctions;
    // Locale passed to the constructor (default locale when null) and its lower-cased language.
    private final Locale mLocale;
    private final String mLanguage;
     65 
     66     public static class Name {
     67         public String prefix;
     68         public String givenNames;
     69         public String middleName;
     70         public String familyName;
     71         public String suffix;
     72 
     73         public int fullNameStyle;
     74 
     75         public String phoneticFamilyName;
     76         public String phoneticMiddleName;
     77         public String phoneticGivenName;
     78 
     79         public int phoneticNameStyle;
     80 
     81         public Name() {
     82         }
     83 
     84         public Name(String prefix, String givenNames, String middleName, String familyName,
     85                 String suffix) {
     86             this.prefix = prefix;
     87             this.givenNames = givenNames;
     88             this.middleName = middleName;
     89             this.familyName = familyName;
     90             this.suffix = suffix;
     91         }
     92 
     93         public String getPrefix() {
     94             return prefix;
     95         }
     96 
     97         public String getGivenNames() {
     98             return givenNames;
     99         }
    100 
    101         public String getMiddleName() {
    102             return middleName;
    103         }
    104 
    105         public String getFamilyName() {
    106             return familyName;
    107         }
    108 
    109         public String getSuffix() {
    110             return suffix;
    111         }
    112 
    113         public int getFullNameStyle() {
    114             return fullNameStyle;
    115         }
    116 
    117         public String getPhoneticFamilyName() {
    118             return phoneticFamilyName;
    119         }
    120 
    121         public String getPhoneticMiddleName() {
    122             return phoneticMiddleName;
    123         }
    124 
    125         public String getPhoneticGivenName() {
    126             return phoneticGivenName;
    127         }
    128 
    129         public int getPhoneticNameStyle() {
    130             return phoneticNameStyle;
    131         }
    132 
    133         public void fromValues(ContentValues values) {
    134             prefix = values.getAsString(StructuredName.PREFIX);
    135             givenNames = values.getAsString(StructuredName.GIVEN_NAME);
    136             middleName = values.getAsString(StructuredName.MIDDLE_NAME);
    137             familyName = values.getAsString(StructuredName.FAMILY_NAME);
    138             suffix = values.getAsString(StructuredName.SUFFIX);
    139 
    140             Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
    141             fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;
    142 
    143             phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
    144             phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
    145             phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);
    146 
    147             integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
    148             phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
    149         }
    150 
    151         public void toValues(ContentValues values) {
    152             putValueIfPresent(values, StructuredName.PREFIX, prefix);
    153             putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
    154             putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
    155             putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
    156             putValueIfPresent(values, StructuredName.SUFFIX, suffix);
    157             values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
    158             putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
    159             putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
    160             putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
    161             values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
    162         }
    163 
    164         private void putValueIfPresent(ContentValues values, String name, String value) {
    165             if (value != null) {
    166                 values.put(name, value);
    167             }
    168         }
    169 
    170         public void clear() {
    171             prefix = null;
    172             givenNames = null;
    173             middleName = null;
    174             familyName = null;
    175             suffix = null;
    176             fullNameStyle = FullNameStyle.UNDEFINED;
    177             phoneticFamilyName = null;
    178             phoneticMiddleName = null;
    179             phoneticGivenName = null;
    180             phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
    181         }
    182 
    183         public boolean isEmpty() {
    184             return TextUtils.isEmpty(givenNames)
    185                     && TextUtils.isEmpty(middleName)
    186                     && TextUtils.isEmpty(familyName)
    187                     && TextUtils.isEmpty(suffix)
    188                     && TextUtils.isEmpty(phoneticFamilyName)
    189                     && TextUtils.isEmpty(phoneticMiddleName)
    190                     && TextUtils.isEmpty(phoneticGivenName);
    191         }
    192 
    193         @Override
    194         public String toString() {
    195             return "[given: " + givenNames + " middle: " + middleName + " family: " + familyName
    196                     + " ph/given: " + phoneticGivenName + " ph/middle: " + phoneticMiddleName
    197                     + " ph/family: " + phoneticFamilyName + "]";
    198         }
    199 
    200     }
    201 
    202     private static class NameTokenizer extends StringTokenizer {
    203         private final String[] mTokens;
    204         private int mDotBitmask;
    205         private int mCommaBitmask;
    206         private int mStartPointer;
    207         private int mEndPointer;
    208 
    209         public NameTokenizer(String fullName) {
    210             super(fullName, " .,", true);
    211 
    212             mTokens = new String[MAX_TOKENS];
    213 
    214             // Iterate over tokens, skipping over empty ones and marking tokens that
    215             // are followed by dots.
    216             while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
    217                 final String token = nextToken();
    218                 if (token.length() > 0) {
    219                     final char c = token.charAt(0);
    220                     if (c == ' ') {
    221                         continue;
    222                     }
    223                 }
    224 
    225                 if (mEndPointer > 0 && token.charAt(0) == '.') {
    226                     mDotBitmask |= (1 << (mEndPointer - 1));
    227                 } else if (mEndPointer > 0 && token.charAt(0) == ',') {
    228                     mCommaBitmask |= (1 << (mEndPointer - 1));
    229                 } else {
    230                     mTokens[mEndPointer] = token;
    231                     mEndPointer++;
    232                 }
    233             }
    234         }
    235 
    236         /**
    237          * Returns true if the token is followed by a dot in the original full name.
    238          */
    239         public boolean hasDot(int index) {
    240             return (mDotBitmask & (1 << index)) != 0;
    241         }
    242 
    243         /**
    244          * Returns true if the token is followed by a comma in the original full name.
    245          */
    246         public boolean hasComma(int index) {
    247             return (mCommaBitmask & (1 << index)) != 0;
    248         }
    249     }
    250 
    251     /**
    252      * Constructor.
    253      *
    254      * @param commonPrefixes comma-separated list of common prefixes,
    255      *            e.g. "Mr, Ms, Mrs"
    256      * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
    257      *            e.g. "d', st, st., von"
    258      * @param commonSuffixes comma-separated list of common suffixes,
    259      *            e.g. "Jr, M.D., MD, D.D.S."
    260      * @param commonConjunctions comma-separated list of common conjuctions,
    261      *            e.g. "AND, Or"
    262      */
    263     public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
    264             String commonSuffixes, String commonConjunctions, Locale locale) {
    265         // TODO: refactor this to use <string-array> resources
    266         mPrefixesSet = convertToSet(commonPrefixes);
    267         mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
    268         mSuffixesSet = convertToSet(commonSuffixes);
    269         mConjuctions = convertToSet(commonConjunctions);
    270         mLocale = locale != null ? locale : Locale.getDefault();
    271         mLanguage = mLocale.getLanguage().toLowerCase();
    272 
    273         int maxLength = 0;
    274         for (String suffix : mSuffixesSet) {
    275             if (suffix.length() > maxLength) {
    276                 maxLength = suffix.length();
    277             }
    278         }
    279 
    280         mMaxSuffixLength = maxLength;
    281     }
    282 
    283     /**
    284      * Converts a comma-separated list of Strings to a set of Strings. Trims strings
    285      * and converts them to upper case.
    286      */
    287     private static HashSet<String> convertToSet(String strings) {
    288         HashSet<String> set = new HashSet<String>();
    289         if (strings != null) {
    290             String[] split = strings.split(",");
    291             for (int i = 0; i < split.length; i++) {
    292                 set.add(split[i].trim().toUpperCase());
    293             }
    294         }
    295         return set;
    296     }
    297 
    298     /**
    299      * Parses a full name and returns components as a list of tokens.
    300      */
    301     public int tokenize(String[] tokens, String fullName) {
    302         if (fullName == null) {
    303             return 0;
    304         }
    305 
    306         NameTokenizer tokenizer = new NameTokenizer(fullName);
    307 
    308         if (tokenizer.mStartPointer == tokenizer.mEndPointer) {
    309             return 0;
    310         }
    311 
    312         String firstToken = tokenizer.mTokens[tokenizer.mStartPointer];
    313         if (mPrefixesSet.contains(firstToken.toUpperCase())) {
    314            tokenizer.mStartPointer++;
    315         }
    316         int count = 0;
    317         for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) {
    318             tokens[count++] = tokenizer.mTokens[i];
    319         }
    320 
    321         return count;
    322     }
    323 
    324 
    325     /**
    326      * Parses a full name and returns parsed components in the Name object.
    327      */
    328     public void split(Name name, String fullName) {
    329         if (fullName == null) {
    330             return;
    331         }
    332 
    333         int fullNameStyle = guessFullNameStyle(fullName);
    334         if (fullNameStyle == FullNameStyle.CJK) {
    335             fullNameStyle = getAdjustedFullNameStyle(fullNameStyle);
    336         }
    337 
    338         name.fullNameStyle = fullNameStyle;
    339 
    340         switch (fullNameStyle) {
    341             case FullNameStyle.CHINESE:
    342                 splitChineseName(name, fullName);
    343                 break;
    344 
    345             case FullNameStyle.JAPANESE:
    346             case FullNameStyle.KOREAN:
    347                 splitJapaneseOrKoreanName(name, fullName);
    348                 break;
    349 
    350             default:
    351                 splitWesternName(name, fullName);
    352         }
    353     }
    354 
    355     /**
    356      * Splits a full name composed according to the Western tradition:
    357      * <pre>
    358      *   [prefix] given name(s) [[middle name] family name] [, suffix]
    359      *   [prefix] family name, given name [middle name] [,suffix]
    360      * </pre>
    361      */
    362     private void splitWesternName(Name name, String fullName) {
    363         NameTokenizer tokens = new NameTokenizer(fullName);
    364         parsePrefix(name, tokens);
    365 
    366         // If the name consists of just one or two tokens, treat them as first/last name,
    367         // not as suffix.  Example: John Ma; Ma is last name, not "M.A.".
    368         if (tokens.mEndPointer > 2) {
    369             parseSuffix(name, tokens);
    370         }
    371 
    372         if (name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1) {
    373             name.givenNames = tokens.mTokens[tokens.mStartPointer];
    374         } else {
    375             parseLastName(name, tokens);
    376             parseMiddleName(name, tokens);
    377             parseGivenNames(name, tokens);
    378         }
    379     }
    380 
    381     /**
    382      * Splits a full name composed according to the Chinese tradition:
    383      * <pre>
    384      *   [family name [middle name]] given name
    385      * </pre>
    386      */
    387     private void splitChineseName(Name name, String fullName) {
    388         StringTokenizer tokenizer = new StringTokenizer(fullName);
    389         while (tokenizer.hasMoreTokens()) {
    390             String token = tokenizer.nextToken();
    391             if (name.givenNames == null) {
    392                 name.givenNames = token;
    393             } else if (name.familyName == null) {
    394                 name.familyName = name.givenNames;
    395                 name.givenNames = token;
    396             } else if (name.middleName == null) {
    397                 name.middleName = name.givenNames;
    398                 name.givenNames = token;
    399             } else {
    400                 name.middleName = name.middleName + name.givenNames;
    401                 name.givenNames = token;
    402             }
    403         }
    404 
    405         // If a single word parse that word up.
    406         if (name.givenNames != null && name.familyName == null && name.middleName == null) {
    407             int length = fullName.length();
    408             if (length == 2) {
    409                 name.familyName = fullName.substring(0, 1);
    410                 name.givenNames = fullName.substring(1);
    411             } else if (length == 3) {
    412                 name.familyName = fullName.substring(0, 1);
    413                 name.middleName = fullName.substring(1, 2);
    414                 name.givenNames = fullName.substring(2);
    415             } else if (length == 4) {
    416                 name.familyName = fullName.substring(0, 2);
    417                 name.middleName = fullName.substring(2, 3);
    418                 name.givenNames = fullName.substring(3);
    419             }
    420 
    421         }
    422     }
    423 
    424     /**
    425      * Splits a full name composed according to the Japanese tradition:
    426      * <pre>
    427      *   [family name] given name(s)
    428      * </pre>
    429      */
    430     private void splitJapaneseOrKoreanName(Name name, String fullName) {
    431         StringTokenizer tokenizer = new StringTokenizer(fullName);
    432         while (tokenizer.hasMoreTokens()) {
    433             String token = tokenizer.nextToken();
    434             if (name.givenNames == null) {
    435                 name.givenNames = token;
    436             } else if (name.familyName == null) {
    437                 name.familyName = name.givenNames;
    438                 name.givenNames = token;
    439             } else {
    440                 name.givenNames += " " + token;
    441             }
    442         }
    443     }
    444 
    445     /**
    446      * Concatenates components of a name according to the rules dictated by the name style.
    447      *
    448      * @param givenNameFirst is ignored for CJK display name styles
    449      */
    450     public String join(Name name, boolean givenNameFirst) {
    451         switch (name.fullNameStyle) {
    452             case FullNameStyle.CJK:
    453             case FullNameStyle.CHINESE:
    454             case FullNameStyle.KOREAN:
    455                 return join(name.familyName, name.middleName, name.givenNames, name.suffix,
    456                         false, false, false);
    457 
    458             case FullNameStyle.JAPANESE:
    459                 return join(name.familyName, name.middleName, name.givenNames, name.suffix,
    460                         true, false, false);
    461 
    462             default:
    463                 if (givenNameFirst) {
    464                     return join(name.givenNames, name.middleName, name.familyName, name.suffix,
    465                             true, false, true);
    466                 } else {
    467                     return join(name.familyName, name.givenNames, name.middleName, name.suffix,
    468                             true, true, true);
    469                 }
    470         }
    471     }
    472 
    473     /**
    474      * Concatenates components of the phonetic name following the CJK tradition:
    475      * family name + middle name + given name(s).
    476      */
    477     public String joinPhoneticName(Name name) {
    478         return join(name.phoneticFamilyName, name.phoneticMiddleName,
    479                 name.phoneticGivenName, null, true, false, false);
    480     }
    481 
    482     /**
    483      * Concatenates parts of a full name inserting spaces and commas as specified.
    484      */
    485     private String join(String part1, String part2, String part3, String suffix,
    486             boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) {
    487         boolean hasPart1 = !TextUtils.isEmpty(part1);
    488         boolean hasPart2 = !TextUtils.isEmpty(part2);
    489         boolean hasPart3 = !TextUtils.isEmpty(part3);
    490         boolean hasSuffix = !TextUtils.isEmpty(suffix);
    491 
    492         boolean isSingleWord = true;
    493         String singleWord = null;
    494         if (hasPart1) {
    495             singleWord = part1;
    496         }
    497 
    498         if (hasPart2) {
    499             if (singleWord != null) {
    500                 isSingleWord = false;
    501             } else {
    502                 singleWord = part2;
    503             }
    504         }
    505 
    506         if (hasPart3) {
    507             if (singleWord != null) {
    508                 isSingleWord = false;
    509             } else {
    510                 singleWord = part3;
    511             }
    512         }
    513 
    514         if (hasSuffix) {
    515             if (singleWord != null) {
    516                 isSingleWord = false;
    517             } else {
    518                 singleWord = normalizedSuffix(suffix);
    519             }
    520         }
    521 
    522         if (isSingleWord) {
    523             return singleWord;
    524         }
    525 
    526         StringBuilder sb = new StringBuilder();
    527         if (hasPart1) {
    528             sb.append(part1);
    529         }
    530 
    531         if (hasPart2) {
    532             if (hasPart1) {
    533                 if (useCommaAfterPart1) {
    534                     sb.append(',');
    535                 }
    536                 if (useSpace) {
    537                     sb.append(' ');
    538                 }
    539             }
    540             sb.append(part2);
    541         }
    542 
    543         if (hasPart3) {
    544             if (hasPart1 || hasPart2) {
    545                 if (useSpace) {
    546                     sb.append(' ');
    547                 }
    548             }
    549             sb.append(part3);
    550         }
    551 
    552         if (hasSuffix) {
    553             if (hasPart1 || hasPart2 || hasPart3) {
    554                 if (useCommaAfterPart3) {
    555                     sb.append(',');
    556                 }
    557                 if (useSpace) {
    558                     sb.append(' ');
    559                 }
    560             }
    561             sb.append(normalizedSuffix(suffix));
    562         }
    563 
    564         return sb.toString();
    565     }
    566 
    567     /**
    568      * Puts a dot after the supplied suffix if that is the accepted form of the suffix,
    569      * e.g. "Jr." and "Sr.", but not "I", "II" and "III".
    570      */
    571     private String normalizedSuffix(String suffix) {
    572         int length = suffix.length();
    573         if (length == 0 || suffix.charAt(length - 1) == '.') {
    574             return suffix;
    575         }
    576 
    577         String withDot = suffix + '.';
    578         if (mSuffixesSet.contains(withDot.toUpperCase())) {
    579             return withDot;
    580         } else {
    581             return suffix;
    582         }
    583     }
    584 
    585     /**
    586      * If the supplied name style is undefined, returns a default based on the language,
    587      * otherwise returns the supplied name style itself.
    588      *
    589      * @param nameStyle See {@link FullNameStyle}.
    590      */
    591     public int getAdjustedFullNameStyle(int nameStyle) {
    592         if (nameStyle == FullNameStyle.UNDEFINED) {
    593             if (JAPANESE_LANGUAGE.equals(mLanguage)) {
    594                 return FullNameStyle.JAPANESE;
    595             } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
    596                 return FullNameStyle.KOREAN;
    597             } else if (CHINESE_LANGUAGE.equals(mLanguage)) {
    598                 return FullNameStyle.CHINESE;
    599             } else {
    600                 return FullNameStyle.WESTERN;
    601             }
    602         } else if (nameStyle == FullNameStyle.CJK) {
    603             if (JAPANESE_LANGUAGE.equals(mLanguage)) {
    604                 return FullNameStyle.JAPANESE;
    605             } else if (KOREAN_LANGUAGE.equals(mLanguage)) {
    606                 return FullNameStyle.KOREAN;
    607             } else {
    608                 return FullNameStyle.CHINESE;
    609             }
    610         }
    611         return nameStyle;
    612     }
    613 
    614     /**
    615      * Parses the first word from the name if it is a prefix.
    616      */
    617     private void parsePrefix(Name name, NameTokenizer tokens) {
    618         if (tokens.mStartPointer == tokens.mEndPointer) {
    619             return;
    620         }
    621 
    622         String firstToken = tokens.mTokens[tokens.mStartPointer];
    623         if (mPrefixesSet.contains(firstToken.toUpperCase())) {
    624             name.prefix = firstToken;
    625             tokens.mStartPointer++;
    626         }
    627     }
    628 
    629     /**
    630      * Parses the last word(s) from the name if it is a suffix.
    631      */
    632     private void parseSuffix(Name name, NameTokenizer tokens) {
    633         if (tokens.mStartPointer == tokens.mEndPointer) {
    634             return;
    635         }
    636 
    637         String lastToken = tokens.mTokens[tokens.mEndPointer - 1];
    638         if (lastToken.length() > mMaxSuffixLength) {
    639             return;
    640         }
    641 
    642         String normalized = lastToken.toUpperCase();
    643         if (mSuffixesSet.contains(normalized)) {
    644             name.suffix = lastToken;
    645             tokens.mEndPointer--;
    646             return;
    647         }
    648 
    649         if (tokens.hasDot(tokens.mEndPointer - 1)) {
    650             lastToken += '.';
    651         }
    652         normalized += ".";
    653 
    654         // Take care of suffixes like M.D. and D.D.S.
    655         int pos = tokens.mEndPointer - 1;
    656         while (normalized.length() <= mMaxSuffixLength) {
    657 
    658             if (mSuffixesSet.contains(normalized)) {
    659                 name.suffix = lastToken;
    660                 tokens.mEndPointer = pos;
    661                 return;
    662             }
    663 
    664             if (pos == tokens.mStartPointer) {
    665                 break;
    666             }
    667 
    668             pos--;
    669             if (tokens.hasDot(pos)) {
    670                 lastToken = tokens.mTokens[pos] + "." + lastToken;
    671             } else {
    672                 lastToken = tokens.mTokens[pos] + " " + lastToken;
    673             }
    674 
    675             normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
    676         }
    677     }
    678 
    679     private void parseLastName(Name name, NameTokenizer tokens) {
    680         if (tokens.mStartPointer == tokens.mEndPointer) {
    681             return;
    682         }
    683 
    684         // If the first word is followed by a comma, assume that it's the family name
    685         if (tokens.hasComma(tokens.mStartPointer)) {
    686            name.familyName = tokens.mTokens[tokens.mStartPointer];
    687            tokens.mStartPointer++;
    688            return;
    689         }
    690 
    691         // If the second word is followed by a comma and the first word
    692         // is a last name prefix as in "de Sade" and "von Cliburn", treat
    693         // the first two words as the family name.
    694         if (tokens.mStartPointer + 1 < tokens.mEndPointer
    695                 && tokens.hasComma(tokens.mStartPointer + 1)
    696                 && isFamilyNamePrefix(tokens.mTokens[tokens.mStartPointer])) {
    697             String familyNamePrefix = tokens.mTokens[tokens.mStartPointer];
    698             if (tokens.hasDot(tokens.mStartPointer)) {
    699                 familyNamePrefix += '.';
    700             }
    701             name.familyName = familyNamePrefix + " " + tokens.mTokens[tokens.mStartPointer + 1];
    702             tokens.mStartPointer += 2;
    703             return;
    704         }
    705 
    706         // Finally, assume that the last word is the last name
    707         name.familyName = tokens.mTokens[tokens.mEndPointer - 1];
    708         tokens.mEndPointer--;
    709 
    710         // Take care of last names like "de Sade" and "von Cliburn"
    711         if ((tokens.mEndPointer - tokens.mStartPointer) > 0) {
    712             String lastNamePrefix = tokens.mTokens[tokens.mEndPointer - 1];
    713             if (isFamilyNamePrefix(lastNamePrefix)) {
    714                 if (tokens.hasDot(tokens.mEndPointer - 1)) {
    715                     lastNamePrefix += '.';
    716                 }
    717                 name.familyName = lastNamePrefix + " " + name.familyName;
    718                 tokens.mEndPointer--;
    719             }
    720         }
    721     }
    722 
    723     /**
    724      * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de"
    725      */
    726     private boolean isFamilyNamePrefix(String word) {
    727         final String normalized = word.toUpperCase();
    728 
    729         return mLastNamePrefixesSet.contains(normalized)
    730                 || mLastNamePrefixesSet.contains(normalized + ".");
    731     }
    732 
    733 
    734     private void parseMiddleName(Name name, NameTokenizer tokens) {
    735         if (tokens.mStartPointer == tokens.mEndPointer) {
    736             return;
    737         }
    738 
    739         if ((tokens.mEndPointer - tokens.mStartPointer) > 1) {
    740             if ((tokens.mEndPointer - tokens.mStartPointer) == 2
    741                     || !mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2].
    742                             toUpperCase())) {
    743                 name.middleName = tokens.mTokens[tokens.mEndPointer - 1];
    744                 if (tokens.hasDot(tokens.mEndPointer - 1)) {
    745                     name.middleName += '.';
    746                 }
    747                 tokens.mEndPointer--;
    748             }
    749         }
    750     }
    751 
    752     private void parseGivenNames(Name name, NameTokenizer tokens) {
    753         if (tokens.mStartPointer == tokens.mEndPointer) {
    754             return;
    755         }
    756 
    757         if ((tokens.mEndPointer - tokens.mStartPointer) == 1) {
    758             name.givenNames = tokens.mTokens[tokens.mStartPointer];
    759         } else {
    760             StringBuilder sb = new StringBuilder();
    761             for (int i = tokens.mStartPointer; i < tokens.mEndPointer; i++) {
    762                 if (i != tokens.mStartPointer) {
    763                     sb.append(' ');
    764                 }
    765                 sb.append(tokens.mTokens[i]);
    766                 if (tokens.hasDot(i)) {
    767                     sb.append('.');
    768                 }
    769             }
    770             name.givenNames = sb.toString();
    771         }
    772     }
    773 
    774     /**
    775      * Makes the best guess at the expected full name style based on the character set
    776      * used in the supplied name.  If the phonetic name is also supplied, tries to
    777      * differentiate between Chinese, Japanese and Korean based on the alphabet used
    778      * for the phonetic name.
    779      */
    780     public void guessNameStyle(Name name) {
    781         guessFullNameStyle(name);
    782         guessPhoneticNameStyle(name);
    783         name.fullNameStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(name.fullNameStyle,
    784                 name.phoneticNameStyle);
    785     }
    786 
    787     /**
    788      * Updates the display name style according to the phonetic name style if we
    789      * were unsure about display name style based on the name components, but
    790      * phonetic name makes it more definitive.
    791      */
    792     public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) {
    793         if (phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
    794             if (nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK) {
    795                 if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) {
    796                     return FullNameStyle.JAPANESE;
    797                 } else if (phoneticNameStyle == PhoneticNameStyle.KOREAN) {
    798                     return FullNameStyle.KOREAN;
    799                 }
    800                 if (nameStyle == FullNameStyle.CJK && phoneticNameStyle == PhoneticNameStyle.PINYIN) {
    801                     return FullNameStyle.CHINESE;
    802                 }
    803             }
    804         }
    805         return nameStyle;
    806     }
    807 
    808     /**
    809      * Makes the best guess at the expected full name style based on the character set
    810      * used in the supplied name.
    811      */
    812     private void guessFullNameStyle(NameSplitter.Name name) {
    813         if (name.fullNameStyle != FullNameStyle.UNDEFINED) {
    814             return;
    815         }
    816 
    817         int bestGuess = guessFullNameStyle(name.givenNames);
    818         // A mix of Hanzi and latin chars are common in China, so we have to go through all names
    819         // if the name is not JANPANESE or KOREAN.
    820         if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK
    821                 && bestGuess != FullNameStyle.WESTERN) {
    822             name.fullNameStyle = bestGuess;
    823             return;
    824         }
    825 
    826         int guess = guessFullNameStyle(name.familyName);
    827         if (guess != FullNameStyle.UNDEFINED) {
    828             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    829                 name.fullNameStyle = guess;
    830                 return;
    831             }
    832             bestGuess = guess;
    833         }
    834 
    835         guess = guessFullNameStyle(name.middleName);
    836         if (guess != FullNameStyle.UNDEFINED) {
    837             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
    838                 name.fullNameStyle = guess;
    839                 return;
    840             }
    841             bestGuess = guess;
    842         }
    843 
    844         name.fullNameStyle = bestGuess;
    845     }
    846 
    847     public int guessFullNameStyle(String name) {
    848         if (name == null) {
    849             return FullNameStyle.UNDEFINED;
    850         }
    851 
    852         int nameStyle = FullNameStyle.UNDEFINED;
    853         int length = name.length();
    854         int offset = 0;
    855         while (offset < length) {
    856             int codePoint = Character.codePointAt(name, offset);
    857             if (Character.isLetter(codePoint)) {
    858                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
    859 
    860                 if (!isLatinUnicodeBlock(unicodeBlock)) {
    861 
    862                     if (isCJKUnicodeBlock(unicodeBlock)) {
    863                         // We don't know if this is Chinese, Japanese or Korean -
    864                         // trying to figure out by looking at other characters in the name
    865                         return guessCJKNameStyle(name, offset + Character.charCount(codePoint));
    866                     }
    867 
    868                     if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
    869                         return FullNameStyle.JAPANESE;
    870                     }
    871 
    872                     if (isKoreanUnicodeBlock(unicodeBlock)) {
    873                         return FullNameStyle.KOREAN;
    874                     }
    875                 }
    876                 nameStyle = FullNameStyle.WESTERN;
    877             }
    878             offset += Character.charCount(codePoint);
    879         }
    880         return nameStyle;
    881     }
    882 
    883     private int guessCJKNameStyle(String name, int offset) {
    884         int length = name.length();
    885         while (offset < length) {
    886             int codePoint = Character.codePointAt(name, offset);
    887             if (Character.isLetter(codePoint)) {
    888                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
    889                 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
    890                     return FullNameStyle.JAPANESE;
    891                 }
    892                 if (isKoreanUnicodeBlock(unicodeBlock)) {
    893                     return FullNameStyle.KOREAN;
    894                 }
    895             }
    896             offset += Character.charCount(codePoint);
    897         }
    898 
    899         return FullNameStyle.CJK;
    900     }
    901 
    902     private void guessPhoneticNameStyle(NameSplitter.Name name) {
    903         if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
    904             return;
    905         }
    906 
    907         int bestGuess = guessPhoneticNameStyle(name.phoneticFamilyName);
    908         if (bestGuess != FullNameStyle.UNDEFINED && bestGuess != FullNameStyle.CJK) {
    909             name.phoneticNameStyle = bestGuess;
    910             return;
    911         }
    912 
    913         int guess = guessPhoneticNameStyle(name.phoneticGivenName);
    914         if (guess != FullNameStyle.UNDEFINED) {
    915             if (guess != FullNameStyle.CJK) {
    916                 name.phoneticNameStyle = guess;
    917                 return;
    918             }
    919             bestGuess = guess;
    920         }
    921 
    922         guess = guessPhoneticNameStyle(name.phoneticMiddleName);
    923         if (guess != FullNameStyle.UNDEFINED) {
    924             if (guess != FullNameStyle.CJK) {
    925                 name.phoneticNameStyle = guess;
    926                 return;
    927             }
    928             bestGuess = guess;
    929         }
    930     }
    931 
    932     public int guessPhoneticNameStyle(String name) {
    933         if (name == null) {
    934             return PhoneticNameStyle.UNDEFINED;
    935         }
    936 
    937         int nameStyle = PhoneticNameStyle.UNDEFINED;
    938         int length = name.length();
    939         int offset = 0;
    940         while (offset < length) {
    941             int codePoint = Character.codePointAt(name, offset);
    942             if (Character.isLetter(codePoint)) {
    943                 UnicodeBlock unicodeBlock = UnicodeBlock.of(codePoint);
    944                 if (isJapanesePhoneticUnicodeBlock(unicodeBlock)) {
    945                     return PhoneticNameStyle.JAPANESE;
    946                 }
    947                 if (isKoreanUnicodeBlock(unicodeBlock)) {
    948                     return PhoneticNameStyle.KOREAN;
    949                 }
    950                 if (isLatinUnicodeBlock(unicodeBlock)) {
    951                     return PhoneticNameStyle.PINYIN;
    952                 }
    953             }
    954             offset += Character.charCount(codePoint);
    955         }
    956 
    957         return nameStyle;
    958     }
    959 
    960     private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
    961         return unicodeBlock == UnicodeBlock.BASIC_LATIN ||
    962                 unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT ||
    963                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A ||
    964                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B ||
    965                 unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
    966     }
    967 
    968     private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
    969         return block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
    970                 || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
    971                 || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
    972                 || block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
    973                 || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT
    974                 || block == UnicodeBlock.CJK_COMPATIBILITY
    975                 || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
    976                 || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
    977                 || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
    978     }
    979 
    980     private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) {
    981         return unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES ||
    982                 unicodeBlock == UnicodeBlock.HANGUL_JAMO ||
    983                 unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
    984     }
    985 
    986     private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
    987         return unicodeBlock == UnicodeBlock.KATAKANA ||
    988                 unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS ||
    989                 unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS ||
    990                 unicodeBlock == UnicodeBlock.HIRAGANA;
    991     }
    992 }
    993