/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */
package com.android.providers.contacts;

import com.android.providers.contacts.util.Hex;
import com.google.common.annotations.VisibleForTesting;

import java.text.CollationKey;
import java.text.Collator;
import java.text.RuleBasedCollator;
import java.util.Locale;

/**
 * Converts a name to a normalized form by removing all characters that are neither letters
 * nor digits and normalizing UNICODE according to http://unicode.org/unicode/reports/tr15
 */
public class NameNormalizer {

    /** Guards the cached locale and the two cached collators below. */
    private static final Object sCollatorLock = new Object();

    /** Locale the cached collators were built for; null until the first build succeeds. */
    private static Locale sCollatorLocale;

    /** PRIMARY-strength collator with canonical decomposition, used by {@link #normalize}. */
    private static RuleBasedCollator sCachedCompressingCollator;

    /** SECONDARY-strength collator, used by {@link #compareComplexity}. */
    private static RuleBasedCollator sCachedComplexityCollator;

    /**
     * Ensure that the cached collators are for the current locale.
     *
     * <p>Must be called with {@link #sCollatorLock} held. The new collators are fully
     * configured before any cached field is touched, and the locale is recorded last, so a
     * failure while building them leaves the previous state intact and the next call retries
     * instead of returning early with stale or missing collators.
     */
    private static void ensureCollators() {
        final Locale locale = Locale.getDefault();
        if (locale.equals(sCollatorLocale)) {
            return;
        }

        final RuleBasedCollator compressing = (RuleBasedCollator) Collator.getInstance(locale);
        compressing.setStrength(Collator.PRIMARY);
        compressing.setDecomposition(Collator.CANONICAL_DECOMPOSITION);

        final RuleBasedCollator complexity = (RuleBasedCollator) Collator.getInstance(locale);
        complexity.setStrength(Collator.SECONDARY);

        sCachedCompressingCollator = compressing;
        sCachedComplexityCollator = complexity;
        sCollatorLocale = locale;
    }

    /**
     * Returns the PRIMARY-strength collator for the current default locale, rebuilding the
     * cache first if the default locale has changed.
     */
    @VisibleForTesting
    static RuleBasedCollator getCompressingCollator() {
        synchronized (sCollatorLock) {
            ensureCollators();
            return sCachedCompressingCollator;
        }
    }

    /**
     * Returns the SECONDARY-strength collator for the current default locale, rebuilding the
     * cache first if the default locale has changed.
     */
    @VisibleForTesting
    static RuleBasedCollator getComplexityCollator() {
        synchronized (sCollatorLock) {
            ensureCollators();
            return sCachedComplexityCollator;
        }
    }

    /**
     * Converts the supplied name to a string that can be used to perform approximate matching
     * of names. It ignores non-letter, non-digit characters, and removes accents.
     *
     * @param name the name to normalize; {@code null} is treated like the empty string
     * @return the collation key of the cleaned-up name, encoded by {@link Hex#encodeHex}
     */
    public static String normalize(String name) {
        CollationKey key = getCompressingCollator().getCollationKey(lettersAndDigitsOnly(name));
        return Hex.encodeHex(key.toByteArray(), true);
    }

    /**
     * Compares "complexity" of two names, which is determined by the presence
     * of mixed case characters, accents and, if all else is equal, length.
     *
     * @param name1 the first name; {@code null} is treated like the empty string
     * @param name2 the second name; {@code null} is treated like the empty string
     * @return a negative value, zero or a positive value if {@code name1} is respectively less
     *         complex than, as complex as, or more complex than {@code name2}
     */
    public static int compareComplexity(String name1, String name2) {
        String clean1 = lettersAndDigitsOnly(name1);
        String clean2 = lettersAndDigitsOnly(name2);
        int diff = getComplexityCollator().compare(clean1, clean2);
        if (diff != 0) {
            return diff;
        }
        // compareTo sorts uppercase first. We know that there are no non-case
        // differences from the above test, so we can negate here to get the
        // lowercase-first comparison we really want...
        diff = -clean1.compareTo(clean2);
        if (diff != 0) {
            return diff;
        }
        // The tie-break uses the raw (unfiltered) lengths. A null name counts as length 0,
        // matching how lettersAndDigitsOnly treats null, instead of throwing here.
        // Both lengths are non-negative, so the subtraction cannot overflow.
        final int length1 = (name1 == null) ? 0 : name1.length();
        final int length2 = (name2 == null) ? 0 : name2.length();
        return length1 - length2;
    }

    /**
     * Returns a string containing just the letters and digits from the original string.
     * Returns empty string if the original string is null.
     *
     * <p>The scan works on Unicode code points rather than on UTF-16 code units, so letters
     * and digits outside the Basic Multilingual Plane (stored as surrogate pairs) are kept
     * rather than being discarded one surrogate at a time.
     */
    private static String lettersAndDigitsOnly(String name) {
        if (name == null) {
            return "";
        }
        final char[] letters = name.toCharArray();
        int length = 0;
        for (int i = 0; i < letters.length; ) {
            final int codePoint = Character.codePointAt(letters, i);
            final int width = Character.charCount(codePoint);
            if (Character.isLetterOrDigit(codePoint)) {
                // Compact in place: the write index never passes the read index, and the
                // code point at i has already been read, so nothing unread is overwritten.
                length += Character.toChars(codePoint, letters, length);
            }
            i += width;
        }

        if (length != letters.length) {
            return new String(letters, 0, length);
        }

        return name;
    }
}