1 /* 2 * Copyright 2001-2004 The Apache Software Foundation. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.commons.codec.language; 18 19 import org.apache.commons.codec.EncoderException; 20 import org.apache.commons.codec.StringEncoder; 21 22 /** 23 * Utility methods for {@link Soundex} and {@link RefinedSoundex} classes. 24 * 25 * @author Apache Software Foundation 26 * @version $Id: SoundexUtils.java,v 1.5 2004/03/17 18:31:35 ggregory Exp $ 27 * @since 1.3 28 * 29 * @deprecated Please use {@link java.net.URL#openConnection} instead. 30 * Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a> 31 * for further details. 32 */ 33 @Deprecated 34 final class SoundexUtils { 35 36 /** 37 * Cleans up the input string before Soundex processing by only returning 38 * upper case letters. 39 * 40 * @param str 41 * The String to clean. 42 * @return A clean String. 43 */ 44 static String clean(String str) { 45 if (str == null || str.length() == 0) { 46 return str; 47 } 48 int len = str.length(); 49 char[] chars = new char[len]; 50 int count = 0; 51 for (int i = 0; i < len; i++) { 52 if (Character.isLetter(str.charAt(i))) { 53 chars[count++] = str.charAt(i); 54 } 55 } 56 if (count == len) { 57 return str.toUpperCase(); 58 } 59 return new String(chars, 0, count).toUpperCase(); 60 } 61 62 /** 63 * Encodes the Strings and returns the number of characters in the two 64 * encoded Strings that are the same. 65 * <ul> 66 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates 67 * little or no similarity, and 4 indicates strong similarity or identical 68 * values.</li> 69 * <li>For refined Soundex, the return value can be greater than 4.</li> 70 * </ul> 71 * 72 * @param encoder 73 * The encoder to use to encode the Strings. 74 * @param s1 75 * A String that will be encoded and compared. 76 * @param s2 77 * A String that will be encoded and compared. 78 * @return The number of characters in the two Soundex encoded Strings that 79 * are the same. 80 * 81 * @see #differenceEncoded(String,String) 82 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 83 * MS T-SQL DIFFERENCE</a> 84 * 85 * @throws EncoderException 86 * if an error occurs encoding one of the strings 87 */ 88 static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException { 89 return differenceEncoded(encoder.encode(s1), encoder.encode(s2)); 90 } 91 92 /** 93 * Returns the number of characters in the two Soundex encoded Strings that 94 * are the same. 95 * <ul> 96 * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates 97 * little or no similarity, and 4 indicates strong similarity or identical 98 * values.</li> 99 * <li>For refined Soundex, the return value can be greater than 4.</li> 100 * </ul> 101 * 102 * @param es1 103 * An encoded String. 104 * @param es2 105 * An encoded String. 106 * @return The number of characters in the two Soundex encoded Strings that 107 * are the same. 108 * 109 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> 110 * MS T-SQL DIFFERENCE</a> 111 */ 112 static int differenceEncoded(String es1, String es2) { 113 114 if (es1 == null || es2 == null) { 115 return 0; 116 } 117 int lengthToMatch = Math.min(es1.length(), es2.length()); 118 int diff = 0; 119 for (int i = 0; i < lengthToMatch; i++) { 120 if (es1.charAt(i) == es2.charAt(i)) { 121 diff++; 122 } 123 } 124 return diff; 125 } 126 127 } 128