1 package org.unicode.cldr.tool; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.util.ArrayList; 6 import java.util.Collection; 7 import java.util.LinkedHashSet; 8 import java.util.List; 9 import java.util.Set; 10 import java.util.TreeMap; 11 12 import org.unicode.cldr.draft.FileUtilities; 13 14 import com.ibm.icu.impl.Relation; 15 16 /** 17 * Take mappings to IPA and interleave them. 18 */ 19 public class MatchStrings { 20 21 static String cldrDataDir = "C:\\cvsdata\\unicode\\cldr\\tools\\java\\org\\unicode\\cldr\\util\\data\\transforms\\"; 22 23 static class Info { 24 String english; 25 26 String ipa; 27 28 String fixedIpa; 29 30 public Info(String english, String ipa, String fixedIpa) { 31 this.english = english; 32 this.ipa = ipa; 33 this.fixedIpa = fixedIpa.equals(ipa) ? ipa : fixedIpa; // make == 34 } 35 36 public String toString() { 37 return "{" + english + "/" + ipa + (fixedIpa == ipa ? "" : "/" + fixedIpa) + "}"; 38 } 39 } 40 41 Relation<String, Info> letter_correspondances = Relation.of(new TreeMap<String, Set<Info>>(), 42 LinkedHashSet.class); 43 44 MatchStrings() throws IOException { 45 BufferedReader in = FileUtilities.openUTF8Reader(cldrDataDir, "internal_matchIpaRules.txt"); 46 while (true) { 47 String line = in.readLine(); 48 if (line == null) 49 break; 50 if (line.length() == 0) 51 continue; 52 String[] parts = line.split("\\s+"); 53 String ipa = parts.length > 1 ? parts[1] : ""; 54 add(parts[0], ipa, parts.length > 2 ? parts[2] : ipa); 55 } 56 in.close(); 57 } 58 59 void add(String english, String ipa, String fixedIpa) { 60 String key = english.length() == 0 ? "" : english.substring(0, 1); 61 letter_correspondances.put(key, new Info(english, ipa, 62 fixedIpa)); 63 } 64 65 /** 66 * Insert the IPA in after the string, such as baitt + /bet/ => b{b}ai{e}t{t} 67 * 68 * @param english 69 * @param ipa 70 * @return 71 */ 72 int interleaveIPA(String english, String ipa, List<Info> output) { 73 highWater = 0; 74 longestEnglish = 0; 75 longestIpa = 0; 76 highWaterList.clear(); 77 this.english = english; 78 this.ipa = ipa; 79 this.output = output; 80 output.clear(); 81 return interleave2(0, 0); 82 } 83 84 String english; 85 String ipa; 86 List<Info> output; 87 int highWater = 0; 88 List<Info> highWaterList = new ArrayList<Info>(); 89 private int longestEnglish; 90 private int longestIpa; 91 92 /** 93 * Recursively match the string. Right now, we just take the matches in order; 94 * later we could try a weighted fit 95 * 96 * @param english 97 * @param englishPosition 98 * @param ipa 99 * @param ipaPosition 100 * @param path2values 101 * @return 102 */ 103 private int interleave2(int englishPosition, int ipaPosition) { 104 105 if (highWater < ipaPosition) { 106 highWaterList.clear(); 107 highWaterList.addAll(output); 108 highWater = output.size(); 109 longestEnglish = englishPosition; 110 longestIpa = ipaPosition; 111 } 112 if (englishPosition == english.length()) { 113 if (ipaPosition == ipa.length()) { 114 return 1; 115 } 116 return 0; 117 } 118 String firstLetter = english.substring(englishPosition, englishPosition + 1); 119 Set<Info> possibilities = letter_correspondances.getAll(firstLetter); 120 if (possibilities != null) { 121 int result = checkPossibilities(possibilities, englishPosition, ipaPosition); 122 if (result != 0) { 123 return result; 124 } 125 } 126 127 // we failed, try the empty string 128 possibilities = letter_correspondances.getAll(""); 129 if (possibilities != null) { 130 int result = checkPossibilities(possibilities, englishPosition, ipaPosition); 131 if (result != 0) { 132 return result; 133 } 134 } 135 136 // failed, 137 138 // we failed to find a pair. Make last check to see if we just 139 // delete one English letter 140 Info last = output.size() == 0 ? null : output.get(output.size() - 1); 141 if (last == null || last.ipa.length() != 0) { 142 output.add(new Info(firstLetter, "", "")); 143 int result = interleave2(englishPosition + 1, ipaPosition); 144 if (result == 1) { 145 return 1; 146 } 147 // if we fail, then remove the pair, and continue 148 output.remove(output.size() - 1); 149 } 150 151 // if we get this far, we've exhausted the possibilities, so fail 152 return 0; 153 } 154 155 int checkPossibilities(Collection<Info> possibilities, int englishPosition, int ipaPosition) { 156 for (Info englishIpa : possibilities) { 157 // skip if we don't match 158 String englishPart = englishIpa.english; 159 String ipaPart = englishIpa.ipa; 160 if (!english.regionMatches(englishPosition, englishPart, 0, englishPart.length())) { 161 continue; 162 } 163 // boolean ipaMatches = ipa.regionMatches(ipaPosition, ipaPart, 0, ipaPart.length()); 164 // boolean ipa2Matches = matchAtIgnoring(ipaPosition, ipaPart); 165 // if (ipaMatches != ipa2Matches) { 166 // System.out.println("Fails " + ipa.substring(ipaPosition) + ", " + ipaPart); 167 // } 168 int matchesUpTo = matchAtIgnoring(ipaPosition, ipaPart); 169 if (matchesUpTo < 0) { 170 continue; 171 } 172 // we match, so recurse 173 output.add(englishIpa); 174 int result = interleave2(englishPosition + englishPart.length(), matchesUpTo); 175 if (result == 1) { 176 return 1; 177 } 178 // if we fail, then remove the pair, and continue 179 output.remove(output.size() - 1); 180 } 181 return 0; 182 } 183 184 /** 185 * Does ipaPart match ipa at the position, ignoring stress marks in ipa? 186 * Returns how far it got. 187 * 188 * @param ipaPosition 189 * @param ipaPart 190 * @return 191 */ 192 private int matchAtIgnoring(int ipaPosition, String ipaPart) { 193 if (ipaPart.length() == 0) return ipaPosition; 194 int j = 0; 195 for (int i = ipaPosition; i < ipa.length(); ++i) { 196 char ch = ipa.charAt(i); 197 if (ch == '' || ch == '') continue; 198 char ch2 = ipaPart.charAt(j++); 199 if (ch != ch2) return -1; 200 if (j >= ipaPart.length()) return i + 1; 201 } 202 return -1; 203 } 204 205 List<Info> current = new ArrayList<Info>(); 206 207 /** 208 * Fix the IPA in a string 209 * 210 * @param english 211 * @param ipa 212 * @return 213 */ 214 String fixIPA(String english, String ipa) { 215 int result = interleaveIPA(english, ipa, current); 216 if (result == 0) 217 return null; 218 StringBuilder buffer = new StringBuilder(); 219 for (Info englishIpa : current) { 220 buffer.append(englishIpa.fixedIpa); 221 } 222 return buffer.toString(); 223 } 224 225 String getTrace() { 226 return highWaterList.toString() + "\t\t" + english.substring(longestEnglish) + "\t\t" 227 + ipa.substring(longestIpa); 228 } 229 } 230