// Web code-viewer navigation residue (not part of the program): Home | History | Annotate | Download | only in tool
package org.unicode.cldr.tool;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;

import org.unicode.cldr.draft.FileUtilities;

import com.ibm.icu.impl.Relation;

     16 /**
     17  * Take mappings to IPA and interleave them.
     18  */
     19 public class MatchStrings {
     20 
     21     static String cldrDataDir = "C:\\cvsdata\\unicode\\cldr\\tools\\java\\org\\unicode\\cldr\\util\\data\\transforms\\";
     22 
     23     static class Info {
     24         String english;
     25 
     26         String ipa;
     27 
     28         String fixedIpa;
     29 
     30         public Info(String english, String ipa, String fixedIpa) {
     31             this.english = english;
     32             this.ipa = ipa;
     33             this.fixedIpa = fixedIpa.equals(ipa) ? ipa : fixedIpa; // make ==
     34         }
     35 
     36         public String toString() {
     37             return "{" + english + "/" + ipa + (fixedIpa == ipa ? "" : "/" + fixedIpa) + "}";
     38         }
     39     }
     40 
     41     Relation<String, Info> letter_correspondances = Relation.of(new TreeMap<String, Set<Info>>(),
     42         LinkedHashSet.class);
     43 
     44     MatchStrings() throws IOException {
     45         BufferedReader in = FileUtilities.openUTF8Reader(cldrDataDir, "internal_matchIpaRules.txt");
     46         while (true) {
     47             String line = in.readLine();
     48             if (line == null)
     49                 break;
     50             if (line.length() == 0)
     51                 continue;
     52             String[] parts = line.split("\\s+");
     53             String ipa = parts.length > 1 ? parts[1] : "";
     54             add(parts[0], ipa, parts.length > 2 ? parts[2] : ipa);
     55         }
     56         in.close();
     57     }
     58 
     59     void add(String english, String ipa, String fixedIpa) {
     60         String key = english.length() == 0 ? "" : english.substring(0, 1);
     61         letter_correspondances.put(key, new Info(english, ipa,
     62             fixedIpa));
     63     }
     64 
     65     /**
     66      * Insert the IPA in after the string, such as baitt + /bet/ => b{b}ai{e}t{t}
     67      *
     68      * @param english
     69      * @param ipa
     70      * @return
     71      */
     72     int interleaveIPA(String english, String ipa, List<Info> output) {
     73         highWater = 0;
     74         longestEnglish = 0;
     75         longestIpa = 0;
     76         highWaterList.clear();
     77         this.english = english;
     78         this.ipa = ipa;
     79         this.output = output;
     80         output.clear();
     81         return interleave2(0, 0);
     82     }
     83 
     84     String english;
     85     String ipa;
     86     List<Info> output;
     87     int highWater = 0;
     88     List<Info> highWaterList = new ArrayList<Info>();
     89     private int longestEnglish;
     90     private int longestIpa;
     91 
     92     /**
     93      * Recursively match the string. Right now, we just take the matches in order;
     94      * later we could try a weighted fit
     95      *
     96      * @param english
     97      * @param englishPosition
     98      * @param ipa
     99      * @param ipaPosition
    100      * @param path2values
    101      * @return
    102      */
    103     private int interleave2(int englishPosition, int ipaPosition) {
    104 
    105         if (highWater < ipaPosition) {
    106             highWaterList.clear();
    107             highWaterList.addAll(output);
    108             highWater = output.size();
    109             longestEnglish = englishPosition;
    110             longestIpa = ipaPosition;
    111         }
    112         if (englishPosition == english.length()) {
    113             if (ipaPosition == ipa.length()) {
    114                 return 1;
    115             }
    116             return 0;
    117         }
    118         String firstLetter = english.substring(englishPosition, englishPosition + 1);
    119         Set<Info> possibilities = letter_correspondances.getAll(firstLetter);
    120         if (possibilities != null) {
    121             int result = checkPossibilities(possibilities, englishPosition, ipaPosition);
    122             if (result != 0) {
    123                 return result;
    124             }
    125         }
    126 
    127         // we failed, try the empty string
    128         possibilities = letter_correspondances.getAll("");
    129         if (possibilities != null) {
    130             int result = checkPossibilities(possibilities, englishPosition, ipaPosition);
    131             if (result != 0) {
    132                 return result;
    133             }
    134         }
    135 
    136         // failed,
    137 
    138         // we failed to find a pair. Make last check to see if we just
    139         // delete one English letter
    140         Info last = output.size() == 0 ? null : output.get(output.size() - 1);
    141         if (last == null || last.ipa.length() != 0) {
    142             output.add(new Info(firstLetter, "", ""));
    143             int result = interleave2(englishPosition + 1, ipaPosition);
    144             if (result == 1) {
    145                 return 1;
    146             }
    147             // if we fail, then remove the pair, and continue
    148             output.remove(output.size() - 1);
    149         }
    150 
    151         // if we get this far, we've exhausted the possibilities, so fail
    152         return 0;
    153     }
    154 
    155     int checkPossibilities(Collection<Info> possibilities, int englishPosition, int ipaPosition) {
    156         for (Info englishIpa : possibilities) {
    157             // skip if we don't match
    158             String englishPart = englishIpa.english;
    159             String ipaPart = englishIpa.ipa;
    160             if (!english.regionMatches(englishPosition, englishPart, 0, englishPart.length())) {
    161                 continue;
    162             }
    163             // boolean ipaMatches = ipa.regionMatches(ipaPosition, ipaPart, 0, ipaPart.length());
    164             // boolean ipa2Matches = matchAtIgnoring(ipaPosition, ipaPart);
    165             // if (ipaMatches != ipa2Matches) {
    166             // System.out.println("Fails " + ipa.substring(ipaPosition) + ", " + ipaPart);
    167             // }
    168             int matchesUpTo = matchAtIgnoring(ipaPosition, ipaPart);
    169             if (matchesUpTo < 0) {
    170                 continue;
    171             }
    172             // we match, so recurse
    173             output.add(englishIpa);
    174             int result = interleave2(englishPosition + englishPart.length(), matchesUpTo);
    175             if (result == 1) {
    176                 return 1;
    177             }
    178             // if we fail, then remove the pair, and continue
    179             output.remove(output.size() - 1);
    180         }
    181         return 0;
    182     }
    183 
    184     /**
    185      * Does ipaPart match ipa at the position, ignoring stress marks in ipa?
    186      * Returns how far it got.
    187      *
    188      * @param ipaPosition
    189      * @param ipaPart
    190      * @return
    191      */
    192     private int matchAtIgnoring(int ipaPosition, String ipaPart) {
    193         if (ipaPart.length() == 0) return ipaPosition;
    194         int j = 0;
    195         for (int i = ipaPosition; i < ipa.length(); ++i) {
    196             char ch = ipa.charAt(i);
    197             if (ch == '' || ch == '') continue;
    198             char ch2 = ipaPart.charAt(j++);
    199             if (ch != ch2) return -1;
    200             if (j >= ipaPart.length()) return i + 1;
    201         }
    202         return -1;
    203     }
    204 
    205     List<Info> current = new ArrayList<Info>();
    206 
    207     /**
    208      * Fix the IPA in a string
    209      *
    210      * @param english
    211      * @param ipa
    212      * @return
    213      */
    214     String fixIPA(String english, String ipa) {
    215         int result = interleaveIPA(english, ipa, current);
    216         if (result == 0)
    217             return null;
    218         StringBuilder buffer = new StringBuilder();
    219         for (Info englishIpa : current) {
    220             buffer.append(englishIpa.fixedIpa);
    221         }
    222         return buffer.toString();
    223     }
    224 
    225     String getTrace() {
    226         return highWaterList.toString() + "\t\t" + english.substring(longestEnglish) + "\t\t"
    227             + ipa.substring(longestIpa);
    228     }
    229 }
    230