Home | History | Annotate | Download | only in util
      1 package org.unicode.cldr.util;
      2 
      3 import java.io.BufferedReader;
      4 import java.io.IOException;
      5 import java.nio.charset.Charset;
      6 import java.util.Arrays;
      7 import java.util.Collection;
      8 import java.util.Collections;
      9 import java.util.HashMap;
     10 import java.util.HashSet;
     11 import java.util.LinkedHashSet;
     12 import java.util.Locale;
     13 import java.util.Map;
     14 import java.util.Map.Entry;
     15 import java.util.Set;
     16 import java.util.TreeMap;
     17 import java.util.TreeSet;
     18 
     19 import org.unicode.cldr.tool.CountryCodeConverter;
     20 import org.unicode.cldr.tool.ToolConfig;
     21 import org.unicode.cldr.util.ChainedMap.M3;
     22 
     23 import com.ibm.icu.dev.util.CollectionUtilities;
     24 import com.ibm.icu.impl.Relation;
     25 import com.ibm.icu.text.Transform;
     26 import com.ibm.icu.text.Transliterator;
     27 import com.ibm.icu.util.ICUUncheckedIOException;
     28 import com.ibm.icu.util.Output;
     29 import com.ibm.icu.util.ULocale;
     30 
     31 public class Unlocode {
     32 
    /** Character set passed to the file reader when {@link #load(int)} opens the UN/LOCODE CSV files. */
    private static final Charset LATIN1 = Charset.forName("ISO8859-1");
     34 
    /**
     * Implemented by value types that can be combined with another value of the same type.
     * Used by {@link #putCheckingDuplicate(Map, Object, Mergeable)} to combine records that
     * share a key.
     *
     * @param <T> the type being merged, and the type of the merged result
     */
    public interface Mergeable<T> {
        /**
         * Combines this value with {@code a}.
         *
         * @param a the value to combine with this one
         * @return the combined value
         */
        T merge(T a);
    }
     38 
     39     public static class Iso3166_2Data implements Mergeable<Iso3166_2Data> {
     40         public final Set<String> names;
     41 
     42         public Iso3166_2Data(String... name) {
     43             this(Arrays.asList(name));
     44         }
     45 
     46         public Iso3166_2Data(Collection<String> names) {
     47             this.names = Collections.unmodifiableSet(new LinkedHashSet<String>(names));
     48         }
     49 
     50         @Override
     51         public String toString() {
     52             return names.toString();
     53         }
     54 
     55         @Override
     56         public boolean equals(Object obj) {
     57             return names.equals((Iso3166_2Data) obj);
     58         }
     59 
     60         @Override
     61         public int hashCode() {
     62             return names.hashCode();
     63         }
     64 
     65         @Override
     66         public Iso3166_2Data merge(Iso3166_2Data b) {
     67             LinkedHashSet<String> set = new LinkedHashSet<String>(names);
     68             set.addAll(b.names);
     69             return new Iso3166_2Data(set);
     70         }
     71     }
     72 
     73     public static class LocodeData implements Mergeable<LocodeData>, Comparable<LocodeData> {
     74         public final String locode;
     75         public final Set<String> names;
     76         public final String subdivision;
     77         public final float north;
     78         public final float east;
     79 
     80         public LocodeData(String locode, String name, String subdivision, float north, float east) {
     81             this(locode, Arrays.asList(name), subdivision, north, east);
     82         }
     83 
     84         public LocodeData(String locode, Collection<String> names, String subdivision, float north, float east) {
     85             this.locode = locode;
     86             this.names = Collections.unmodifiableSet(new LinkedHashSet<String>(names));
     87             this.subdivision = subdivision;
     88             this.north = north;
     89             this.east = east;
     90         }
     91 
     92         @Override
     93         public String toString() {
     94             return names + ", " + locode + ", " + subdivision + ", " + north + ", " + east;
     95         }
     96 
     97         /**
     98          * Warning, must never have locode datas with the same locode and different other data.
     99          */
    100         @Override
    101         public int compareTo(LocodeData o) {
    102             // TODO Auto-generated method stub
    103             return locode.compareTo(o.locode);
    104         }
    105 
    106         /**
    107          * Warning, must never have locode datas with the same locode and different other data.
    108          */
    109         @Override
    110         public boolean equals(Object obj) {
    111             LocodeData other = (LocodeData) obj;
    112             return locode.equals(other.locode);
    113         }
    114 
    115         @Override
    116         public int hashCode() {
    117             return locode.hashCode();
    118         }
    119 
    120         @Override
    121         public LocodeData merge(LocodeData other) {
    122             if (locode.equals(other.locode)
    123                 && subdivision.equals(other.subdivision)
    124                 && north == other.north
    125                 && east == other.east) {
    126                 LinkedHashSet<String> set = new LinkedHashSet<String>(names);
    127                 set.addAll(other.names);
    128                 return new LocodeData(locode, set, subdivision, north, east);
    129             }
    130             throw new IllegalArgumentException("Can't merge " + this + " with " + other);
    131         }
    132 
    133     }
    134 
    /** Locode to its record; filled during class initialization, then wrapped as unmodifiable. */
    static Map<String, LocodeData> locodeToData = new HashMap<String, LocodeData>();
    /** Each name of each record to the records carrying that name; frozen at the end of class initialization. */
    static Relation<String, LocodeData> nameToLocodeData = Relation.of(new HashMap<String, Set<LocodeData>>(), HashSet.class);
    /** ISO 3166-2 subdivision code to its names; wrapped as unmodifiable right after {@link #loadIso()} runs. */
    static Map<String, Iso3166_2Data> iso3166_2Data = new HashMap<String, Iso3166_2Data>();
    /** Problems recorded while loading (sorted by key, messages sorted per key); frozen at the end of class initialization. */
    static Relation<String, String> ERRORS = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
    139 
    140     static {
    141         // read the data
    142         try {
    143             loadIso();
    144             iso3166_2Data = Collections.unmodifiableMap(iso3166_2Data);
    145             load(1);
    146             load(2);
    147             load(3);
    148             // load exceptions
    149             try {
    150                 BufferedReader br = FileReaders.openFile(CldrUtility.class,
    151                     "data/external/alternate_locode_name.txt");
    152                 while (true) {
    153                     String line = br.readLine();
    154                     if (line == null) {
    155                         break;
    156                     }
    157                     int hash = line.indexOf('#');
    158                     if (hash >= 0) {
    159                         line = line.substring(0, hash);
    160                     }
    161                     line = line.trim();
    162                     if (line.isEmpty()) {
    163                         continue;
    164                     }
    165                     if (line.equals("EOF")) {
    166                         break;
    167                     }
    168                     String[] parts = line.split("\\s*;\\s*");
    169                     //System.out.println(Arrays.asList(parts));
    170                     String locode = parts[0].replace(" ", "");
    171                     if (locode.length() != 5) {
    172                         throw new IllegalArgumentException(line);
    173                     }
    174                     String alternateName = parts[1];
    175                     LocodeData locodeData = locodeToData.get(locode);
    176                     putCheckingDuplicate(locodeToData, locode, new LocodeData(
    177                         locode, alternateName, locodeData.subdivision, locodeData.north, locodeData.east));
    178                 }
    179                 br.close();
    180             } catch (IOException e) {
    181                 throw new ICUUncheckedIOException(e);
    182             }
    183             for (LocodeData s : locodeToData.values()) {
    184                 for (String name : s.names) {
    185                     nameToLocodeData.put(name, s);
    186                 }
    187             }
    188             nameToLocodeData.freeze();
    189             locodeToData = Collections.unmodifiableMap(locodeToData);
    190             ERRORS.freeze();
    191         } catch (IOException e) {
    192         }
    193     }
    194 
    195     /* http://www.unece.org/fileadmin/DAM/cefact/locode/unlocode_manual.pdf
    196     //
    197      * 0 ,
    198      * 1 "AD",
    199      * 2 "SJL",
     * 3 "Sant Julià de Lòria",
    201      * 4 "Sant Julia de Loria",
    202      * 5 ?,
    203      * 6 "--3-----",
    204      * 7 "RL",
    205      * 8 "1101",
    206      * 9 ,
    207      * 10 "4228N 00130E",""
    208             0 Column Change
    209             X Marked for deletion in the next issue
    210             1 Country code
    211                     "XZ" - no country
    212             2 Column LOCODE
    213             3 Column Name
    214             4 Column Name Without Diacritics
    215             5 Column Subdivision
    216             6 Column Function
    217             7 Column Status
    218             8 Column Date
    219             9 Column IATA
    220             10 Latitude/Longitude
    221             Torbay: 4739N 05244W "4739N 05244W"
    222      */
    223 
    224     //    public static class FieldData<K extends Enum<K>> {
    225     //        private List<EnumMap<K,String>> data;
    226     //        public FieldData(Class<K> classInstance, BufferedReader r, String filename) {
    227     //            data = new ArrayList<EnumMap<K,String>>();
    228     //            FileUtilities.FileProcessor myReader = new FileUtilities.FileProcessor() {
    229     //                @Override
    230     //                protected boolean handleLine(int lineCount, String line) {
    231     //                    // TODO Auto-generated method stub
    232     //                    return super.handleLine(lineCount, line);
    233     //                }
    234     //            };
    235     //            myReader.process(r, filename);
    236     //            //new EnumMap<K, String>(classInstance);
    237     //        }
    238     //    }
    239 
    /**
     * Columns of the tab-separated subdivision data file, in file order.
     * {@link #loadIso()} uses each constant's ordinal as the column index, so the
     * declaration order must not change.
     */
    enum SubdivisionFields {
        Subdivision_category,
        Code_3166_2,
        Subdivision_name,
        Language_code,
        Romanization_system,
        Parent_subdivision
    }
    243 
    244     public static void loadIso() throws IOException {
    245         BufferedReader br = FileReaders.openFile(CldrUtility.class,
    246             "data/external/subdivisionData.txt", CldrUtility.UTF8);
    247         while (true) {
    248             // Subdivision category TAB 3166-2 code TAB Subdivision name TAB Language code TAB Romanization system TAB Parent subdivision
    249 
    250             String line = br.readLine();
    251             if (line == null) {
    252                 break;
    253             }
    254             int hash = line.indexOf('#');
    255             if (hash >= 0) {
    256                 line = line.substring(0, hash);
    257             }
    258             if (line.trim().isEmpty()) {
    259                 continue;
    260             }
    261             String[] list = line.split("\t");
    262             String locode = list[SubdivisionFields.Code_3166_2.ordinal()].trim();
    263             if (locode.endsWith("*")) {
    264                 locode = locode.substring(0, locode.length() - 1);
    265             }
    266             String bestName = list[SubdivisionFields.Subdivision_name.ordinal()].trim();
    267             //            if (!locode.contains("-")) {
    268             //                //System.out.println("*skipping: " + locode);
    269             //                continue;
    270             //            }
    271             //
    272             //            String names = list[5];
    273             //            String[] name = names.split("\\+");
    274             //            String bestName = null;
    275             //            for (String namePair : name) {
    276             //                if (bestName == null) {
    277             //                    bestName = namePair.split("=")[1];
    278             //                } else if (namePair.startsWith("en=")) {
    279             //                    bestName = namePair.split("=")[1];
    280             //                    break;
    281             //                }
    282             //            }
    283 //            System.out.println("\t" + locode + "\t" + bestName + "\t\t\t");
    284 
    285             putCheckingDuplicate(iso3166_2Data, locode, new Iso3166_2Data(bestName));
    286         }
    287         br.close();
    288     }
    289 
    290     public static void load(int file) throws IOException {
    291         BufferedReader br =
    292             //CldrUtility.getUTF8Data(
    293             FileReaders.openFile(CldrUtility.class,
    294                 "data/external/2013-1_UNLOCODE_CodeListPart" + file + ".csv",
    295                 LATIN1);
    296         M3<String, String, Boolean> nameToAlternate = ChainedMap.of(new TreeMap<String, Object>(), new TreeMap<String, Object>(), Boolean.class);
    297         Output<String> tempOutput = new Output<String>();
    298 
    299         String oldCountryCode = null;
    300         while (true) {
    301             String line = br.readLine();
    302             if (line == null) {
    303                 break;
    304             }
    305             line = line.trim();
    306             if (line.isEmpty()) {
    307                 continue;
    308             }
    309             String[] list = CldrUtility.splitCommaSeparated(line);
    310             String change = list[0];
    311             String locSuffix = list[2];
    312             if (change.equals("X")) {
    313                 continue;
    314             }
    315             String countryCode = list[1];
    316             if (!countryCode.equals(oldCountryCode)) {
    317                 nameToAlternate.clear();
    318                 oldCountryCode = countryCode;
    319             }
    320             String name = list[3];
    321             String name2 = list[4];
    322 
    323             if (change.equals("=")) {
    324                 String[] names = name.split("\\s*=\\s*");
    325                 if (names.length != 2) {
    326                     throw new IllegalArgumentException();
    327                 }
    328                 nameToAlternate.put(names[1], names[0], Boolean.TRUE);
    329                 if (!name.equals(name2)) {
    330                     names = name2.split("\\s*=\\s*");
    331                     if (names.length != 2) {
    332                         throw new IllegalArgumentException();
    333                     }
    334                     nameToAlternate.put(names[1], names[0], Boolean.TRUE);
    335                 }
    336                 continue;
    337             }
    338             if (locSuffix.isEmpty()) {
    339                 if (!name.startsWith(".")) {
    340                     // System.out.println("*** Skipping " + line);
    341                 }
    342                 continue;
    343             }
    344 
    345             name = removeParens(name, tempOutput);
    346             String name3 = tempOutput.value;
    347             name2 = removeParens(name2, tempOutput);
    348             String name4 = tempOutput.value;
    349 
    350             String subdivision = list[5];
    351             if (!subdivision.isEmpty()) {
    352                 subdivision = countryCode + "-" + subdivision;
    353                 if (getIso3166_2Data(subdivision) == null) {
    354                     ERRORS.put(subdivision, "Missing subdivision " + subdivision + " on line " + line);
    355                 }
    356             }
    357             String latLong = list[10];
    358             float latN = 0;
    359             float longE = 0;
    360             if (!latLong.isEmpty()) {
    361                 String[] latlong = latLong.split(" ");
    362                 latN = parse(latlong[0]);
    363                 longE = parse(latlong[1]);
    364             }
    365             String locode = countryCode + locSuffix;
    366             LocodeData locodeData = new LocodeData(locode, name, subdivision, latN, longE);
    367             putCheckingDuplicate(locodeToData, locode, locodeData);
    368             Map<String, Boolean> alternates = nameToAlternate.get(name);
    369             if (alternates != null) {
    370                 for (String alt : alternates.keySet()) {
    371                     putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE));
    372                 }
    373             }
    374             if (!name2.equals(name)) {
    375                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name2, subdivision, latN, longE));
    376                 alternates = nameToAlternate.get(name2);
    377                 if (alternates != null) {
    378                     for (String alt : alternates.keySet()) {
    379                         putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, alt, subdivision, latN, longE));
    380                     }
    381                 }
    382             }
    383             if (name3 != null) {
    384                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name3, subdivision, latN, longE));
    385             }
    386             if (name4 != null && !name4.equals(name3)) {
    387                 putCheckingDuplicate(locodeToData, locode, new LocodeData(locode, name4, subdivision, latN, longE));
    388             }
    389         }
    390         br.close();
    391     }
    392 
    393     public static String removeParens(String name, Output<String> tempOutput) {
    394         int paren = name.indexOf("(");
    395         tempOutput.value = null;
    396         if (paren > 0) {
    397             int paren2 = name.indexOf(")", paren);
    398             if (paren2 < 0) {
    399                 paren2 = name.length();
    400             }
    401             // if the parens start with (ex, then it appears to be a safe alias.
    402             // if not, we don't know, since the UN format is ambiguous
    403             // sometimes yes: Ras Zubbaya (Ras Dubayyah)
    404             // sometimes no: Challis Venture (oil terminal)
    405             String temp = name.substring(paren + 1, paren2);
    406             if (temp.startsWith("ex ")) {
    407                 tempOutput.value = temp.substring(3);
    408             }
    409             name = paren2 == name.length()
    410                 ? name.substring(0, paren).trim()
    411                 : (name.substring(0, paren) + name.substring(paren2 + 1)).replace("  ", " ").trim();
    412             //System.out.println("" + orginal + " => " + name + ", " + tempOutput.value + "");
    413         }
    414         return name;
    415     }
    416 
    417     public static <K, V extends Mergeable<V>> void putCheckingDuplicate(Map<K, V> map, K key, V value) {
    418         V old = map.get(key);
    419         if (old != null && !old.equals(value)) {
    420             try {
    421                 map.put(key, old.merge(value));
    422             } catch (Exception e) {
    423                 ERRORS.put(key.toString(), "Can't merge records: " + key + "\t" + e.getMessage());
    424             }
    425         } else {
    426             map.put(key, value);
    427         }
    428     }
    429 
    430     public static LocodeData getLocodeData(String unlocode) {
    431         return locodeToData.get(unlocode);
    432     }
    433 
    434     public static Set<Entry<String, LocodeData>> entrySet() {
    435         return locodeToData.entrySet();
    436     }
    437 
    438     public static Set<String> getAvailable() {
    439         return locodeToData.keySet();
    440     }
    441 
    442     public static Iso3166_2Data getIso3166_2Data(String unlocode) {
    443         return iso3166_2Data.get(unlocode);
    444     }
    445 
    446     public static Set<Entry<String, Iso3166_2Data>> isoEntrySet() {
    447         return iso3166_2Data.entrySet();
    448     }
    449 
    450     public static Set<String> getAvailableIso3166_2() {
    451         return iso3166_2Data.keySet();
    452     }
    453 
    454     public static Relation<String, String> getLoadErrors() {
    455         return ERRORS;
    456     }
    457 
    458     private static float parse(String string) {
    459         int len = string.length();
    460         char dir = string.charAt(len - 1);
    461         int result0 = Integer.parseInt(string.substring(0, len - 1));
    462         float fract = (result0 % 100) / 60f;
    463         fract = ((int) (fract * 100 + 0.499999999f)) / 100f;
    464         float result = (result0 / 100) + fract;
    465         return dir == 'N' || dir == 'E' ? result : -result;
    466     }
    467 
    468     public static void main(String[] args) throws IOException {
    469         Relation<String, LocodeData> countryNameToCities = Relation.of(new TreeMap<String, Set<LocodeData>>(), TreeSet.class);
    470         Set<String> errors = new TreeSet<String>();
    471         loadCitiesCapitals(countryNameToCities, errors);
    472         loadCitiesOver1M(countryNameToCities, errors);
    473         SupplementalDataInfo supp = ToolConfig.getToolInstance().getSupplementalDataInfo();
    474         Set<String> missing = new TreeSet<String>(
    475             supp.getBcp47Keys().get("tz"));
    476         Set<String> already = new TreeSet<String>();
    477 
    478         for (Entry<String, LocodeData> entry : countryNameToCities.keyValueSet()) {
    479             String countryName = entry.getKey();
    480             LocodeData item = entry.getValue();
    481             String firstName = item.names.iterator().next();
    482             LinkedHashSet<String> remainingNames = new LinkedHashSet<String>(item.names);
    483             remainingNames.remove(firstName);
    484             String lowerLocode = item.locode.toLowerCase(Locale.ENGLISH);
    485             String info = countryName
    486                 + "\t" + (remainingNames.isEmpty() ? "" : remainingNames)
    487                 + "\t" + (item.subdivision.isEmpty() ? "" : "(" + item.subdivision + ")");
    488 
    489             if (missing.contains(lowerLocode)) {
    490                 missing.remove(lowerLocode);
    491                 already.add(lowerLocode);
    492                 continue;
    493             }
    494             System.out.println("<location type=\"" + lowerLocode
    495                 + "\">" + firstName
    496                 + "</location>\t<!--" + info
    497                 + "-->");
    498         }
    499         System.out.println();
    500         System.out.println(CollectionUtilities.join(errors, "\n"));
    501         System.out.println();
    502         showLocodes("In exemplars already:", already);
    503         System.out.println();
    504         showLocodes("In exemplars but not new cities:", missing);
    505         System.out.println();
    506         for (Entry<String, Set<String>> errorEntry : ERRORS.keyValuesSet()) {
    507             System.out.println(errorEntry.getKey() + "\t" + errorEntry.getValue());
    508         }
    509         if (true) return;
    510 
    511         int i = 0;
    512         //        for (String s : new TreeSet<String>(Unlocode.getAvailableIso3166_2())) {
    513         //            System.out.println((i++) + "\t" + s + "\t" + Unlocode.getIso3166_2Data(s));
    514         //            //if (i > 1000) break;
    515         //        }
    516         for (String s : new TreeSet<String>(Unlocode.getAvailable())) {
    517             if (!s.startsWith("GT")) {
    518                 continue;
    519             }
    520             System.out.println((i++) + "\t" + s + "\t" + Unlocode.getLocodeData(s));
    521             //if (i > 1000) break;
    522         }
    523 
    524         //        Set<String> KNOWN_ERRORS = new HashSet<String>(Arrays.asList("AR-LA", "DE-BR"));
    525         //
    526         //        for (Entry<String, Set<String>> s : getLoadErrors().keyValuesSet()) {
    527         //            String key = s.getKey();
    528         //            Set<String> values = s.getValue();
    529         //            if (KNOWN_ERRORS.contains(key)) {
    530         //                System.out.println("# Known error\t" + key);
    531         //                continue;
    532         //            }
    533         //            String s2 = values.toString();
    534         //            System.out.println(key + "\t" + s2.substring(0,Math.min(256, s2.length())) + "");
    535         //        }
    536     }
    537 
    538     public static void showLocodes(String title, Set<String> already) {
    539         Set<String> noData = new TreeSet<String>();
    540         Set<String> noData2 = new TreeSet<String>();
    541         for (String locode : already) {
    542             String upperLocode = locode.toUpperCase(Locale.ENGLISH);
    543             String countryName = ULocale.getDisplayCountry("und-" + upperLocode.substring(0, 2), ULocale.ENGLISH);
    544             LocodeData data = locodeToData.get(upperLocode);
    545             if (data == null) {
    546                 if (locode.length() == 5) {
    547                     noData.add(locode);
    548                 } else {
    549                     noData2.add(locode);
    550                 }
    551             } else {
    552                 System.out.println(title + "\t" + countryName + "\t" + data);
    553             }
    554         }
    555         System.out.println("* No locode data, len 5:\t" + noData);
    556         System.out.println("* No locode data:\t" + noData2);
    557     }
    558 
    559     public static int loadCitiesOver1M(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException {
    560         int i = 1;
    561 
    562         BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-Over1M.txt");
    563         main: while (true) {
    564             String line = br.readLine();
    565             if (line == null) {
    566                 break;
    567             }
    568             if (line.startsWith("#")) {
    569                 continue;
    570             }
    571             String[] parts = line.split("\t");
    572             //System.out.println(Arrays.asList(parts));
    573             String cityName = parts[2];
    574             String subdivision = null;
    575             int bracket = cityName.indexOf('[');
    576             if (bracket > 0) {
    577                 try {
    578                     subdivision = cityName.substring(bracket + 1, cityName.indexOf(']'));
    579                     cityName = cityName.substring(0, bracket);
    580                 } catch (Exception e) {
    581                     throw new IllegalArgumentException(cityName);
    582                 }
    583             }
    584             String countryName = parts[3];
    585             add(countryName, subdivision, cityName, countryNameToCities, errors2);
    586 
    587             //                String countryCode = CountryCodeConverter.getCodeFromName(countryName);
    588             //                if (countryCode == null) {
    589             //                    System.out.println("*** Couldn't find country " + countryName);
    590             //                    continue;
    591             //                }
    592             //                Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
    593             //                if (locodeDatas == null) {
    594             //                    System.out.println((i++) + " Couldn't find city " + cityName + " in " + countryName);
    595             //                    continue;
    596             //                } else if (locodeDatas.size() == 1) {
    597             //                    add(countryNameToCities,locodeDatas.iterator().next());
    598             //                } else  {
    599             //                    Set<LocodeData> rem = new LinkedHashSet();
    600             //                    for (LocodeData x : locodeDatas) {
    601             //                        if (x.subdivision.equals(subdivision)) {
    602             //                            add(countryNameToCities, x);
    603             //                            continue main;
    604             //                        }
    605             //                        if (x.subdivision.startsWith(countryCode)) {
    606             //                            rem.add(x);
    607             //                        }
    608             //                    }
    609             //                    if (rem.size() != 1) {
    610             //                        System.out.println((i++) + " No single record for " + cityName + "\t" + rem);
    611             //                    } else {
    612             //                        add(countryNameToCities, rem.iterator().next());
    613             //                    }
    614             //                }
    615         }
    616         br.close();
    617         return i;
    618     }
    619 
    620     public static int loadCitiesCapitals(Relation<String, LocodeData> countryNameToCities, Set<String> errors2) throws IOException {
    621         int i = 1;
    622         BufferedReader br = FileReaders.openFile(CldrUtility.class, "data/external/Cities-CountryCapitals.txt");
    623         while (true) {
    624             String line = br.readLine();
    625             if (line == null) {
    626                 break;
    627             }
    628             if (line.startsWith("#")) {
    629                 continue;
    630             }
    631             String[] parts = line.split(" *\t *");
    632             //System.out.println(Arrays.asList(parts));
    633             String cityName = parts[0];
    634             String countryName = parts[1];
    635             add(countryName, null, cityName, countryNameToCities, errors2);
    636         }
    637         br.close();
    638         return i;
    639     }
    640 
    /**
     * Names from the city lists that {@link #add(String, String, String, Relation, Set)} skips
     * silently, instead of reporting an error, when no country code is found for them.
     */
    static final Set<String> noncountries = new HashSet<String>(Arrays.asList(
        "United States Virgin Islands", "Akrotiri and Dhekelia", "Easter Island", "Somaliland", "Northern Cyprus", "Nagorno-Karabakh Republic", "Abkhazia",
        "Transnistria", "South Ossetia"));
    644 
    /**
     * Transform used by {@link #add(String, String, String, Relation, Set)} to retry a city
     * lookup without accents; presumably the rule decomposes the text and removes nonspacing
     * marks -- confirm against the ICU transliterator rule syntax.
     */
    static final Transform<String, String> REMOVE_ACCENTS = Transliterator.getInstance("nfd;[:mn:]remove");
    646 
    647     static void add(String countryName, String subdivision, String cityName, Relation<String, LocodeData> countryNameToCities, Set<String> errors2) {
    648         String countryCode = CountryCodeConverter.getCodeFromName(countryName);
    649         if (countryCode == null) {
    650             if (noncountries.contains(countryName)) {
    651                 return; // skip
    652             }
    653             errors2.add("**Couldn't find country " + countryName);
    654             //continue;
    655         }
    656         countryName = ULocale.getDisplayCountry("und-" + countryCode, ULocale.ENGLISH);
    657         Set<LocodeData> locodeDatas = nameToLocodeData.get(cityName);
    658         if (locodeDatas == null) {
    659             // try again without accents
    660             String cityName2 = REMOVE_ACCENTS.transform(cityName);
    661             if (!cityName.equals(cityName2)) {
    662                 locodeDatas = nameToLocodeData.get(cityName2);
    663             }
    664         }
    665         if (locodeDatas == null) {
    666             errors2.add("** No matching record for\t" + countryName + "\t" + countryCode + "\t" + cityName);
    667         } else {
    668             Set<LocodeData> rem = new LinkedHashSet<LocodeData>();
    669             for (LocodeData x : locodeDatas) {
    670                 if (x.locode.startsWith(countryCode)) {
    671                     if (x.subdivision.equals(subdivision)) {
    672                         rem.clear();
    673                         rem.add(x);
    674                         break;
    675                     }
    676                     rem.add(x);
    677                 }
    678             }
    679             if (rem.size() == 0) {
    680                 errors2.add("** No matching country record for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + locodeDatas);
    681             } else if (rem.size() != 1) {
    682                 errors2.add("** Multiple matching country records for\t" + countryName + "\t" + countryCode + "\t" + cityName + "\t" + rem);
    683             } else {
    684                 LocodeData locodeData = rem.iterator().next();
    685                 countryNameToCities.put(countryName, locodeData);
    686             }
    687         }
    688     }
    689 }