Home | History | Annotate | Download | only in tool
      1 /*
      2  **********************************************************************
      3  * Copyright (c) 2009, International Business Machines
      4  * Corporation and others.  All Rights Reserved.
      5  **********************************************************************
      6  * Author: John Emmons
      7  **********************************************************************
      8  */
      9 package org.unicode.cldr.tool;
     10 
     11 import java.io.File;
     12 import java.io.IOException;
     13 import java.io.PrintWriter;
     14 
     15 import org.unicode.cldr.draft.FileUtilities;
     16 import org.unicode.cldr.util.CLDRPaths;
     17 import org.unicode.cldr.util.LDMLUtilities;
     18 import org.w3c.dom.Document;
     19 import org.w3c.dom.Node;
     20 
     21 import com.ibm.icu.text.Normalizer;
     22 
     23 /**
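 * Tool to help determine if any character fallbacks listed in the supplemental
 * characters.xml are redundant, i.e. the substitute is canonically equivalent
 * to the source character or is exactly its NFKC form.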
     25  *
     26  * @author John C. Emmons
     27  */
     28 
     29 public class FilterCharacterFallbacks {
     30 
     31     public static void main(String[] args) throws IOException {
     32 
     33         Document fb;
     34         Node n;
     35         fb = LDMLUtilities.parse(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY + File.separator + "characters.xml", true);
     36         if (fb != null) {
     37             PrintWriter out = FileUtilities.openUTF8Writer(".", "report");
     38             n = LDMLUtilities.getNode(fb, "//supplementalData/characters/character-fallback");
     39             for (Node cf = n.getFirstChild(); cf != null; cf = cf.getNextSibling()) {
     40                 String srcChar = LDMLUtilities.getAttributeValue(cf, "value");
     41                 if (srcChar != null) {
     42                     for (Node sb = cf.getFirstChild(); sb != null; sb = sb.getNextSibling()) {
     43                         String subChars = LDMLUtilities.getNodeValue(sb);
     44                         if (subChars != null) {
     45                             boolean canonicallyEquivalent = (Normalizer.compare(srcChar, subChars, 0) == 0);
     46                             if (canonicallyEquivalent) {
     47                                 out.println("Remove Character \"" + srcChar + "\" ("
     48                                     + com.ibm.icu.impl.Utility.escape(srcChar) + ")    Substitute \"" + subChars
     49                                     + "\" (" + com.ibm.icu.impl.Utility.escape(subChars)
     50                                     + ") - Canonically equivalent.");
     51                             }
     52                             String toNFKC = Normalizer.normalize(srcChar, Normalizer.NFKC);
     53                             if (subChars.equals(toNFKC)) {
     54                                 out.println("Remove Character \"" + srcChar + "\" ("
     55                                     + com.ibm.icu.impl.Utility.escape(srcChar) + ")    Substitute \"" + subChars
     56                                     + "\" (" + com.ibm.icu.impl.Utility.escape(subChars) + ") - a toNFKC form.");
     57                             } else {
     58                                 out.println("OK - Character \"" + srcChar + "\" ("
     59                                     + com.ibm.icu.impl.Utility.escape(srcChar) + ")    Substitute \"" + subChars
     60                                     + "\" (" + com.ibm.icu.impl.Utility.escape(subChars) + ")");
     61                             }
     62                         }
     63                     }
     64                 }
     65             }
     66             out.close();
     67         } else
     68             System.out.println("Couldn't open characters.xml...");
     69 
     70     }
     71 }
     72