1 /* 2 ********************************************************************** 3 * Copyright (c) 2009, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: John Emmons 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.tool; 10 11 import java.io.File; 12 import java.io.IOException; 13 import java.io.PrintWriter; 14 15 import org.unicode.cldr.draft.FileUtilities; 16 import org.unicode.cldr.util.CLDRPaths; 17 import org.unicode.cldr.util.LDMLUtilities; 18 import org.w3c.dom.Document; 19 import org.w3c.dom.Node; 20 21 import com.ibm.icu.text.Normalizer; 22 23 /** 24 * Tool to help determine if 25 * 26 * @author John C. Emmons 27 */ 28 29 public class FilterCharacterFallbacks { 30 31 public static void main(String[] args) throws IOException { 32 33 Document fb; 34 Node n; 35 fb = LDMLUtilities.parse(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY + File.separator + "characters.xml", true); 36 if (fb != null) { 37 PrintWriter out = FileUtilities.openUTF8Writer(".", "report"); 38 n = LDMLUtilities.getNode(fb, "//supplementalData/characters/character-fallback"); 39 for (Node cf = n.getFirstChild(); cf != null; cf = cf.getNextSibling()) { 40 String srcChar = LDMLUtilities.getAttributeValue(cf, "value"); 41 if (srcChar != null) { 42 for (Node sb = cf.getFirstChild(); sb != null; sb = sb.getNextSibling()) { 43 String subChars = LDMLUtilities.getNodeValue(sb); 44 if (subChars != null) { 45 boolean canonicallyEquivalent = (Normalizer.compare(srcChar, subChars, 0) == 0); 46 if (canonicallyEquivalent) { 47 out.println("Remove Character \"" + srcChar + "\" (" 48 + com.ibm.icu.impl.Utility.escape(srcChar) + ") Substitute \"" + subChars 49 + "\" (" + com.ibm.icu.impl.Utility.escape(subChars) 50 + ") - Canonically equivalent."); 51 } 52 String toNFKC = Normalizer.normalize(srcChar, Normalizer.NFKC); 53 if (subChars.equals(toNFKC)) { 54 out.println("Remove Character \"" + srcChar + "\" (" 55 + com.ibm.icu.impl.Utility.escape(srcChar) + ") Substitute \"" + subChars 56 + "\" (" + com.ibm.icu.impl.Utility.escape(subChars) + ") - a toNFKC form."); 57 } else { 58 out.println("OK - Character \"" + srcChar + "\" (" 59 + com.ibm.icu.impl.Utility.escape(srcChar) + ") Substitute \"" + subChars 60 + "\" (" + com.ibm.icu.impl.Utility.escape(subChars) + ")"); 61 } 62 } 63 } 64 } 65 } 66 out.close(); 67 } else 68 System.out.println("Couldn't open characters.xml..."); 69 70 } 71 } 72