/**
 *******************************************************************************
 * Copyright (C) 2002-2010, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.dev.tool.translit;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Hashtable;

import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;

/**
 * Development tool that writes an HTML table ("comparison-chart.html", UTF-8)
 * comparing how the InterIndic private-use code points are transliterated into
 * each Indic script.
 *
 * <p>Each table row is a {@code String[10]}: element 0 holds the InterIndic
 * code point and elements 1..9 hold the result for one script, optionally
 * followed by one of the markers {@code :FALLBACK}, {@code :UNASSIGNED},
 * {@code :UNMAPPED} or {@code :CONSUMED}, which select the cell colour when the
 * row is rendered.
 *
 * @author ram
 */
public class WriteIndicCharts {

    public static void main(String[] args){
        writeIICharts();
    }


    static String header =  "<html>\n" +
                            " <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"+
                            " Inter-Indic Transliteration Comparison chart"+
                            " </head>\n"+
                            " <body bgcolor=#FFFFFF>\n"+
                            " <table border=1 width=100% >\n"+
                            " <tr>\n"+
                            " <th width=9%>Inter-Indic</th>\n"+
                            " <th width=9%>Latin</th>\n"+
                            " <th width=9%>Devanagari</th>\n"+
                            " <th width=9%>Bengali</th>\n"+
                            " <th width=9%>Gurmukhi</th>\n"+
                            " <th width=9%>Gujarati</th>\n"+
                            " <th width=9%>Oriya</th>\n"+
                            " <th width=9%>Tamil</th>\n"+
                            " <th width=9%>Telugu</th>\n"+
                            " <th width=9%>Kannada</th>\n"+
                            " <th width=9%>Malayalam</th>\n"+
                            " </tr>\n";
    static String footer =  " </table>\n"+
                            " </body>\n" +
                            "</html>\n";

    static UnicodeSet deva = new UnicodeSet("[:deva:]");
    static UnicodeSet beng = new UnicodeSet("[:beng:]");
    static UnicodeSet gujr = new UnicodeSet("[:gujr:]");
    static UnicodeSet guru = new UnicodeSet("[:guru:]");
    static UnicodeSet orya = new UnicodeSet("[:orya:]");
    static UnicodeSet taml = new UnicodeSet("[:taml:]");
    static UnicodeSet telu = new UnicodeSet("[:telu:]");
    static UnicodeSet knda = new UnicodeSet("[:knda:]");
    static UnicodeSet mlym = new UnicodeSet("[:mlym:]");
    static UnicodeSet inter= new UnicodeSet("[\uE000-\uE082]");

    /** Row key (a one-code-point String) to that row's ten cells. Shared by all methods. */
    static Hashtable table = new Hashtable();

    /** Script part of the "InterIndic-X" transliterator IDs, in table column order (columns 1..9). */
    private static final String[] SCRIPT_NAMES = {
        "Devanagari", "Bengali", "Gurmukhi", "Gujarati", "Oriya",
        "Tamil", "Telugu", "Kannada", "Malayalam"
    };

    /** First code point of each script's block, parallel to {@link #SCRIPT_NAMES}. */
    private static final int[] SCRIPT_STARTS = {
        0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00,
        0x0B80, 0x0c00, 0x0C80, 0x0D00
    };

    /** Cell content used when the character itself is not displayed. */
    private static final String BLANK = " ";

    /**
     * Writes "comparison-chart.html" with one row per InterIndic offset
     * 0x00..0x80. Rows in which no script column is worth showing are omitted
     * (see {@link #hasChartableCell(String[])}). Any exception is printed to
     * standard error; the output stream is closed in every case.
     */
    public static void writeIICharts(){
        try{
            Transliterator[] scripts = scriptTransliterators();
            Transliterator toLatin = Transliterator.getInstance("InterIndic-Latin");

            for(int i=0x00;i<=0x80;i++){
                String[] arr = new String[10];
                arr[0]=UTF16.valueOf(i+ 0xE000);
                table.put(UTF16.valueOf(i),arr);
            }

            OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
            try{
                os.write(header);

                fillScriptColumns(scripts);

                for(int i=0x00;i<=0x80;i++){
                    String[] temp = row(UTF16.valueOf(i));
                    if(!hasChartableCell(temp)){
                        continue;
                    }
                    os.write(" <tr>\n");
                    for(int j=0; j<temp.length;j++){
                        if(temp[j]!=null){
                            writeInterIndicCell(os, temp[j], toLatin);
                        }else{
                            os.write(" <td width=9% > </td>\n");
                        }
                    }
                    os.write(" </tr>\n");
                }
                os.write(footer);
            }finally{
                // Close even when a transliterator or a write fails part-way.
                os.close();
            }
        }catch( Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Legacy variant of {@link #writeIICharts()} that creates rows keyed by the
     * Devanagari code points U+0900..U+097F and renders them with a simpler
     * colour scheme (only the {@code :FALLBACK} marker is recognised).
     *
     * <p>NOTE: the columns are filled through
     * {@link #writeIICharts(Transliterator, int, int)}, which addresses rows by
     * the low byte of the InterIndic code point, not by U+09xx keys. The rows
     * created here therefore keep {@code null} script cells and are skipped.
     * This method is not called from {@link #main(String[])}.
     */
    public static void writeCharts(){
        try{
            Transliterator[] scripts = scriptTransliterators();

            for(int i=0x0900;i<=0x097F;i++){
                String[] arr = new String[10];
                arr[0]=UTF16.valueOf((i&0xFF) + 0xE000);
                table.put(UTF16.valueOf(i),arr);
            }

            OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
            try{
                os.write(header);

                fillScriptColumns(scripts);

                for(int i=0x0900;i<=0x097F;i++){
                    String[] temp = row(UTF16.valueOf(i));
                    boolean write = false;
                    for(int k=1;k<temp.length;k++){
                        // Cells that were never filled are null; they must not be dereferenced.
                        if(temp[k]!=null
                                && UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0){
                            write = true;
                        }
                    }
                    if(!write){
                        continue;
                    }
                    os.write(" <tr>\n");
                    for(int j=0; j<temp.length;j++){
                        if(temp[j]!=null){
                            writeSimpleCell(os, temp[j]);
                        }else{
                            os.write(" <td> </td>\n");
                        }
                    }
                    os.write(" </tr>\n");
                }
                os.write(footer);
            }finally{
                os.close();
            }
        }catch( Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Maps a script code point to a row key: the code point's offset within
     * its 0x80-sized block (the low seven bits), as a String.
     */
    static String getKey(int cp){
        // Same as taking (cp & 0xFF) and subtracting 0x80 when the result exceeds 0x7F.
        return UTF16.valueOf(cp & 0x7F);
    }

    /**
     * Fills column {@code index} for the 0x80 code points starting at
     * {@code start}: each is mapped with the inverse of {@code trans} and then
     * back with {@code trans}. The inverse result is stored, with
     * {@code :FALLBACK} appended when the round trip does not reproduce the
     * original code point. Code points whose row does not exist are skipped.
     *
     * @param trans transliterator into the script; its inverse is used first
     * @param start first code point of the script block
     * @param index column (array element) to fill
     */
    public static void writeCharts(Transliterator trans, int start, int index){

        Transliterator inverse = trans.getInverse();
        for(int i=0;i<=0x7f;i++){
            String[] arr = row(getKey(start+i));
            if(arr==null){
                continue;
            }
            String cp = UTF16.valueOf(start+i);
            String s1 = inverse.transliterate(cp);
            String s2 = trans.transliterate(s1);

            if(cp.equals(s2)){
                arr[index] = s1;
            }else{
                arr[index] = s1 + ":FALLBACK";
            }
        }
    }

    /**
     * Fills column {@code index} for every code point in {@link #inter}. The
     * row is selected by the low byte of the InterIndic code point. With
     * {@code s1 = trans(cp)} and {@code s2 = inverse(s1)} the cell becomes:
     * <ul>
     * <li>{@code start+offset} + ":UNASSIGNED" when {@code trans} left the code point unchanged;</li>
     * <li>{@code s1} when the round trip reproduces the code point;</li>
     * <li>{@code start+offset} + ":CONSUMED" when the round trip yields the empty string;</li>
     * <li>{@code s1} + ":FALLBACK" otherwise.</li>
     * </ul>
     *
     * @param trans transliterator from InterIndic into the script
     * @param start first code point of the script block
     * @param index column (array element) to fill
     */
    public static void writeIICharts(Transliterator trans,int start, int index){

        Transliterator inverse = trans.getInverse();
        UnicodeSetIterator iter = new UnicodeSetIterator(inter);

        while(iter.next()){
            int offset = iter.codepoint & 0xFF;
            String[] arr = row(UTF16.valueOf(offset));
            if(arr==null){
                // `inter` reaches U+E082 while writeIICharts() only creates rows
                // up to offset 0x80; without this guard the store below throws
                // NullPointerException and the whole chart is abandoned.
                continue;
            }
            String cp = UTF16.valueOf(iter.codepoint);
            String s1 = trans.transliterate(cp);
            String s2 = inverse.transliterate(s1);
            String placeholder = UTF16.valueOf(start+offset);

            if(cp.equals(s1)){
                arr[index] = placeholder+":UNASSIGNED";
            }else if(cp.equals(s2)){
                arr[index] = s1;
            }else if(s2.equals("")){
                // When s1 equals s2 this also covers the "s1 is empty" case.
                arr[index] = placeholder+":CONSUMED";
            }else{
                arr[index] = s1+ ":FALLBACK";
            }
        }
    }

    /**
     * Fills column {@code index} for every code point in {@code target}: the
     * code point itself is stored, with {@code :FALLBACK} appended when mapping
     * it with the inverse of {@code trans} and back with {@code trans} does not
     * reproduce it. Code points whose row does not exist are skipped.
     *
     * @param trans  transliterator into the script; its inverse is used first
     * @param target code points to chart
     * @param index  column (array element) to fill
     */
    public static void writeCharts(Transliterator trans, UnicodeSet target, int index){
        UnicodeSetIterator tIter = new UnicodeSetIterator(target);
        Transliterator inverse = trans.getInverse();
        while(tIter.next()){
            String[] arr = row(getKey(tIter.codepoint));
            if(arr==null){
                continue;
            }
            String cp = UTF16.valueOf(tIter.codepoint);
            String s1 = inverse.transliterate(cp);
            String s2 = trans.transliterate(s1);

            if(cp.equals(s2)){
                arr[index] = cp;
            }else{
                arr[index] = cp + ":FALLBACK";
            }
        }
    }

    /** Returns the row stored under {@code key}, or null when there is none. */
    private static String[] row(String key){
        return (String[])table.get(key);
    }

    /** Creates the "InterIndic-X" transliterator for each script, in column order. */
    private static Transliterator[] scriptTransliterators(){
        Transliterator[] result = new Transliterator[SCRIPT_NAMES.length];
        for(int c=0;c<result.length;c++){
            result[c] = Transliterator.getInstance("InterIndic-"+SCRIPT_NAMES[c]);
        }
        return result;
    }

    /** Fills columns 1..9 of the table, one script per column. */
    private static void fillScriptColumns(Transliterator[] scripts){
        for(int c=0;c<scripts.length;c++){
            writeIICharts(scripts[c],SCRIPT_STARTS[c],c+1);
        }
    }

    /** True when the extended name of the first code point of {@code str} contains "unassigned" past index 0. */
    private static boolean startsWithUnassigned(String str){
        return UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0;
    }

    /**
     * Decides whether a row is written. Columns are scanned from 1 up to the
     * first null cell; the row qualifies as soon as one cell is not both named
     * "unassigned" and tagged {@code :UNASSIGNED}.
     */
    private static boolean hasChartableCell(String[] cells){
        for(int k=1;k<cells.length && cells[k]!=null;k++){
            if(UCharacter.getExtendedName(UTF16.charAt(cells[k],0)).indexOf("unassigned")<0 ||
                    cells[k].indexOf(":UNASSIGNED")<0){
                return true;
            }
        }
        return false;
    }

    /** Builds one table cell: {@code shown}, a line break, then the hex form of {@code text}. */
    private static String td(String attributes, String title, String shown, String text){
        return " <td"+attributes+" align=center title=\""+title+"\">"+shown
                +"<br><tt>"+Utility.hex(text)+"</tt>"+"</td>\n";
    }

    /**
     * Renders one cell for {@link #writeIICharts()}. A marker, if present, is
     * stripped (everything from the first ':' on) and selects the colour. A
     * private-use cell is followed by an extra cell holding its Latin
     * transliteration, or a blank when the transliterator left it unchanged.
     */
    private static void writeInterIndicCell(OutputStreamWriter os, String cell, Transliterator toLatin)
            throws IOException{
        boolean fallback   = cell.indexOf(":FALLBACK")>=0;
        boolean unassigned = cell.indexOf(":UNASSIGNED")>=0;
        boolean unmapped   = cell.indexOf(":UNMAPPED")>=0;
        boolean consumed   = cell.indexOf(":CONSUMED")>=0;

        String str = cell;
        if(fallback || unassigned || unmapped || consumed){
            str = cell.substring(0,cell.indexOf(":"));
        }

        // Tooltip: the names of all code units of the cell text, joined by '+'.
        StringBuffer nameBuf=new StringBuffer();
        for(int f=0; f<str.length();f++){
            if(f>0){ nameBuf.append("+");}
            nameBuf.append(UCharacter.getExtendedName(UTF16.charAt(str,f)));
        }
        String name = nameBuf.toString();

        if(fallback){
            os.write(td(" width=9% bgcolor=#BBBBFF", name, startsWithUnassigned(str) ? BLANK : str, str));
        }else if(unmapped){
            os.write(td(" bgcolor=#FF9999", name, BLANK, str));
        }else if(unassigned){
            os.write(td(" width=9% bgcolor=#00FFFF", name, startsWithUnassigned(str) ? BLANK : str, str));
        }else if(consumed){
            // The tooltip is left empty when the placeholder is an assigned character.
            os.write(td(" width=9% bgcolor=#FFFF55", startsWithUnassigned(str) ? name : "", BLANK, str));
        }else if(name.indexOf("private")!=-1){
            String s = toLatin.transliterate(str);
            os.write(td(" width=9% bgcolor=#FFBBBB", name, BLANK, str));
            os.write(" <td width=9% bgcolor=#CCEEDD align=center>"+(s.equals(str) ? BLANK : s)+"</td>");
        }else{
            os.write(td(" width=9%", name, str, str));
        }
    }

    /**
     * Renders one cell for {@link #writeCharts()}: only the {@code :FALLBACK}
     * marker is stripped, and the tooltip is the name of the first code point.
     */
    private static void writeSimpleCell(OutputStreamWriter os, String cell) throws IOException{
        boolean fallback = cell.indexOf(":FALLBACK")>=0;
        String str = fallback ? cell.substring(0,cell.indexOf(":")) : cell;
        String name = UCharacter.getExtendedName(UTF16.charAt(str,0));

        if(fallback){
            os.write(td(" bgcolor=#BBBBFF", name, str, str));
        }else if(name.indexOf("unassigned")!=-1){
            os.write(td(" bgcolor=#CCCCCC", name, BLANK, str));
        }else if(name.indexOf("private")!=-1){
            os.write(td(" bgcolor=#FFBBBB", name, BLANK, str));
        }else{
            os.write(td("", name, str, str));
        }
    }
}