Home | History | Annotate | Download | only in translit
      1 /**
      2 *******************************************************************************
      3 * Copyright (C) 2002-2010, International Business Machines Corporation and    *
      4 * others. All Rights Reserved.                                                *
      5 *******************************************************************************
      6 */
      7 package com.ibm.icu.dev.tool.translit;
      8 
      9 import java.io.FileOutputStream;
     10 import java.io.OutputStreamWriter;
     11 import java.util.Hashtable;
     12 
     13 import com.ibm.icu.impl.Utility;
     14 import com.ibm.icu.lang.UCharacter;
     15 import com.ibm.icu.text.Transliterator;
     16 import com.ibm.icu.text.UTF16;
     17 import com.ibm.icu.text.UnicodeSet;
     18 import com.ibm.icu.text.UnicodeSetIterator;
     19 /**
     20  * @author ram
     21  *
     22  * To change this generated comment edit the template variable "typecomment":
     23  * Window>Preferences>Java>Templates.
     24  * To enable and disable the creation of type comments go to
     25  * Window>Preferences>Java>Code Generation.7F
     26  */
     27 public class WriteIndicCharts {
     28 
     29     public static void main(String[] args){
     30         writeIICharts();
     31     }
     32 
     33 
     34     static String header =  "<html>\n" +
     35                             "    <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"+
     36                             "           Inter-Indic Transliteration Comparison chart"+
     37                             "    </head>\n"+
     38                             "    <body bgcolor=#FFFFFF>\n"+
     39                             "         <table border=1 width=100% >\n"+
     40                             "            <tr>\n"+
     41                             "            <th width=9%>Inter-Indic</th>\n"+
     42                             "            <th width=9%>Latin</th>\n"+
     43                             "            <th width=9%>Devanagari</th>\n"+
     44                             "            <th width=9%>Bengali</th>\n"+
     45                             "            <th width=9%>Gurmukhi</th>\n"+
     46                             "            <th width=9%>Gujarati</th>\n"+
     47                             "            <th width=9%>Oriya</th>\n"+
     48                             "            <th width=9%>Tamil</th>\n"+
     49                             "            <th width=9%>Telugu</th>\n"+
     50                             "            <th width=9%>Kannada</th>\n"+
     51                             "            <th width=9%>Malayalam</th>\n"+
     52                             "            </tr>\n";
     53     static String footer =  "          </table>\n"+
     54                             "    </body>\n" +
     55                             "</html>\n";
     56 
     57     static UnicodeSet deva = new UnicodeSet("[:deva:]");
     58     static UnicodeSet beng = new UnicodeSet("[:beng:]");
     59     static UnicodeSet gujr = new UnicodeSet("[:gujr:]");
     60     static UnicodeSet guru = new UnicodeSet("[:guru:]");
     61     static UnicodeSet orya = new UnicodeSet("[:orya:]");
     62     static UnicodeSet taml = new UnicodeSet("[:taml:]");
     63     static UnicodeSet telu = new UnicodeSet("[:telu:]");
     64     static UnicodeSet knda = new UnicodeSet("[:knda:]");
     65     static UnicodeSet mlym = new UnicodeSet("[:mlym:]");
     66     static UnicodeSet inter= new UnicodeSet("[\uE000-\uE082]");
     67 
     68     public static void writeIICharts(){
     69         try{
     70             Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali");
     71             Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi");
     72             Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati");
     73             Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya");
     74             Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil");
     75             Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu");
     76             Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada");
     77             Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam");
     78             Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari");
     79             Transliterator t10 = Transliterator.getInstance("InterIndic-Latin");
     80             //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
     81 
     82             for(int i=0x00;i<=0x80;i++){
     83                String[] arr =  new String[10];
     84                arr[0]=UTF16.valueOf(i+ 0xE000);
     85                table.put(UTF16.valueOf(i),arr);
     86             }
     87 
     88             OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
     89 
     90             os.write(header);
     91 
     92             writeIICharts(t9,0x0900,1);
     93             writeIICharts(t1,0x0980,2);
     94             writeIICharts(t2,0x0A00,3);
     95             writeIICharts(t3,0x0A80,4);
     96             writeIICharts(t4,0x0B00,5);
     97             writeIICharts(t5,0x0B80,6);
     98             writeIICharts(t6,0x0c00,7);
     99             writeIICharts(t7,0x0C80,8);
    100             writeIICharts(t8,0x0D00,9);
    101 
    102             for(int i=0x00;i<=0x80;i++){
    103                 String[] temp = (String[])table.get(UTF16.valueOf(i));
    104                 boolean write = false;
    105                 for(int k=1;k<temp.length && temp[k]!=null;k++){
    106                     if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0 ||
    107                        temp[k].indexOf(":UNASSIGNED")<0){
    108                         write = true;
    109                     }
    110                 }
    111                 if(write){
    112                     os.write("        <tr>\n");
    113                     for(int j=0; j<temp.length;j++){
    114                         if(temp[j]!=null){
    115                             boolean fallback=false;
    116                             boolean unassigned=false;
    117                             boolean unmapped = false;
    118                             boolean consumed =false;
    119                             String str = temp[j];
    120 
    121                             if(temp[j].indexOf(":FALLBACK")>=0){
    122                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    123                                 fallback=true;
    124                                // os.write("            <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    125                             }
    126                             if(temp[j].indexOf(":UNASSIGNED")>=0){
    127                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    128                                 unassigned=true;
    129                             }
    130 
    131                             if(temp[j].indexOf(":UNMAPPED")>=0){
    132                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    133                                 unmapped=true;
    134                             }
    135                             if(temp[j].indexOf(":CONSUMED")>=0){
    136                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    137                                 consumed=true;
    138                             }
    139 
    140                             String name;
    141                             StringBuffer nameBuf=new StringBuffer();
    142                             for(int f=0; f<str.length();f++){
    143                                 if(f>0){ nameBuf.append("+");}
    144                                 nameBuf.append(UCharacter.getExtendedName(UTF16.charAt(str,f)));
    145                             }
    146                             name = nameBuf.toString();
    147                             if(fallback){
    148 
    149                                 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
    150                                     os.write("            <td  width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    151                                 }else{
    152                                     os.write("            <td width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    153                                 }
    154                             }else if(unmapped){
    155                                 os.write("            <td bgcolor=#FF9999 align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    156                             }else if(unassigned){
    157                                 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
    158                                     os.write("            <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    159                                 }else{
    160                                     os.write("            <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    161                                 }
    162                             }else if(consumed){
    163                                  if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
    164                                     os.write("            <td width=9% bgcolor=#FFFF55 align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    165                                 }else{
    166                                     os.write("            <td width=9% bgcolor=#FFFF55 align=center title=\""+""+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    167                                 }
    168                             }else if(name.indexOf("private")!=-1){
    169                                 String s = t10.transliterate(str);
    170                                 os.write("            <td width=9% bgcolor=#FFBBBB  align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    171                                 if(!s.equals(str)){
    172                                     os.write("            <td width=9%  bgcolor=#CCEEDD align=center>"+s +"</td>");
    173                                 }else{
    174                                     os.write("            <td width=9% bgcolor=#CCEEDD align=center>&nbsp;</td>");
    175                                 }
    176                             }else{
    177                                os.write("            <td width=9% align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    178                             }
    179                         }else{
    180                              os.write("           <td width=9% >&nbsp</td>\n");
    181                         }
    182                     }
    183                     os.write("        </tr>\n");
    184                 }
    185             }
    186             os.write(footer);
    187             os.close();
    188         }catch( Exception e){
    189             e.printStackTrace();
    190         }
    191     }
    192     public static void writeCharts(){
    193         try{
    194             Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali");
    195             Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi");
    196             Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati");
    197             Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya");
    198             Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil");
    199             Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu");
    200             Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada");
    201             Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam");
    202             Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari");
    203 
    204             //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
    205 
    206             for(int i=0x0900;i<=0x097F;i++){
    207                String[] arr =  new String[10];
    208                arr[0]=UTF16.valueOf((i&0xFF) + 0xE000);
    209                table.put(UTF16.valueOf(i),arr);
    210             }
    211 
    212             OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
    213 
    214             os.write(header);
    215             /*
    216             writeCharts(t1,beng,1);
    217             writeCharts(t2,guru,2);
    218             writeCharts(t3,gujr,3);
    219             writeCharts(t4,orya,4);
    220             writeCharts(t5,taml,5);
    221             writeCharts(t6,telu,6);
    222             writeCharts(t7,knda,7);
    223             writeCharts(t8,mlym,8);
    224             */
    225             /*
    226             writeCharts(t9,0x0900,1);
    227             writeCharts(t1,0x0980,2);
    228             writeCharts(t2,0x0A00,3);
    229             writeCharts(t3,0x0A80,4);
    230             writeCharts(t4,0x0B00,5);
    231             writeCharts(t5,0x0B80,6);
    232             writeCharts(t6,0x0c00,7);
    233             writeCharts(t7,0x0C80,8);
    234             writeCharts(t8,0x0D00,9);
    235             */
    236             writeIICharts(t9,0x0900,1);
    237             writeIICharts(t1,0x0980,2);
    238             writeIICharts(t2,0x0A00,3);
    239             writeIICharts(t3,0x0A80,4);
    240             writeIICharts(t4,0x0B00,5);
    241             writeIICharts(t5,0x0B80,6);
    242             writeIICharts(t6,0x0c00,7);
    243             writeIICharts(t7,0x0C80,8);
    244             writeIICharts(t8,0x0D00,9);
    245             for(int i=0x0900;i<=0x097F;i++){
    246                 String[] temp = (String[])table.get(UTF16.valueOf(i));
    247                 boolean write = false;
    248                 for(int k=1;k<temp.length;k++){
    249                     if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0){
    250                         write = true;
    251                     }
    252                 }
    253                 if(write){
    254                     os.write("        <tr>\n");
    255                     for(int j=0; j<temp.length;j++){
    256                         if(temp[j]!=null){
    257                             boolean fallback=false;
    258                             String str = temp[j];
    259 
    260                             if(temp[j].indexOf(":FALLBACK")>=0){
    261                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    262                                 fallback=true;
    263                                // os.write("            <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    264                             }
    265                             String name = UCharacter.getExtendedName(UTF16.charAt(str,0));
    266                             if(fallback){
    267                                 os.write("            <td bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    268                             }else if(name.indexOf("unassigned")!=-1){
    269                                 os.write("            <td bgcolor=#CCCCCC align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    270                             }else if(name.indexOf("private")!=-1){
    271 
    272 
    273                                 os.write("            <td bgcolor=#FFBBBB align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    274 
    275                             }else{
    276                                os.write("            <td align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    277                             }
    278                         }else{
    279                              os.write("           <td>&nbsp</td>\n");
    280                         }
    281                     }
    282                     os.write("        </tr>\n");
    283                 }
    284             }
    285             os.write(footer);
    286             os.close();
    287         }catch( Exception e){
    288             e.printStackTrace();
    289         }
    290     }
    291     static Hashtable table = new Hashtable();
    292     static String getKey(int cp){
    293         int delta = cp & 0xFF;
    294         delta-= (delta>0x7f)? 0x80 : 0;
    295         //delta+=0x0900;
    296         return UTF16.valueOf(delta);
    297     }
    298 
    299     public static void writeCharts(Transliterator trans, int start, int index){
    300 
    301         Transliterator inverse = trans.getInverse();
    302         for(int i=0;i<=0x7f;i++){
    303             String cp = UTF16.valueOf(start+i);
    304             String s1 = inverse.transliterate(cp);
    305             String s2 = trans.transliterate(s1);
    306 
    307             String[] arr = (String[])table.get(getKey(start+i));
    308             if(cp.equals(s2)){
    309                 arr[index] = s1;
    310             }else{
    311                 arr[index] = s1 + ":FALLBACK";
    312             }
    313         }
    314     }
    315 
    316     public static void writeIICharts(Transliterator trans,int start, int index){
    317 
    318         Transliterator inverse = trans.getInverse();
    319         UnicodeSetIterator iter = new UnicodeSetIterator(inter);
    320 
    321         while(iter.next()){
    322             String cp =UTF16.valueOf(iter.codepoint);
    323             String s1 = trans.transliterate(cp);
    324             String s2 = inverse.transliterate(s1);
    325             String[] arr = (String[])table.get(UTF16.valueOf(iter.codepoint&0xFF));
    326             if(cp.equals(s1)){
    327                 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":UNASSIGNED";
    328             }else if(cp.equals(s2)){
    329                 arr[index] = s1;
    330             }else if(s1.equals(s2)){
    331                 if(s1.equals("")){
    332                     arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED";
    333                 }else{
    334                     arr[index] = s1+ ":FALLBACK";
    335                 }
    336             } else{
    337                 if(s2.equals("")){
    338                     arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED";
    339                 }else{
    340                     arr[index] = s1+ ":FALLBACK";
    341                 }
    342             }
    343         }
    344     }
    345     public static void writeCharts(Transliterator trans, UnicodeSet target, int index){
    346         UnicodeSetIterator tIter = new UnicodeSetIterator(target);
    347         Transliterator inverse = trans.getInverse();
    348         while(tIter.next()){
    349             String cp = UTF16.valueOf(tIter.codepoint);
    350             String s1 = inverse.transliterate(cp);
    351             String s2 = trans.transliterate(s1);
    352 
    353             String[] arr = (String[])table.get(getKey(tIter.codepoint));
    354             if(cp.equals(s2)){
    355                 arr[index] = cp;
    356             }else{
    357                 arr[index] = cp + ":FALLBACK";
    358             }
    359         }
    360     }
    361 }
    362 
    363