Home | History | Annotate | Download | only in translit
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /**
      4 *******************************************************************************
      5 * Copyright (C) 2002-2010, International Business Machines Corporation and    *
      6 * others. All Rights Reserved.                                                *
      7 *******************************************************************************
      8 */
      9 package com.ibm.icu.dev.tool.translit;
     10 
     11 import java.io.FileOutputStream;
     12 import java.io.OutputStreamWriter;
     13 import java.util.Hashtable;
     14 
     15 import com.ibm.icu.impl.Utility;
     16 import com.ibm.icu.lang.UCharacter;
     17 import com.ibm.icu.text.Transliterator;
     18 import com.ibm.icu.text.UTF16;
     19 import com.ibm.icu.text.UnicodeSet;
     20 import com.ibm.icu.text.UnicodeSetIterator;
     21 /**
     22  * @author ram
     23  *
     24  * To change this generated comment edit the template variable "typecomment":
     25  * Window>Preferences>Java>Templates.
     26  * To enable and disable the creation of type comments go to
     27  * Window>Preferences>Java>Code Generation.7F
     28  */
     29 public class WriteIndicCharts {
     30 
     31     public static void main(String[] args){
     32         writeIICharts();
     33     }
     34 
     35 
     36     static String header =  "<html>\n" +
     37                             "    <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"+
     38                             "           Inter-Indic Transliteration Comparison chart"+
     39                             "    </head>\n"+
     40                             "    <body bgcolor=#FFFFFF>\n"+
     41                             "         <table border=1 width=100% >\n"+
     42                             "            <tr>\n"+
     43                             "            <th width=9%>Inter-Indic</th>\n"+
     44                             "            <th width=9%>Latin</th>\n"+
     45                             "            <th width=9%>Devanagari</th>\n"+
     46                             "            <th width=9%>Bengali</th>\n"+
     47                             "            <th width=9%>Gurmukhi</th>\n"+
     48                             "            <th width=9%>Gujarati</th>\n"+
     49                             "            <th width=9%>Oriya</th>\n"+
     50                             "            <th width=9%>Tamil</th>\n"+
     51                             "            <th width=9%>Telugu</th>\n"+
     52                             "            <th width=9%>Kannada</th>\n"+
     53                             "            <th width=9%>Malayalam</th>\n"+
     54                             "            </tr>\n";
     55     static String footer =  "          </table>\n"+
     56                             "    </body>\n" +
     57                             "</html>\n";
     58 
     59     static UnicodeSet deva = new UnicodeSet("[:deva:]");
     60     static UnicodeSet beng = new UnicodeSet("[:beng:]");
     61     static UnicodeSet gujr = new UnicodeSet("[:gujr:]");
     62     static UnicodeSet guru = new UnicodeSet("[:guru:]");
     63     static UnicodeSet orya = new UnicodeSet("[:orya:]");
     64     static UnicodeSet taml = new UnicodeSet("[:taml:]");
     65     static UnicodeSet telu = new UnicodeSet("[:telu:]");
     66     static UnicodeSet knda = new UnicodeSet("[:knda:]");
     67     static UnicodeSet mlym = new UnicodeSet("[:mlym:]");
     68     static UnicodeSet inter= new UnicodeSet("[\uE000-\uE082]");
     69 
     70     public static void writeIICharts(){
     71         try{
     72             Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali");
     73             Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi");
     74             Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati");
     75             Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya");
     76             Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil");
     77             Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu");
     78             Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada");
     79             Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam");
     80             Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari");
     81             Transliterator t10 = Transliterator.getInstance("InterIndic-Latin");
     82             //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
     83 
     84             for(int i=0x00;i<=0x80;i++){
     85                String[] arr =  new String[10];
     86                arr[0]=UTF16.valueOf(i+ 0xE000);
     87                table.put(UTF16.valueOf(i),arr);
     88             }
     89 
     90             OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
     91 
     92             os.write(header);
     93 
     94             writeIICharts(t9,0x0900,1);
     95             writeIICharts(t1,0x0980,2);
     96             writeIICharts(t2,0x0A00,3);
     97             writeIICharts(t3,0x0A80,4);
     98             writeIICharts(t4,0x0B00,5);
     99             writeIICharts(t5,0x0B80,6);
    100             writeIICharts(t6,0x0c00,7);
    101             writeIICharts(t7,0x0C80,8);
    102             writeIICharts(t8,0x0D00,9);
    103 
    104             for(int i=0x00;i<=0x80;i++){
    105                 String[] temp = (String[])table.get(UTF16.valueOf(i));
    106                 boolean write = false;
    107                 for(int k=1;k<temp.length && temp[k]!=null;k++){
    108                     if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0 ||
    109                        temp[k].indexOf(":UNASSIGNED")<0){
    110                         write = true;
    111                     }
    112                 }
    113                 if(write){
    114                     os.write("        <tr>\n");
    115                     for(int j=0; j<temp.length;j++){
    116                         if(temp[j]!=null){
    117                             boolean fallback=false;
    118                             boolean unassigned=false;
    119                             boolean unmapped = false;
    120                             boolean consumed =false;
    121                             String str = temp[j];
    122 
    123                             if(temp[j].indexOf(":FALLBACK")>=0){
    124                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    125                                 fallback=true;
    126                                // os.write("            <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    127                             }
    128                             if(temp[j].indexOf(":UNASSIGNED")>=0){
    129                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    130                                 unassigned=true;
    131                             }
    132 
    133                             if(temp[j].indexOf(":UNMAPPED")>=0){
    134                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    135                                 unmapped=true;
    136                             }
    137                             if(temp[j].indexOf(":CONSUMED")>=0){
    138                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    139                                 consumed=true;
    140                             }
    141 
    142                             String name;
    143                             StringBuffer nameBuf=new StringBuffer();
    144                             for(int f=0; f<str.length();f++){
    145                                 if(f>0){ nameBuf.append("+");}
    146                                 nameBuf.append(UCharacter.getExtendedName(UTF16.charAt(str,f)));
    147                             }
    148                             name = nameBuf.toString();
    149                             if(fallback){
    150 
    151                                 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
    152                                     os.write("            <td  width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    153                                 }else{
    154                                     os.write("            <td width=9% bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    155                                 }
    156                             }else if(unmapped){
    157                                 os.write("            <td bgcolor=#FF9999 align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    158                             }else if(unassigned){
    159                                 if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
    160                                     os.write("            <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    161                                 }else{
    162                                     os.write("            <td width=9% bgcolor=#00FFFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    163                                 }
    164                             }else if(consumed){
    165                                  if(UCharacter.getExtendedName(UTF16.charAt(str,0)).indexOf("unassigned")>0){
    166                                     os.write("            <td width=9% bgcolor=#FFFF55 align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    167                                 }else{
    168                                     os.write("            <td width=9% bgcolor=#FFFF55 align=center title=\""+""+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    169                                 }
    170                             }else if(name.indexOf("private")!=-1){
    171                                 String s = t10.transliterate(str);
    172                                 os.write("            <td width=9% bgcolor=#FFBBBB  align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    173                                 if(!s.equals(str)){
    174                                     os.write("            <td width=9%  bgcolor=#CCEEDD align=center>"+s +"</td>");
    175                                 }else{
    176                                     os.write("            <td width=9% bgcolor=#CCEEDD align=center>&nbsp;</td>");
    177                                 }
    178                             }else{
    179                                os.write("            <td width=9% align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    180                             }
    181                         }else{
    182                              os.write("           <td width=9% >&nbsp</td>\n");
    183                         }
    184                     }
    185                     os.write("        </tr>\n");
    186                 }
    187             }
    188             os.write(footer);
    189             os.close();
    190         }catch( Exception e){
    191             e.printStackTrace();
    192         }
    193     }
    194     public static void writeCharts(){
    195         try{
    196             Transliterator t1 = Transliterator.getInstance("InterIndic-Bengali");
    197             Transliterator t2 = Transliterator.getInstance("InterIndic-Gurmukhi");
    198             Transliterator t3 = Transliterator.getInstance("InterIndic-Gujarati");
    199             Transliterator t4 = Transliterator.getInstance("InterIndic-Oriya");
    200             Transliterator t5 = Transliterator.getInstance("InterIndic-Tamil");
    201             Transliterator t6 = Transliterator.getInstance("InterIndic-Telugu");
    202             Transliterator t7 = Transliterator.getInstance("InterIndic-Kannada");
    203             Transliterator t8 = Transliterator.getInstance("InterIndic-Malayalam");
    204             Transliterator t9 = Transliterator.getInstance("InterIndic-Devanagari");
    205 
    206             //UnicodeSetIterator sIter = new UnicodeSetIterator(deva);
    207 
    208             for(int i=0x0900;i<=0x097F;i++){
    209                String[] arr =  new String[10];
    210                arr[0]=UTF16.valueOf((i&0xFF) + 0xE000);
    211                table.put(UTF16.valueOf(i),arr);
    212             }
    213 
    214             OutputStreamWriter os = new OutputStreamWriter(new FileOutputStream("comparison-chart.html"),"UTF-8");
    215 
    216             os.write(header);
    217             /*
    218             writeCharts(t1,beng,1);
    219             writeCharts(t2,guru,2);
    220             writeCharts(t3,gujr,3);
    221             writeCharts(t4,orya,4);
    222             writeCharts(t5,taml,5);
    223             writeCharts(t6,telu,6);
    224             writeCharts(t7,knda,7);
    225             writeCharts(t8,mlym,8);
    226             */
    227             /*
    228             writeCharts(t9,0x0900,1);
    229             writeCharts(t1,0x0980,2);
    230             writeCharts(t2,0x0A00,3);
    231             writeCharts(t3,0x0A80,4);
    232             writeCharts(t4,0x0B00,5);
    233             writeCharts(t5,0x0B80,6);
    234             writeCharts(t6,0x0c00,7);
    235             writeCharts(t7,0x0C80,8);
    236             writeCharts(t8,0x0D00,9);
    237             */
    238             writeIICharts(t9,0x0900,1);
    239             writeIICharts(t1,0x0980,2);
    240             writeIICharts(t2,0x0A00,3);
    241             writeIICharts(t3,0x0A80,4);
    242             writeIICharts(t4,0x0B00,5);
    243             writeIICharts(t5,0x0B80,6);
    244             writeIICharts(t6,0x0c00,7);
    245             writeIICharts(t7,0x0C80,8);
    246             writeIICharts(t8,0x0D00,9);
    247             for(int i=0x0900;i<=0x097F;i++){
    248                 String[] temp = (String[])table.get(UTF16.valueOf(i));
    249                 boolean write = false;
    250                 for(int k=1;k<temp.length;k++){
    251                     if(UCharacter.getExtendedName(UTF16.charAt(temp[k],0)).indexOf("unassigned")<0){
    252                         write = true;
    253                     }
    254                 }
    255                 if(write){
    256                     os.write("        <tr>\n");
    257                     for(int j=0; j<temp.length;j++){
    258                         if(temp[j]!=null){
    259                             boolean fallback=false;
    260                             String str = temp[j];
    261 
    262                             if(temp[j].indexOf(":FALLBACK")>=0){
    263                                 str = temp[j].substring(0,temp[j].indexOf(":"));
    264                                 fallback=true;
    265                                // os.write("            <td bgcolor=#FFFF00 align=center title=\""++"\">"+str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    266                             }
    267                             String name = UCharacter.getExtendedName(UTF16.charAt(str,0));
    268                             if(fallback){
    269                                 os.write("            <td bgcolor=#BBBBFF align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    270                             }else if(name.indexOf("unassigned")!=-1){
    271                                 os.write("            <td bgcolor=#CCCCCC align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    272                             }else if(name.indexOf("private")!=-1){
    273 
    274 
    275                                 os.write("            <td bgcolor=#FFBBBB align=center title=\""+name+"\">"+"&nbsp<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    276 
    277                             }else{
    278                                os.write("            <td align=center title=\""+name+"\">"+ str+"<br><tt>"+Utility.hex(str)+"</tt>"+"</td>\n");
    279                             }
    280                         }else{
    281                              os.write("           <td>&nbsp</td>\n");
    282                         }
    283                     }
    284                     os.write("        </tr>\n");
    285                 }
    286             }
    287             os.write(footer);
    288             os.close();
    289         }catch( Exception e){
    290             e.printStackTrace();
    291         }
    292     }
    293     static Hashtable table = new Hashtable();
    294     static String getKey(int cp){
    295         int delta = cp & 0xFF;
    296         delta-= (delta>0x7f)? 0x80 : 0;
    297         //delta+=0x0900;
    298         return UTF16.valueOf(delta);
    299     }
    300 
    301     public static void writeCharts(Transliterator trans, int start, int index){
    302 
    303         Transliterator inverse = trans.getInverse();
    304         for(int i=0;i<=0x7f;i++){
    305             String cp = UTF16.valueOf(start+i);
    306             String s1 = inverse.transliterate(cp);
    307             String s2 = trans.transliterate(s1);
    308 
    309             String[] arr = (String[])table.get(getKey(start+i));
    310             if(cp.equals(s2)){
    311                 arr[index] = s1;
    312             }else{
    313                 arr[index] = s1 + ":FALLBACK";
    314             }
    315         }
    316     }
    317 
    318     public static void writeIICharts(Transliterator trans,int start, int index){
    319 
    320         Transliterator inverse = trans.getInverse();
    321         UnicodeSetIterator iter = new UnicodeSetIterator(inter);
    322 
    323         while(iter.next()){
    324             String cp =UTF16.valueOf(iter.codepoint);
    325             String s1 = trans.transliterate(cp);
    326             String s2 = inverse.transliterate(s1);
    327             String[] arr = (String[])table.get(UTF16.valueOf(iter.codepoint&0xFF));
    328             if(cp.equals(s1)){
    329                 arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":UNASSIGNED";
    330             }else if(cp.equals(s2)){
    331                 arr[index] = s1;
    332             }else if(s1.equals(s2)){
    333                 if(s1.equals("")){
    334                     arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED";
    335                 }else{
    336                     arr[index] = s1+ ":FALLBACK";
    337                 }
    338             } else{
    339                 if(s2.equals("")){
    340                     arr[index] = UTF16.valueOf(start+(((byte)iter.codepoint)&0xFF))+":CONSUMED";
    341                 }else{
    342                     arr[index] = s1+ ":FALLBACK";
    343                 }
    344             }
    345         }
    346     }
    347     public static void writeCharts(Transliterator trans, UnicodeSet target, int index){
    348         UnicodeSetIterator tIter = new UnicodeSetIterator(target);
    349         Transliterator inverse = trans.getInverse();
    350         while(tIter.next()){
    351             String cp = UTF16.valueOf(tIter.codepoint);
    352             String s1 = inverse.transliterate(cp);
    353             String s2 = trans.transliterate(s1);
    354 
    355             String[] arr = (String[])table.get(getKey(tIter.codepoint));
    356             if(cp.equals(s2)){
    357                 arr[index] = cp;
    358             }else{
    359                 arr[index] = cp + ":FALLBACK";
    360             }
    361         }
    362     }
    363 }
    364 
    365