Home | History | Annotate | Download | only in stringprep
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 2003-2010, International Business Machines Corporation and    *
      4  * others. All Rights Reserved.                                                *
      5  *******************************************************************************
      6 */
      7 package com.ibm.icu.dev.test.stringprep;
      8 
      9 
     10 import java.lang.reflect.InvocationTargetException;
     11 import java.lang.reflect.Method;
     12 
     13 import com.ibm.icu.impl.ICUResourceBundle;
     14 import com.ibm.icu.lang.UCharacter;
     15 import com.ibm.icu.lang.UCharacterDirection;
     16 import com.ibm.icu.text.StringPrepParseException;
     17 import com.ibm.icu.text.UCharacterIterator;
     18 import com.ibm.icu.text.UnicodeSet;
     19 
     20 /**
     21  * @author ram
     22  *
     23  * To change the template for this generated type comment go to
     24  * Window>Preferences>Java>Code Generation>Code and Comments
     25  */
     26 public class NamePrepTransform {
     27 
     28     private static final NamePrepTransform transform = new NamePrepTransform();
     29 
     30     private UnicodeSet labelSeparatorSet;
     31     private UnicodeSet prohibitedSet;
     32     private UnicodeSet unassignedSet;
     33     private MapTransform mapTransform;
     34     public static final int NONE = 0;
     35     public static final int ALLOW_UNASSIGNED = 1;
     36 
     37     private NamePrepTransform(){
     38         // load the resource bundle
     39         ICUResourceBundle bundle = (ICUResourceBundle)ICUResourceBundle.getBundleInstance("com/ibm/icu/dev/data/testdata","idna_rules", NamePrepTransform.class.getClassLoader(), true);
     40         String  mapRules      = bundle.getString("MapNoNormalization");
     41         mapRules             += bundle.getString("MapNFKC");
     42         // disable
     43         mapTransform          = new MapTransform("CaseMap", mapRules, 0 /*Transliterator.FORWARD*/);
     44         labelSeparatorSet     = new UnicodeSet(bundle.getString("LabelSeparatorSet"));
     45         prohibitedSet         = new UnicodeSet(bundle.getString("ProhibitedSet"));
     46         unassignedSet         = new UnicodeSet(bundle.getString("UnassignedSet"));
     47     }
     48 
     49     public static final NamePrepTransform getInstance(){
     50         return transform;
     51     }
     52     public static boolean isLabelSeparator(int ch){
     53         return transform.labelSeparatorSet.contains(ch);
     54     }
     55 
     56      /*
     57        1) Map -- For each character in the input, check if it has a mapping
     58           and, if so, replace it with its mapping.
     59 
     60        2) Normalize -- Possibly normalize the result of step 1 using Unicode
     61           normalization.
     62 
     63        3) Prohibit -- Check for any characters that are not allowed in the
     64           output.  If any are found, return an error.
     65 
     66        4) Check bidi -- Possibly check for right-to-left characters, and if
     67           any are found, make sure that the whole string satisfies the
     68           requirements for bidirectional strings.  If the string does not
     69           satisfy the requirements for bidirectional strings, return an
     70           error.
     71           [Unicode3.2] defines several bidirectional categories; each character
     72            has one bidirectional category assigned to it.  For the purposes of
     73            the requirements below, an "RandALCat character" is a character that
     74            has Unicode bidirectional categories "R" or "AL"; an "LCat character"
     75            is a character that has Unicode bidirectional category "L".  Note
     76 
     77 
     78            that there are many characters which fall in neither of the above
     79            definitions; Latin digits (<U+0030> through <U+0039>) are examples of
     80            this because they have bidirectional category "EN".
     81 
     82            In any profile that specifies bidirectional character handling, all
     83            three of the following requirements MUST be met:
     84 
     85            1) The characters in section 5.8 MUST be prohibited.
     86 
     87            2) If a string contains any RandALCat character, the string MUST NOT
     88               contain any LCat character.
     89 
     90            3) If a string contains any RandALCat character, a RandALCat
     91               character MUST be the first character of the string, and a
     92               RandALCat character MUST be the last character of the string.
     93     */
     94 
     95     public boolean isReady() {
     96         return mapTransform.isReady();
     97     }
     98 
     99     public StringBuffer prepare(UCharacterIterator src,
    100                                        int options)
    101                                        throws StringPrepParseException{
    102              return prepare(src.getText(),options);
    103     }
    104 
    105     private String map ( String src, int options)
    106                                 throws StringPrepParseException{
    107         // map
    108         boolean allowUnassigned =  ((options & ALLOW_UNASSIGNED)>0);
    109         // disable test
    110         String caseMapOut = mapTransform.transliterate(src);
    111         UCharacterIterator iter = UCharacterIterator.getInstance(caseMapOut);
    112         int ch;
    113         while((ch=iter.nextCodePoint())!=UCharacterIterator.DONE){
    114             if(transform.unassignedSet.contains(ch)==true && allowUnassigned ==false){
    115                 throw new StringPrepParseException("An unassigned code point was found in the input",
    116                                          StringPrepParseException.UNASSIGNED_ERROR);
    117             }
    118         }
    119         return caseMapOut;
    120     }
    121     public StringBuffer prepare(String src,int options)
    122                                    throws StringPrepParseException{
    123 
    124         int ch;
    125         String mapOut = map(src,options);
    126         UCharacterIterator iter = UCharacterIterator.getInstance(mapOut);
    127 
    128         int direction=UCharacterDirection.CHAR_DIRECTION_COUNT,
    129             firstCharDir=UCharacterDirection.CHAR_DIRECTION_COUNT;
    130         int rtlPos=-1, ltrPos=-1;
    131         boolean rightToLeft=false, leftToRight=false;
    132 
    133         while((ch=iter.nextCodePoint())!= UCharacterIterator.DONE){
    134 
    135 
    136             if(transform.prohibitedSet.contains(ch)==true && ch!=0x0020){
    137                 throw new StringPrepParseException("A prohibited code point was found in the input",
    138                                          StringPrepParseException.PROHIBITED_ERROR,
    139                                          iter.getText(),iter.getIndex());
    140             }
    141 
    142             direction = UCharacter.getDirection(ch);
    143             if(firstCharDir == UCharacterDirection.CHAR_DIRECTION_COUNT){
    144                 firstCharDir = direction;
    145             }
    146             if(direction == UCharacterDirection.LEFT_TO_RIGHT){
    147                 leftToRight = true;
    148                 ltrPos = iter.getIndex()-1;
    149             }
    150             if(direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC){
    151                 rightToLeft = true;
    152                 rtlPos = iter.getIndex()-1;
    153             }
    154         }
    155 
    156         // satisfy 2
    157         if( leftToRight == true && rightToLeft == true){
    158             throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
    159                                      StringPrepParseException.CHECK_BIDI_ERROR,iter.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
    160         }
    161 
    162         //satisfy 3
    163         if( rightToLeft == true &&
    164             !((firstCharDir == UCharacterDirection.RIGHT_TO_LEFT || firstCharDir == UCharacterDirection.RIGHT_TO_LEFT_ARABIC) &&
    165             (direction == UCharacterDirection.RIGHT_TO_LEFT || direction == UCharacterDirection.RIGHT_TO_LEFT_ARABIC))
    166            ){
    167             throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
    168                                       StringPrepParseException.CHECK_BIDI_ERROR,iter.getText(),(rtlPos>ltrPos) ? rtlPos : ltrPos);
    169         }
    170 
    171         return new StringBuffer(mapOut);
    172 
    173     }
    174 
    175     private static class MapTransform {
    176         private Object translitInstance;
    177         private Method translitMethod;
    178         private boolean isReady;
    179 
    180         MapTransform(String id, String rule, int direction) {
    181             isReady = initialize(id, rule, direction);
    182         }
    183 
    184         boolean initialize(String id, String rule, int direction) {
    185             try {
    186                 Class cls = Class.forName("com.ibm.icu.text.Transliterator");
    187                 Method createMethod = cls.getMethod("createFromRules", String.class, String.class, Integer.TYPE);
    188                 translitInstance = createMethod.invoke(null, id, rule, Integer.valueOf(direction));
    189                 translitMethod = cls.getMethod("transliterate", String.class);
    190             } catch (Throwable e) {
    191                 return false;
    192             }
    193             return true;
    194         }
    195 
    196         boolean isReady() {
    197             return isReady;
    198         }
    199 
    200         String transliterate(String text) {
    201             if (!isReady) {
    202                 throw new IllegalStateException("Transliterator is not ready");
    203             }
    204             String result = null;
    205             try {
    206                 result = (String)translitMethod.invoke(translitInstance, text);
    207             } catch (InvocationTargetException ite) {
    208                 throw new RuntimeException(ite);
    209             } catch (IllegalAccessException iae) {
    210                 throw new RuntimeException(iae);
    211             }
    212             return result;
    213         }
    214     }
    215 }
    216