Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  * Copyright (C) 1996-2011, International Business Machines Corporation and
      6  * others. All Rights Reserved.
      7  */
      8 package android.icu.text;
      9 import android.icu.impl.PatternProps;
     10 import android.icu.impl.UCharacterName;
     11 import android.icu.impl.Utility;
     12 import android.icu.lang.UCharacter;
     13 
     14 /**
     15  * A transliterator that performs name to character mapping.
     16  * @author Alan Liu
     17  */
     18 class NameUnicodeTransliterator extends Transliterator {
     19 
     20     static final String _ID = "Name-Any";
     21 
     22     static final String OPEN_PAT    = "\\N~{~";
     23     static final char   OPEN_DELIM  = '\\'; // first char of OPEN_PAT
     24     static final char   CLOSE_DELIM = '}';
     25     static final char   SPACE       = ' ';
     26 
     27 
     28     /**
     29      * System registration hook.
     30      */
     31     static void register() {
     32         Transliterator.registerFactory(_ID, new Transliterator.Factory() {
     33             @Override
     34             public Transliterator getInstance(String ID) {
     35                 return new NameUnicodeTransliterator(null);
     36             }
     37         });
     38     }
     39 
     40     /**
     41      * Constructs a transliterator.
     42      */
     43     public NameUnicodeTransliterator(UnicodeFilter filter) {
     44         super(_ID, filter);
     45     }
     46 
     47     /**
     48      * Implements {@link Transliterator#handleTransliterate}.
     49      */
     50     @Override
     51     protected void handleTransliterate(Replaceable text,
     52                                        Position offsets, boolean isIncremental) {
     53 
     54         int maxLen = UCharacterName.INSTANCE.getMaxCharNameLength() + 1; // allow for temporary trailing space
     55 
     56         StringBuffer name = new StringBuffer(maxLen);
     57 
     58         // Get the legal character set
     59         UnicodeSet legal = new UnicodeSet();
     60         UCharacterName.INSTANCE.getCharNameCharacters(legal);
     61 
     62         int cursor = offsets.start;
     63         int limit = offsets.limit;
     64 
     65         // Modes:
     66         // 0 - looking for open delimiter
     67         // 1 - after open delimiter
     68         int mode = 0;
     69         int openPos = -1; // open delim candidate pos
     70 
     71         int c;
     72         while (cursor < limit) {
     73             c = text.char32At(cursor);
     74 
     75             switch (mode) {
     76             case 0: // looking for open delimiter
     77                 if (c == OPEN_DELIM) { // quick check first
     78                     openPos = cursor;
     79                     int i = Utility.parsePattern(OPEN_PAT, text, cursor, limit);
     80                     if (i >= 0 && i < limit) {
     81                         mode = 1;
     82                         name.setLength(0);
     83                         cursor = i;
     84                         continue; // *** reprocess char32At(cursor)
     85                     }
     86                 }
     87                 break;
     88 
     89             case 1: // after open delimiter
     90                 // Look for legal chars.  If \s+ is found, convert it
     91                 // to a single space.  If closeDelimiter is found, exit
     92                 // the loop.  If any other character is found, exit the
     93                 // loop.  If the limit is reached, exit the loop.
     94 
     95                 // Convert \s+ => SPACE.  This assumes there are no
     96                 // runs of >1 space characters in names.
     97                 if (PatternProps.isWhiteSpace(c)) {
     98                     // Ignore leading whitespace
     99                     if (name.length() > 0 &&
    100                         name.charAt(name.length()-1) != SPACE) {
    101                         name.append(SPACE);
    102                         // If we are too long then abort.  maxLen includes
    103                         // temporary trailing space, so use '>'.
    104                         if (name.length() > maxLen) {
    105                             mode = 0;
    106                         }
    107                     }
    108                     break;
    109                 }
    110 
    111                 if (c == CLOSE_DELIM) {
    112 
    113                     int len = name.length();
    114 
    115                     // Delete trailing space, if any
    116                     if (len > 0 &&
    117                         name.charAt(len-1) == SPACE) {
    118                         name.setLength(--len);
    119                     }
    120 
    121                     c = UCharacter.getCharFromExtendedName(name.toString());
    122                     if (c != -1) {
    123                         // Lookup succeeded
    124 
    125                         // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
    126                         cursor++; // advance over CLOSE_DELIM
    127 
    128                         String str = UTF16.valueOf(c);
    129                         text.replace(openPos, cursor, str);
    130 
    131                         // Adjust indices for the change in the length of
    132                         // the string.  Do not assume that str.length() ==
    133                         // 1, in case of surrogates.
    134                         int delta = cursor - openPos - str.length();
    135                         cursor -= delta;
    136                         limit -= delta;
    137                         // assert(cursor == openPos + str.length());
    138                     }
    139                     // If the lookup failed, we leave things as-is and
    140                     // still switch to mode 0 and continue.
    141                     mode = 0;
    142                     openPos = -1; // close off candidate
    143                     continue; // *** reprocess char32At(cursor)
    144                 }
    145 
    146                 if (legal.contains(c)) {
    147                     UTF16.append(name, c);
    148                     // If we go past the longest possible name then abort.
    149                     // maxLen includes temporary trailing space, so use '>='.
    150                     if (name.length() >= maxLen) {
    151                         mode = 0;
    152                     }
    153                 }
    154 
    155                 // Invalid character
    156                 else {
    157                     --cursor; // Backup and reprocess this character
    158                     mode = 0;
    159                 }
    160 
    161                 break;
    162             }
    163 
    164             cursor += UTF16.getCharCount(c);
    165         }
    166 
    167         offsets.contextLimit += limit - offsets.limit;
    168         offsets.limit = limit;
    169         // In incremental mode, only advance the cursor up to the last
    170         // open delimiter candidate.
    171         offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
    172     }
    173 
    174     /* (non-Javadoc)
    175      * @see android.icu.text.Transliterator#addSourceTargetSet(android.icu.text.UnicodeSet, android.icu.text.UnicodeSet, android.icu.text.UnicodeSet)
    176      */
    177     @Override
    178     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
    179         UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
    180         if (!myFilter.containsAll(UnicodeNameTransliterator.OPEN_DELIM) || !myFilter.contains(CLOSE_DELIM)) {
    181             return; // we have to contain both prefix and suffix
    182         }
    183         UnicodeSet items = new UnicodeSet()
    184         .addAll('0', '9')
    185         .addAll('A', 'F')
    186         .addAll('a', 'z') // for controls
    187         .add('<').add('>') // for controls
    188         .add('(').add(')') // for controls
    189         .add('-')
    190         .add(' ')
    191         .addAll(UnicodeNameTransliterator.OPEN_DELIM)
    192         .add(CLOSE_DELIM);
    193         items.retainAll(myFilter);
    194         if (items.size() > 0) {
    195             sourceSet.addAll(items);
    196             // could produce any character
    197             targetSet.addAll(0, 0x10FFFF);
    198         }
    199     }
    200 }
    201