Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5 *****************************************************************
      6 * Copyright (c) 2002-2014, International Business Machines Corporation
      7 * and others.  All Rights Reserved.
      8 *****************************************************************
      9 * Date        Name        Description
     10 * 06/06/2002  aliu        Creation.
     11 *****************************************************************
     12 */
     13 package android.icu.text;
     14 
     15 import java.util.ArrayList;
     16 import java.util.Enumeration;
     17 import java.util.HashMap;
     18 import java.util.HashSet;
     19 import java.util.List;
     20 import java.util.MissingResourceException;
     21 import java.util.Set;
     22 import java.util.concurrent.ConcurrentHashMap;
     23 
     24 import android.icu.lang.UScript;
     25 /**
     26  * A transliterator that translates multiple input scripts to a single
     27  * output script.  It is named Any-T or Any-T/V, where T is the target
     28  * and V is the optional variant.  The target T is a script.
     29  *
     30  * <p>An AnyTransliterator partitions text into runs of the same
     31  * script, together with adjacent COMMON or INHERITED characters.
     32  * After determining the script of each run, it transliterates from
     33  * that script to the given target/variant.  It does so by
     34  * instantiating a transliterator from the source script to the
     35  * target/variant.  If a run consists only of the target script,
     36  * COMMON, or INHERITED characters, then the run is not changed.
     37  *
     38  * <p>At startup, all possible AnyTransliterators are registered with
     39  * the system, as determined by examining the registered script
     40  * transliterators.
     41  *
     42  * @author Alan Liu
     43  */
     44 class AnyTransliterator extends Transliterator {
     45 
     46     //------------------------------------------------------------
     47     // Constants
     48 
     49     static final char TARGET_SEP = '-';
     50     static final char VARIANT_SEP = '/';
     51     static final String ANY = "Any";
     52     static final String NULL_ID = "Null";
     53     static final String LATIN_PIVOT = "-Latin;Latin-";
     54 
     55     /**
     56      * Cache mapping UScriptCode values to Transliterator*.
     57      */
     58     private ConcurrentHashMap<Integer, Transliterator> cache;
     59 
     60     /**
     61      * The target or target/variant string.
     62      */
     63     private String target;
     64 
     65     /**
     66      * The target script code.  Never USCRIPT_INVALID_CODE.
     67      */
     68     private int targetScript;
     69 
     70     /**
     71      * Special code for handling width characters
     72      */
     73     private Transliterator widthFix = Transliterator.getInstance("[[:dt=Nar:][:dt=Wide:]] nfkd");
     74 
     75     /**
     76      * Implements {@link Transliterator#handleTransliterate}.
     77      */
     78     @Override
     79     protected void handleTransliterate(Replaceable text,
     80                                        Position pos, boolean isIncremental) {
     81         int allStart = pos.start;
     82         int allLimit = pos.limit;
     83 
     84         ScriptRunIterator it =
     85             new ScriptRunIterator(text, pos.contextStart, pos.contextLimit);
     86 
     87         while (it.next()) {
     88             // Ignore runs in the ante context
     89             if (it.limit <= allStart) continue;
     90 
     91             // Try to instantiate transliterator from it.scriptCode to
     92             // our target or target/variant
     93             Transliterator t = getTransliterator(it.scriptCode);
     94 
     95             if (t == null) {
     96                 // We have no transliterator.  Do nothing, but keep
     97                 // pos.start up to date.
     98                 pos.start = it.limit;
     99                 continue;
    100             }
    101 
    102             // If the run end is before the transliteration limit, do
    103             // a non-incremental transliteration.  Otherwise do an
    104             // incremental one.
    105             boolean incremental = isIncremental && (it.limit >= allLimit);
    106 
    107             pos.start = Math.max(allStart, it.start);
    108             pos.limit = Math.min(allLimit, it.limit);
    109             int limit = pos.limit;
    110             t.filteredTransliterate(text, pos, incremental);
    111             int delta = pos.limit - limit;
    112             allLimit += delta;
    113             it.adjustLimit(delta);
    114 
    115             // We're done if we enter the post context
    116             if (it.limit >= allLimit) break;
    117         }
    118 
    119         // Restore limit.  pos.start is fine where the last transliterator
    120         // left it, or at the end of the last run.
    121         pos.limit = allLimit;
    122     }
    123 
    /**
     * Private constructor, used by register().  Passes a null filter
     * to the superclass and starts with an empty cache.
     *
     * @param id the ID of the form S-T or S-T/V, where T is theTarget
     * and V is theVariant.  Must not be empty.
     * @param theTarget the target name.  Must not be empty, and must
     * name a script corresponding to theTargetScript.
     * @param theVariant the variant name, or the empty string if
     * there is no variant.  Must not be null.
     * @param theTargetScript the script code corresponding to
     * theTarget.
     */
    private AnyTransliterator(String id,
                              String theTarget,
                              String theVariant,
                              int theTargetScript) {
        super(id, null);
        this.cache = new ConcurrentHashMap<Integer, Transliterator>();
        this.targetScript = theTargetScript;

        // The stored target is "T" when there is no variant, "T/V" otherwise.
        this.target = theVariant.isEmpty()
                ? theTarget
                : theTarget + VARIANT_SEP + theVariant;
    }
    148 
    /**
     * Constructs an instance from already-computed state; used by
     * safeClone().  The supplied cache map is stored as-is, not copied.
     *
     * @param id the ID of the form S-T or S-T/V, where T is theTarget
     * and V is theVariant.  Must not be empty.
     * @param filter The Unicode filter, passed to the superclass.
     * @param target2 the target or target/variant string, stored
     * unchanged.
     * @param targetScript2 the script code corresponding to the target.
     * @param widthFix2 The Transliterator width fix.  Accepted but not
     * stored by this constructor; the widthFix field is populated by
     * its own initializer.
     * @param cache2 The Map object for cache.
     */
    public AnyTransliterator(String id, UnicodeFilter filter, String target2,
            int targetScript2, Transliterator widthFix2, ConcurrentHashMap<Integer, Transliterator> cache2) {
        super(id, filter);
        this.target = target2;
        this.targetScript = targetScript2;
        this.cache = cache2;
    }
    165 
    166     /**
    167      * Returns a transliterator from the given source to our target or
    168      * target/variant.  Returns NULL if the source is the same as our
    169      * target script, or if the source is USCRIPT_INVALID_CODE.
    170      * Caches the result and returns the same transliterator the next
    171      * time.  The caller does NOT own the result and must not delete
    172      * it.
    173      */
    174     private Transliterator getTransliterator(int source) {
    175         if (source == targetScript || source == UScript.INVALID_CODE) {
    176             if (isWide(targetScript)) {
    177                 return null;
    178             } else {
    179                 return widthFix;
    180             }
    181         }
    182 
    183         Integer key = Integer.valueOf(source);
    184         Transliterator t = cache.get(key);
    185         if (t == null) {
    186             String sourceName = UScript.getName(source);
    187             String id = sourceName + TARGET_SEP + target;
    188 
    189             try {
    190                 t = Transliterator.getInstance(id, FORWARD);
    191             } catch (RuntimeException e) { }
    192             if (t == null) {
    193 
    194                 // Try to pivot around Latin, our most common script
    195                 id = sourceName + LATIN_PIVOT + target;
    196                 try {
    197                     t = Transliterator.getInstance(id, FORWARD);
    198                 } catch (RuntimeException e) { }
    199             }
    200 
    201             if (t != null) {
    202                 if (!isWide(targetScript)) {
    203                     List<Transliterator> v = new ArrayList<Transliterator>();
    204                     v.add(widthFix);
    205                     v.add(t);
    206                     t = new CompoundTransliterator(v);
    207                 }
    208                 Transliterator prevCachedT = cache.putIfAbsent(key, t);
    209                 if (prevCachedT != null) {
    210                     t = prevCachedT;
    211                 }
    212             } else if (!isWide(targetScript)) {
    213                 return widthFix;
    214             }
    215         }
    216 
    217         return t;
    218     }
    219 
    220     /**
    221      * @param targetScript2
    222      * @return
    223      */
    224     private boolean isWide(int script) {
    225         return script == UScript.BOPOMOFO || script == UScript.HAN || script == UScript.HANGUL || script == UScript.HIRAGANA || script == UScript.KATAKANA;
    226     }
    227 
    228     /**
    229      * Registers standard transliterators with the system.  Called by
    230      * Transliterator during initialization.  Scan all current targets
    231      * and register those that are scripts T as Any-T/V.
    232      */
    233     static void register() {
    234 
    235         HashMap<String, Set<String>> seen = new HashMap<String, Set<String>>(); // old code used set, but was dependent on order
    236 
    237         for (Enumeration<String> s = Transliterator.getAvailableSources(); s.hasMoreElements(); ) {
    238             String source = s.nextElement();
    239 
    240             // Ignore the "Any" source
    241             if (source.equalsIgnoreCase(ANY)) continue;
    242 
    243             for (Enumeration<String> t = Transliterator.getAvailableTargets(source);
    244                  t.hasMoreElements(); ) {
    245                 String target = t.nextElement();
    246 
    247                 // Get the script code for the target.  If not a script, ignore.
    248                 int targetScript = scriptNameToCode(target);
    249                 if (targetScript == UScript.INVALID_CODE) {
    250                     continue;
    251                 }
    252 
    253                 Set<String> seenVariants = seen.get(target);
    254                 if (seenVariants == null) {
    255                     seen.put(target, seenVariants = new HashSet<String>());
    256                 }
    257 
    258                 for (Enumeration<String> v = Transliterator.getAvailableVariants(source, target);
    259                      v.hasMoreElements(); ) {
    260                     String variant = v.nextElement();
    261 
    262                     // Only process each target/variant pair once
    263                     if (seenVariants.contains(variant)) {
    264                         continue;
    265                     }
    266                     seenVariants.add(variant);
    267 
    268                     String id;
    269                     id = TransliteratorIDParser.STVtoID(ANY, target, variant);
    270                     AnyTransliterator trans = new AnyTransliterator(id, target, variant,
    271                                                                     targetScript);
    272                     Transliterator.registerInstance(trans);
    273                     Transliterator.registerSpecialInverse(target, NULL_ID, false);
    274                 }
    275             }
    276         }
    277     }
    278 
    279     /**
    280      * Return the script code for a given name, or
    281      * UScript.INVALID_CODE if not found.
    282      */
    283     private static int scriptNameToCode(String name) {
    284         try{
    285             int[] codes = UScript.getCode(name);
    286             return codes != null ? codes[0] : UScript.INVALID_CODE;
    287         }catch( MissingResourceException e){
    288             ///CLOVER:OFF
    289             return UScript.INVALID_CODE;
    290             ///CLOVER:ON
    291         }
    292     }
    293 
    294     //------------------------------------------------------------
    295     // ScriptRunIterator
    296 
    297     /**
    298      * Returns a series of ranges corresponding to scripts. They will be
    299      * of the form:
    300      *
    301      * ccccSScSSccccTTcTcccc   - c = common, S = first script, T = second
    302      * |            |          - first run (start, limit)
    303      *          |           |  - second run (start, limit)
    304      *
    305      * That is, the runs will overlap. The reason for this is so that a
    306      * transliterator can consider common characters both before and after
    307      * the scripts.
    308      */
    309     private static class ScriptRunIterator {
    310 
    311         private Replaceable text;
    312         private int textStart;
    313         private int textLimit;
    314 
    315         /**
    316          * The code of the current run, valid after next() returns.  May
    317          * be UScript.INVALID_CODE if and only if the entire text is
    318          * COMMON/INHERITED.
    319          */
    320         public int scriptCode;
    321 
    322         /**
    323          * The start of the run, inclusive, valid after next() returns.
    324          */
    325         public int start;
    326 
    327         /**
    328          * The end of the run, exclusive, valid after next() returns.
    329          */
    330         public int limit;
    331 
    332         /**
    333          * Constructs a run iterator over the given text from start
    334          * (inclusive) to limit (exclusive).
    335          */
    336         public ScriptRunIterator(Replaceable text, int start, int limit) {
    337             this.text = text;
    338             this.textStart = start;
    339             this.textLimit = limit;
    340             this.limit = start;
    341         }
    342 
    343 
    344         /**
    345          * Returns TRUE if there are any more runs.  TRUE is always
    346          * returned at least once.  Upon return, the caller should
    347          * examine scriptCode, start, and limit.
    348          */
    349         public boolean next() {
    350             int ch;
    351             int s;
    352 
    353             scriptCode = UScript.INVALID_CODE; // don't know script yet
    354             start = limit;
    355 
    356             // Are we done?
    357             if (start == textLimit) {
    358                 return false;
    359             }
    360 
    361             // Move start back to include adjacent COMMON or INHERITED
    362             // characters
    363             while (start > textStart) {
    364                 ch = text.char32At(start - 1); // look back
    365                 s = UScript.getScript(ch);
    366                 if (s == UScript.COMMON || s == UScript.INHERITED) {
    367                     --start;
    368                 } else {
    369                     break;
    370                 }
    371             }
    372 
    373             // Move limit ahead to include COMMON, INHERITED, and characters
    374             // of the current script.
    375             while (limit < textLimit) {
    376                 ch = text.char32At(limit); // look ahead
    377                 s = UScript.getScript(ch);
    378                 if (s != UScript.COMMON && s != UScript.INHERITED) {
    379                     if (scriptCode == UScript.INVALID_CODE) {
    380                         scriptCode = s;
    381                     } else if (s != scriptCode) {
    382                         break;
    383                     }
    384                 }
    385                 ++limit;
    386             }
    387 
    388             // Return TRUE even if the entire text is COMMON / INHERITED, in
    389             // which case scriptCode will be UScript.INVALID_CODE.
    390             return true;
    391         }
    392 
    393         /**
    394          * Adjusts internal indices for a change in the limit index of the
    395          * given delta.  A positive delta means the limit has increased.
    396          */
    397         public void adjustLimit(int delta) {
    398             limit += delta;
    399             textLimit += delta;
    400         }
    401     }
    402 
    403     /**
    404      * Temporary hack for registry problem. Needs to be replaced by better architecture.
    405      */
    406     public Transliterator safeClone() {
    407         UnicodeFilter filter = getFilter();
    408         if (filter != null && filter instanceof UnicodeSet) {
    409             filter = new UnicodeSet((UnicodeSet)filter);
    410         }
    411         return new AnyTransliterator(getID(), filter, target, targetScript, widthFix, cache);
    412     }
    413 
    414     /* (non-Javadoc)
    415      * @see android.icu.text.Transliterator#addSourceTargetSet(android.icu.text.UnicodeSet, android.icu.text.UnicodeSet, android.icu.text.UnicodeSet)
    416      */
    417     @Override
    418     public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
    419         UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
    420         // Assume that it can modify any character to any other character
    421         sourceSet.addAll(myFilter);
    422         if (myFilter.size() != 0) {
    423             targetSet.addAll(0, 0x10FFFF);
    424         }
    425     }
    426 }
    427 
    428