Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 /*
      3 *******************************************************************************
      4 * Copyright (C) 2009-2014, International Business Machines Corporation and    *
      5 * others. All Rights Reserved.                                                *
      6 *******************************************************************************
      7 */
      8 
      9 package android.icu.impl.text;
     10 
     11 import java.util.HashMap;
     12 import java.util.Map;
     13 
     14 import android.icu.impl.ICUDebug;
     15 import android.icu.text.CollationElementIterator;
     16 import android.icu.text.Collator;
     17 import android.icu.text.RbnfLenientScanner;
     18 import android.icu.text.RbnfLenientScannerProvider;
     19 import android.icu.text.RuleBasedCollator;
     20 import android.icu.util.ULocale;
     21 
     22 /**
     23  * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
     24  * implementation behind setLenientParseMode, which is based on Collator.
     25  * @deprecated This API is ICU internal only.
     26  * @hide Only a subset of ICU is exposed in Android
     27  * @hide draft / provisional / internal are hidden on Android
     28  */
     29 @Deprecated
     30 public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
     31     private static final boolean DEBUG = ICUDebug.enabled("rbnf");
     32     private Map<String, RbnfLenientScanner> cache;
     33 
     34     /**
     35      * @deprecated This API is ICU internal only.
     36      * @hide draft / provisional / internal are hidden on Android
     37      */
     38     @Deprecated
     39     public RbnfScannerProviderImpl() {
     40         cache = new HashMap<String, RbnfLenientScanner>();
     41     }
     42 
     43     /**
     44      * Returns a collation-based scanner.
     45      *
     46      * Only primary differences are treated as significant.  This means that case
     47      * differences, accent differences, alternate spellings of the same letter
     48      * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
     49      * matching the text.  In many cases, numerals will be accepted in place of words
     50      * or phrases as well.
     51      *
     52      * For example, all of the following will correctly parse as 255 in English in
     53      * lenient-parse mode:
     54      * <br>"two hundred fifty-five"
     55      * <br>"two hundred fifty five"
     56      * <br>"TWO HUNDRED FIFTY-FIVE"
     57      * <br>"twohundredfiftyfive"
     58      * <br>"2 hundred fifty-5"
     59      *
     60      * The Collator used is determined by the locale that was
     61      * passed to this object on construction.  The description passed to this object
     62      * on construction may supply additional collation rules that are appended to the
     63      * end of the default collator for the locale, enabling additional equivalences
     64      * (such as adding more ignorable characters or permitting spelled-out version of
     65      * symbols; see the demo program for examples).
     66      *
     67      * It's important to emphasize that even strict parsing is relatively lenient: it
     68      * will accept some text that it won't produce as output.  In English, for example,
     69      * it will correctly parse "two hundred zero" and "fifteen hundred".
     70      *
     71      * @deprecated This API is ICU internal only.
     72      * @hide draft / provisional / internal are hidden on Android
     73      */
     74     @Deprecated
     75     public RbnfLenientScanner get(ULocale locale, String extras) {
     76         RbnfLenientScanner result = null;
     77         String key = locale.toString() + "/" + extras;
     78         synchronized(cache) {
     79             result = cache.get(key);
     80             if (result != null) {
     81                 return result;
     82             }
     83         }
     84         result = createScanner(locale, extras);
     85         synchronized(cache) {
     86             cache.put(key, result);
     87         }
     88         return result;
     89     }
     90 
     91     /**
     92      * @deprecated This API is ICU internal only.
     93      * @hide draft / provisional / internal are hidden on Android
     94      */
     95     @Deprecated
     96     protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
     97         RuleBasedCollator collator = null;
     98         try {
     99             // create a default collator based on the locale,
    100             // then pull out that collator's rules, append any additional
    101             // rules specified in the description, and create a _new_
    102             // collator based on the combination of those rules
    103             collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
    104             if (extras != null) {
    105                 String rules = collator.getRules() + extras;
    106                 collator = new RuleBasedCollator(rules);
    107             }
    108             collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    109         }
    110         catch (Exception e) {
    111             // If we get here, it means we have a malformed set of
    112             // collation rules, which hopefully won't happen
    113             ///CLOVER:OFF
    114             if (DEBUG){ // debug hook
    115                 e.printStackTrace(); System.out.println("++++");
    116             }
    117             collator = null;
    118             ///CLOVER:ON
    119         }
    120 
    121         return new RbnfLenientScannerImpl(collator);
    122     }
    123 
    124     private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
    125         private final RuleBasedCollator collator;
    126 
    127         private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
    128             this.collator = rbc;
    129         }
    130 
    131         public boolean allIgnorable(String s) {
    132             CollationElementIterator iter = collator.getCollationElementIterator(s);
    133 
    134             int o = iter.next();
    135             while (o != CollationElementIterator.NULLORDER
    136                    && CollationElementIterator.primaryOrder(o) == 0) {
    137                 o = iter.next();
    138             }
    139             return o == CollationElementIterator.NULLORDER;
    140         }
    141 
    142         public int[] findText(String str, String key, int startingAt) {
    143             int p = startingAt;
    144             int keyLen = 0;
    145 
    146             // basically just isolate smaller and smaller substrings of
    147             // the target string (each running to the end of the string,
    148             // and with the first one running from startingAt to the end)
    149             // and then use prefixLength() to see if the search key is at
    150             // the beginning of each substring.  This is excruciatingly
    151             // slow, but it will locate the key and tell use how long the
    152             // matching text was.
    153             while (p < str.length() && keyLen == 0) {
    154                 keyLen = prefixLength(str.substring(p), key);
    155                 if (keyLen != 0) {
    156                     return new int[] { p, keyLen };
    157                 }
    158                 ++p;
    159             }
    160             // if we make it to here, we didn't find it.  Return -1 for the
    161             // location.  The length should be ignored, but set it to 0,
    162             // which should be "safe"
    163             return new int[] { -1, 0 };
    164         }
    165 
    166         ///CLOVER:OFF
    167         // The following method contains the same signature as findText
    168         //  and has never been used by anything once.
    169         @SuppressWarnings("unused")
    170         public int[] findText2(String str, String key, int startingAt) {
    171 
    172             CollationElementIterator strIter = collator.getCollationElementIterator(str);
    173             CollationElementIterator keyIter = collator.getCollationElementIterator(key);
    174 
    175             int keyStart = -1;
    176 
    177             strIter.setOffset(startingAt);
    178 
    179             int oStr = strIter.next();
    180             int oKey = keyIter.next();
    181             while (oKey != CollationElementIterator.NULLORDER) {
    182                 while (oStr != CollationElementIterator.NULLORDER &&
    183                        CollationElementIterator.primaryOrder(oStr) == 0)
    184                     oStr = strIter.next();
    185 
    186                 while (oKey != CollationElementIterator.NULLORDER &&
    187                        CollationElementIterator.primaryOrder(oKey) == 0)
    188                     oKey = keyIter.next();
    189 
    190                 if (oStr == CollationElementIterator.NULLORDER) {
    191                     return new int[] { -1, 0 };
    192                 }
    193 
    194                 if (oKey == CollationElementIterator.NULLORDER) {
    195                     break;
    196                 }
    197 
    198                 if (CollationElementIterator.primaryOrder(oStr) ==
    199                     CollationElementIterator.primaryOrder(oKey)) {
    200                     keyStart = strIter.getOffset();
    201                     oStr = strIter.next();
    202                     oKey = keyIter.next();
    203                 } else {
    204                     if (keyStart != -1) {
    205                         keyStart = -1;
    206                         keyIter.reset();
    207                     } else {
    208                         oStr = strIter.next();
    209                     }
    210                 }
    211             }
    212 
    213             if (oKey == CollationElementIterator.NULLORDER) {
    214                 return new int[] { keyStart, strIter.getOffset() - keyStart };
    215             }
    216 
    217             return new int[] { -1, 0 };
    218         }
    219         ///CLOVER:ON
    220 
    221         public int prefixLength(String str, String prefix) {
    222             // Create two collation element iterators, one over the target string
    223             // and another over the prefix.
    224             //
    225             // Previous code was matching "fifty-" against " fifty" and leaving
    226             // the number " fifty-7" to parse as 43 (50 - 7).
    227             // Also it seems that if we consume the entire prefix, that's ok even
    228             // if we've consumed the entire string, so I switched the logic to
    229             // reflect this.
    230 
    231             CollationElementIterator strIter = collator.getCollationElementIterator(str);
    232             CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);
    233 
    234             // match collation elements between the strings
    235             int oStr = strIter.next();
    236             int oPrefix = prefixIter.next();
    237 
    238             while (oPrefix != CollationElementIterator.NULLORDER) {
    239                 // skip over ignorable characters in the target string
    240                 while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr !=
    241                        CollationElementIterator.NULLORDER) {
    242                     oStr = strIter.next();
    243                 }
    244 
    245                 // skip over ignorable characters in the prefix
    246                 while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix !=
    247                        CollationElementIterator.NULLORDER) {
    248                     oPrefix = prefixIter.next();
    249                 }
    250 
    251                 // if skipping over ignorables brought to the end of
    252                 // the prefix, we DID match: drop out of the loop
    253                 if (oPrefix == CollationElementIterator.NULLORDER) {
    254                     break;
    255                 }
    256 
    257                 // if skipping over ignorables brought us to the end
    258                 // of the target string, we didn't match and return 0
    259                 if (oStr == CollationElementIterator.NULLORDER) {
    260                     return 0;
    261                 }
    262 
    263                 // match collation elements from the two strings
    264                 // (considering only primary differences).  If we
    265                 // get a mismatch, dump out and return 0
    266                 if (CollationElementIterator.primaryOrder(oStr) !=
    267                     CollationElementIterator.primaryOrder(oPrefix)) {
    268                     return 0;
    269                 }
    270 
    271                 // otherwise, advance to the next character in each string
    272                 // and loop (we drop out of the loop when we exhaust
    273                 // collation elements in the prefix)
    274 
    275                 oStr = strIter.next();
    276                 oPrefix = prefixIter.next();
    277             }
    278 
    279             int result = strIter.getOffset();
    280             if (oStr != CollationElementIterator.NULLORDER) {
    281                 --result;
    282             }
    283             return result;
    284         }
    285     }
    286 }
    287