Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 2014, International Business Machines Corporation and         *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9  */
     10 package android.icu.text;
     11 
     12 import java.text.CharacterIterator;
     13 
     14 import android.icu.impl.CharacterIteration;
     15 
     16 abstract class DictionaryBreakEngine implements LanguageBreakEngine {
     17 
     18     /* Helper class for improving readability of the Thai/Lao/Khmer word break
     19      * algorithm.
     20      */
     21     static class PossibleWord {
     22         // List size, limited by the maximum number of words in the dictionary
     23         // that form a nested sequence.
     24         private final static int POSSIBLE_WORD_LIST_MAX = 20;
     25         //list of word candidate lengths, in increasing length order
     26         private int lengths[];
     27         private int count[];    // Count of candidates
     28         private int prefix;     // The longest match with a dictionary word
     29         private int offset;     // Offset in the text of these candidates
     30         private int mark;       // The preferred candidate's offset
     31         private int current;    // The candidate we're currently looking at
     32 
     33         // Default constructor
     34         public PossibleWord() {
     35             lengths = new int[POSSIBLE_WORD_LIST_MAX];
     36             count = new int[1]; // count needs to be an array of 1 so that it can be pass as reference
     37             offset = -1;
     38         }
     39 
     40         // Fill the list of candidates if needed, select the longest, and return the number found
     41         public int candidates(CharacterIterator fIter, DictionaryMatcher dict, int rangeEnd) {
     42             int start = fIter.getIndex();
     43             if (start != offset) {
     44                 offset = start;
     45                 prefix = dict.matches(fIter, rangeEnd - start, lengths, count, lengths.length);
     46                 // Dictionary leaves text after longest prefix, not longest word. Back up.
     47                 if (count[0] <= 0) {
     48                     fIter.setIndex(start);
     49                 }
     50             }
     51             if (count[0] > 0) {
     52                 fIter.setIndex(start + lengths[count[0]-1]);
     53             }
     54             current = count[0] - 1;
     55             mark = current;
     56             return count[0];
     57         }
     58 
     59         // Select the currently marked candidate, point after it in the text, and invalidate self
     60         public int acceptMarked(CharacterIterator fIter) {
     61             fIter.setIndex(offset + lengths[mark]);
     62             return lengths[mark];
     63         }
     64 
     65         // Backup from the current candidate to the next shorter one; return true if that exists
     66         // and point the text after it
     67         public boolean backUp(CharacterIterator fIter) {
     68             if (current > 0) {
     69                 fIter.setIndex(offset + lengths[--current]);
     70                 return true;
     71             }
     72             return false;
     73         }
     74 
     75         // Return the longest prefix this candidate location shares with a dictionary word
     76         public int longestPrefix() {
     77             return prefix;
     78         }
     79 
     80         // Mark the current candidate as the one we like
     81         public void markCurrent() {
     82             mark = current;
     83         }
     84     }
     85 
     86     /**
     87      *  A deque-like structure holding raw ints.
     88      *  Partial, limited implementation, only what is needed by the dictionary implementation.
     89      *  For internal use only.
     90      * @hide draft / provisional / internal are hidden on Android
     91      */
     92     static class DequeI implements Cloneable {
     93         private int[] data = new int[50];
     94         private int lastIdx = 4;   // or base of stack. Index of element.
     95         private int firstIdx = 4;  // or Top of Stack. Index of element + 1.
     96 
     97         @Override
     98         public Object clone() throws CloneNotSupportedException {
     99             DequeI result = (DequeI)super.clone();
    100             result.data = data.clone();
    101             return result;
    102         }
    103 
    104         int size() {
    105             return firstIdx - lastIdx;
    106         }
    107 
    108         boolean isEmpty() {
    109             return size() == 0;
    110         }
    111 
    112         private void grow() {
    113             int[] newData = new int[data.length * 2];
    114             System.arraycopy(data,  0,  newData,  0, data.length);
    115             data = newData;
    116         }
    117 
    118         void offer(int v) {
    119             // Note that the actual use cases of offer() add at most one element.
    120             //   We make no attempt to handle more than a few.
    121             assert lastIdx > 0;
    122             data[--lastIdx] = v;
    123         }
    124 
    125         void push(int v) {
    126             if (firstIdx >= data.length) {
    127                 grow();
    128             }
    129             data[firstIdx++] = v;
    130         }
    131 
    132         int pop() {
    133             assert size() > 0;
    134             return data[--firstIdx];
    135         }
    136 
    137         int peek() {
    138             assert size() > 0;
    139             return data[firstIdx - 1];
    140         }
    141 
    142         int peekLast() {
    143             assert size() > 0;
    144             return data[lastIdx];
    145         }
    146 
    147         int pollLast() {
    148             assert size() > 0;
    149             return data[lastIdx++];
    150         }
    151 
    152         boolean contains(int v) {
    153             for (int i=lastIdx; i< firstIdx; i++) {
    154                 if (data[i] == v) {
    155                     return true;
    156                 }
    157             }
    158             return false;
    159         }
    160 
    161         int elementAt(int i) {
    162             assert i < size();
    163             return data[lastIdx + i];
    164         }
    165 
    166         void removeAllElements() {
    167             lastIdx = firstIdx = 4;
    168         }
    169     }
    170 
    171     UnicodeSet fSet = new UnicodeSet();
    172 
    173     /**
    174      *  Constructor
    175      */
    176     public DictionaryBreakEngine() {
    177     }
    178 
    179     @Override
    180     public boolean handles(int c) {
    181         return fSet.contains(c);        // we recognize the character
    182     }
    183 
    184     @Override
    185     public int findBreaks(CharacterIterator text, int startPos, int endPos,
    186             DequeI foundBreaks) {
    187         int result = 0;
    188 
    189          // Find the span of characters included in the set.
    190          //   The span to break begins at the current position int the text, and
    191          //   extends towards the start or end of the text, depending on 'reverse'.
    192 
    193         int start = text.getIndex();
    194         int current;
    195         int rangeStart;
    196         int rangeEnd;
    197         int c = CharacterIteration.current32(text);
    198         while ((current = text.getIndex()) < endPos && fSet.contains(c)) {
    199             CharacterIteration.next32(text);
    200             c = CharacterIteration.current32(text);
    201         }
    202         rangeStart = start;
    203         rangeEnd = current;
    204 
    205         result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
    206         text.setIndex(current);
    207 
    208         return result;
    209     }
    210 
    211     void setCharacters(UnicodeSet set) {
    212         fSet = new UnicodeSet(set);
    213         fSet.compact();
    214     }
    215 
    216     /**
    217      * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
    218      *
    219      * @param text A UText representing the text
    220      * @param rangeStart The start of the range of dictionary characters
    221      * @param rangeEnd The end of the range of dictionary characters
    222      * @param foundBreaks Output of break positions. Positions are pushed.
    223      *                    Pre-existing contents of the output stack are unaltered.
    224      * @return The number of breaks found
    225      */
    226      abstract int divideUpDictionaryRange(CharacterIterator text,
    227                                           int               rangeStart,
    228                                           int               rangeEnd,
    229                                           DequeI            foundBreaks );
    230 }
    231