Home | History | Annotate | Download | only in impl
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  * ********************************************************************************
      5  * Copyright (C) 2007-2011, International Business Machines Corporation and others.
      6  * All Rights Reserved.
      7  * ********************************************************************************
      8  */
      9 package com.ibm.icu.impl;
     10 
     11 import java.util.Iterator;
     12 import java.util.LinkedList;
     13 import java.util.List;
     14 import java.util.ListIterator;
     15 
     16 import com.ibm.icu.lang.UCharacter;
     17 
     18 /**
     19  * TextTrieMap is a trie implementation for supporting
     20  * fast prefix match for the key.
     21  */
     22 public class TextTrieMap<V> {
     23 
     24     private Node _root = new Node();
     25     boolean _ignoreCase;
     26 
     27     /**
     28      * Constructs a TextTrieMap object.
     29      *
     30      * @param ignoreCase true to use simple case insensitive match
     31      */
     32     public TextTrieMap(boolean ignoreCase) {
     33         _ignoreCase = ignoreCase;
     34     }
     35 
     36     /**
     37      * Adds the text key and its associated object in this object.
     38      *
     39      * @param text The text.
     40      * @param val The value object associated with the text.
     41      */
     42     public TextTrieMap<V> put(CharSequence text, V val) {
     43         CharIterator chitr = new CharIterator(text, 0, _ignoreCase);
     44         _root.add(chitr, val);
     45         return this;
     46     }
     47 
     48     /**
     49      * Gets an iterator of the objects associated with the
     50      * longest prefix matching string key.
     51      *
     52      * @param text The text to be matched with prefixes.
     53      * @return An iterator of the objects associated with
     54      * the longest prefix matching matching key, or null
     55      * if no matching entry is found.
     56      */
     57     public Iterator<V> get(String text) {
     58         return get(text, 0);
     59     }
     60 
     61     /**
     62      * Gets an iterator of the objects associated with the
     63      * longest prefix matching string key starting at the
     64      * specified position.
     65      *
     66      * @param text The text to be matched with prefixes.
     67      * @param start The start index of of the text
     68      * @return An iterator of the objects associated with the
     69      * longest prefix matching matching key, or null if no
     70      * matching entry is found.
     71      */
     72     public Iterator<V> get(CharSequence text, int start) {
     73         return get(text, start, null);
     74     }
     75 
     76     public Iterator<V> get(CharSequence text, int start, int[] matchLen) {
     77         LongestMatchHandler<V> handler = new LongestMatchHandler<V>();
     78         find(text, start, handler);
     79         if (matchLen != null && matchLen.length > 0) {
     80             matchLen[0] = handler.getMatchLength();
     81         }
     82         return handler.getMatches();
     83     }
     84 
     85     public void find(CharSequence text, ResultHandler<V> handler) {
     86         find(text, 0, handler);
     87     }
     88 
     89     public void find(CharSequence text, int offset, ResultHandler<V> handler) {
     90         CharIterator chitr = new CharIterator(text, offset, _ignoreCase);
     91         find(_root, chitr, handler);
     92     }
     93 
     94     private synchronized void find(Node node, CharIterator chitr, ResultHandler<V> handler) {
     95         Iterator<V> values = node.values();
     96         if (values != null) {
     97             if (!handler.handlePrefixMatch(chitr.processedLength(), values)) {
     98                 return;
     99             }
    100         }
    101 
    102         Node nextMatch = node.findMatch(chitr);
    103         if (nextMatch != null) {
    104             find(nextMatch, chitr, handler);
    105         }
    106     }
    107 
    108     public static class CharIterator implements Iterator<Character> {
    109         private boolean _ignoreCase;
    110         private CharSequence _text;
    111         private int _nextIdx;
    112         private int _startIdx;
    113 
    114         private Character _remainingChar;
    115 
    116         CharIterator(CharSequence text, int offset, boolean ignoreCase) {
    117             _text = text;
    118             _nextIdx = _startIdx = offset;
    119             _ignoreCase = ignoreCase;
    120         }
    121 
    122         /* (non-Javadoc)
    123          * @see java.util.Iterator#hasNext()
    124          */
    125         @Override
    126         public boolean hasNext() {
    127             if (_nextIdx == _text.length() && _remainingChar == null) {
    128                 return false;
    129             }
    130             return true;
    131         }
    132 
    133         /* (non-Javadoc)
    134          * @see java.util.Iterator#next()
    135          */
    136         @Override
    137         public Character next() {
    138             if (_nextIdx == _text.length() && _remainingChar == null) {
    139                 return null;
    140             }
    141             Character next;
    142             if (_remainingChar != null) {
    143                 next = _remainingChar;
    144                 _remainingChar = null;
    145             } else {
    146                 if (_ignoreCase) {
    147                     int cp = UCharacter.foldCase(Character.codePointAt(_text, _nextIdx), true);
    148                     _nextIdx = _nextIdx + Character.charCount(cp);
    149 
    150                     char[] chars = Character.toChars(cp);
    151                     next = chars[0];
    152                     if (chars.length == 2) {
    153                         _remainingChar = chars[1];
    154                     }
    155                 } else {
    156                     next = _text.charAt(_nextIdx);
    157                     _nextIdx++;
    158                 }
    159             }
    160             return next;
    161         }
    162 
    163         /* (non-Javadoc)
    164          * @see java.util.Iterator#remove()
    165          */
    166         @Override
    167         public void remove() {
    168             throw new UnsupportedOperationException("remove() not supproted");
    169         }
    170 
    171         public int nextIndex() {
    172             return _nextIdx;
    173         }
    174 
    175         public int processedLength() {
    176             if (_remainingChar != null) {
    177                 throw new IllegalStateException("In the middle of surrogate pair");
    178             }
    179             return _nextIdx - _startIdx;
    180         }
    181     }
    182 
    183     /**
    184      * Callback handler for processing prefix matches used by
    185      * find method.
    186      */
    187     public interface ResultHandler<V> {
    188         /**
    189          * Handles a prefix key match
    190          *
    191          * @param matchLength Matched key's length
    192          * @param values An iterator of the objects associated with the matched key
    193          * @return Return true to continue the search in the trie, false to quit.
    194          */
    195         public boolean handlePrefixMatch(int matchLength, Iterator<V> values);
    196     }
    197 
    198     private static class LongestMatchHandler<V> implements ResultHandler<V> {
    199         private Iterator<V> matches = null;
    200         private int length = 0;
    201 
    202         @Override
    203         public boolean handlePrefixMatch(int matchLength, Iterator<V> values) {
    204             if (matchLength > length) {
    205                 length = matchLength;
    206                 matches = values;
    207             }
    208             return true;
    209         }
    210 
    211         public Iterator<V> getMatches() {
    212             return matches;
    213         }
    214 
    215         public int getMatchLength() {
    216             return length;
    217         }
    218     }
    219 
    220     /**
    221      * Inner class representing a text node in the trie.
    222      */
    223     private class Node {
    224         private char[] _text;
    225         private List<V> _values;
    226         private List<Node> _children;
    227 
    228         private Node() {
    229         }
    230 
    231         private Node(char[] text, List<V> values, List<Node> children) {
    232             _text = text;
    233             _values = values;
    234             _children = children;
    235         }
    236 
    237         public Iterator<V> values() {
    238             if (_values == null) {
    239                 return null;
    240             }
    241             return _values.iterator();
    242         }
    243 
    244         public void add(CharIterator chitr, V value) {
    245             StringBuilder buf = new StringBuilder();
    246             while (chitr.hasNext()) {
    247                 buf.append(chitr.next());
    248             }
    249             add(toCharArray(buf), 0, value);
    250         }
    251 
    252         public Node findMatch(CharIterator chitr) {
    253             if (_children == null) {
    254                 return null;
    255             }
    256             if (!chitr.hasNext()) {
    257                 return null;
    258             }
    259             Node match = null;
    260             Character ch = chitr.next();
    261             for (Node child : _children) {
    262                 if (ch < child._text[0]) {
    263                     break;
    264                 }
    265                 if (ch == child._text[0]) {
    266                     if (child.matchFollowing(chitr)) {
    267                         match = child;
    268                     }
    269                     break;
    270                 }
    271             }
    272             return match;
    273         }
    274 
    275         private void add(char[] text, int offset, V value) {
    276             if (text.length == offset) {
    277                 _values = addValue(_values, value);
    278                 return;
    279             }
    280 
    281             if (_children == null) {
    282                 _children = new LinkedList<Node>();
    283                 Node child = new Node(subArray(text, offset), addValue(null, value), null);
    284                 _children.add(child);
    285                 return;
    286             }
    287 
    288             // walk through children
    289             ListIterator<Node> litr = _children.listIterator();
    290             while (litr.hasNext()) {
    291                 Node next = litr.next();
    292                 if (text[offset] < next._text[0]) {
    293                     litr.previous();
    294                     break;
    295                 }
    296                 if (text[offset] == next._text[0]) {
    297                     int matchLen = next.lenMatches(text, offset);
    298                     if (matchLen == next._text.length) {
    299                         // full match
    300                         next.add(text, offset + matchLen, value);
    301                     } else {
    302                         // partial match, create a branch
    303                         next.split(matchLen);
    304                         next.add(text, offset + matchLen, value);
    305                     }
    306                     return;
    307                 }
    308             }
    309             // add a new child to this node
    310             litr.add(new Node(subArray(text, offset), addValue(null, value), null));
    311         }
    312 
    313         private boolean matchFollowing(CharIterator chitr) {
    314             boolean matched = true;
    315             int idx = 1;
    316             while (idx < _text.length) {
    317                 if(!chitr.hasNext()) {
    318                     matched = false;
    319                     break;
    320                 }
    321                 Character ch = chitr.next();
    322                 if (ch != _text[idx]) {
    323                     matched = false;
    324                     break;
    325                 }
    326                 idx++;
    327             }
    328             return matched;
    329         }
    330 
    331         private int lenMatches(char[] text, int offset) {
    332             int textLen = text.length - offset;
    333             int limit = _text.length < textLen ? _text.length : textLen;
    334             int len = 0;
    335             while (len < limit) {
    336                 if (_text[len] != text[offset + len]) {
    337                     break;
    338                 }
    339                 len++;
    340             }
    341             return len;
    342         }
    343 
    344         private void split(int offset) {
    345             // split the current node at the offset
    346             char[] childText = subArray(_text, offset);
    347             _text = subArray(_text, 0, offset);
    348 
    349             // add the Node representing after the offset as a child
    350             Node child = new Node(childText, _values, _children);
    351             _values = null;
    352 
    353             _children = new LinkedList<Node>();
    354             _children.add(child);
    355         }
    356 
    357         private List<V> addValue(List<V> list, V value) {
    358             if (list == null) {
    359                 list = new LinkedList<V>();
    360             }
    361             list.add(value);
    362             return list;
    363         }
    364     }
    365 
    366     private static char[] toCharArray(CharSequence text) {
    367         char[] array = new char[text.length()];
    368         for (int i = 0; i < array.length; i++) {
    369             array[i] = text.charAt(i);
    370         }
    371         return array;
    372     }
    373 
    374     private static char[] subArray(char[] array, int start) {
    375         if (start == 0) {
    376             return array;
    377         }
    378         char[] sub = new char[array.length - start];
    379         System.arraycopy(array, start, sub, 0, sub.length);
    380         return sub;
    381     }
    382 
    383     private static char[] subArray(char[] array, int start, int limit) {
    384         if (start == 0 && limit == array.length) {
    385             return array;
    386         }
    387         char[] sub = new char[limit - start];
    388         System.arraycopy(array, start, sub, 0, limit - start);
    389         return sub;
    390     }
    391 }
    392