Home | History | Annotate | Download | only in util
      1 /*
      2 *******************************************************************************
      3 *   Copyright (C) 2011-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *******************************************************************************
      6 *   created on: 2011jan05
      7 *   created by: Markus W. Scherer
      8 *   ported from ICU4C stringtriebuilder.h/.cpp
      9 */
     10 package com.ibm.icu.util;
     11 
     12 import java.util.ArrayList;
     13 import java.util.HashMap;
     14 
     15 /**
     16  * Base class for string trie builder classes.
     17  *
     18  * <p>This class is not intended for public subclassing.
     19  *
     20  * @author Markus W. Scherer
     21  * @stable ICU 4.8
     22  */
     23 public abstract class StringTrieBuilder {
     24     /**
     25      * Build options for BytesTrieBuilder and CharsTrieBuilder.
     26      * @stable ICU 4.8
     27      */
     28     public enum Option {
     29         /**
     30          * Builds a trie quickly.
     31          * @stable ICU 4.8
     32          */
     33         FAST,
     34         /**
     35          * Builds a trie more slowly, attempting to generate
     36          * a shorter but equivalent serialization.
     37          * This build option also uses more memory.
     38          *
     39          * <p>This option can be effective when many integer values are the same
     40          * and string/byte sequence suffixes can be shared.
     41          * Runtime speed is not expected to improve.
     42          * @stable ICU 4.8
     43          */
     44         SMALL
     45     }
     46 
     47     /**
     48      * @internal
     49      * @deprecated This API is ICU internal only.
     50      */
     51     @Deprecated
     52     protected StringTrieBuilder() {}
     53 
     54     /**
     55      * @internal
     56      * @deprecated This API is ICU internal only.
     57      */
     58     @Deprecated
     59     protected void addImpl(CharSequence s, int value) {
     60         if(state!=State.ADDING) {
     61             // Cannot add elements after building.
     62             throw new IllegalStateException("Cannot add (string, value) pairs after build().");
     63         }
     64         if(s.length()>0xffff) {
     65             // Too long: Limited by iterator internals, and by builder recursion depth.
     66             throw new IndexOutOfBoundsException("The maximum string length is 0xffff.");
     67         }
     68         if(root==null) {
     69             root=createSuffixNode(s, 0, value);
     70         } else {
     71             root=root.add(this, s, 0, value);
     72         }
     73     }
     74 
     75     /**
     76      * @internal
     77      * @deprecated This API is ICU internal only.
     78      */
     79     @Deprecated
     80     protected final void buildImpl(Option buildOption) {
     81         switch(state) {
     82         case ADDING:
     83             if(root==null) {
     84                 throw new IndexOutOfBoundsException("No (string, value) pairs were added.");
     85             }
     86             if(buildOption==Option.FAST) {
     87                 state=State.BUILDING_FAST;
     88                 // Building "fast" is somewhat faster (25..50% in some test)
     89                 // because it makes registerNode() return the input node
     90                 // rather than checking for duplicates.
     91                 // As a result, we sometimes write larger trie serializations.
     92                 //
     93                 // In either case we need to fix-up linear-match nodes (for their maximum length)
     94                 // and branch nodes (turning dynamic branch nodes into trees of
     95                 // runtime-equivalent nodes), but the HashMap/hashCode()/equals() are omitted for
     96                 // nodes other than final values.
     97             } else {
     98                 state=State.BUILDING_SMALL;
     99             }
    100             break;
    101         case BUILDING_FAST:
    102         case BUILDING_SMALL:
    103             // Building must have failed.
    104             throw new IllegalStateException("Builder failed and must be clear()ed.");
    105         case BUILT:
    106             return;  // Nothing more to do.
    107         }
    108         // Implementation note:
    109         // We really build three versions of the trie.
    110         // The first is a fully dynamic trie, built successively by addImpl().
    111         // Then we call root.register() to turn it into a tree of nodes
    112         // which is 1:1 equivalent to the runtime data structure.
    113         // Finally, root.markRightEdgesFirst() and root.write() write that serialized form.
    114         root=root.register(this);
    115         root.markRightEdgesFirst(-1);
    116         root.write(this);
    117         state=State.BUILT;
    118     }
    119 
    120     /**
    121      * @internal
    122      * @deprecated This API is ICU internal only.
    123      */
    124     @Deprecated
    125     protected void clearImpl() {
    126         strings.setLength(0);
    127         nodes.clear();
    128         root=null;
    129         state=State.ADDING;
    130     }
    131 
    132     /**
    133      * Makes sure that there is only one unique node registered that is
    134      * equivalent to newNode, unless BUILDING_FAST.
    135      * @param newNode Input node. The builder takes ownership.
    136      * @return newNode if it is the first of its kind, or
    137      *         an equivalent node if newNode is a duplicate.
    138      */
    139     private final Node registerNode(Node newNode) {
    140         if(state==State.BUILDING_FAST) {
    141             return newNode;
    142         }
    143         // BUILDING_SMALL
    144         Node oldNode=nodes.get(newNode);
    145         if(oldNode!=null) {
    146             return oldNode;
    147         }
    148         // If put() returns a non-null value from an equivalent, previously
    149         // registered node, then get() failed to find that and we will leak newNode.
    150         oldNode=nodes.put(newNode, newNode);
    151         assert(oldNode==null);
    152         return newNode;
    153     }
    154 
    155     /**
    156      * Makes sure that there is only one unique FinalValueNode registered
    157      * with this value.
    158      * Avoids creating a node if the value is a duplicate.
    159      * @param value A final value.
    160      * @return A FinalValueNode with the given value.
    161      */
    162     private final ValueNode registerFinalValue(int value) {
    163         // We always register final values because while ADDING
    164         // we do not know yet whether we will build fast or small.
    165         lookupFinalValueNode.setFinalValue(value);
    166         Node oldNode=nodes.get(lookupFinalValueNode);
    167         if(oldNode!=null) {
    168             return (ValueNode)oldNode;
    169         }
    170         ValueNode newNode=new ValueNode(value);
    171         // If put() returns a non-null value from an equivalent, previously
    172         // registered node, then get() failed to find that and we will leak newNode.
    173         oldNode=nodes.put(newNode, newNode);
    174         assert(oldNode==null);
    175         return newNode;
    176     }
    177 
    178     private static abstract class Node {
    179         public Node() {
    180             offset=0;
    181         }
    182         // hashCode() and equals() for use with registerNode() and the nodes hash.
    183         @Override
    184         public abstract int hashCode() /*const*/;
    185         // Base class equals() compares the actual class types.
    186         @Override
    187         public boolean equals(Object other) {
    188             return this==other || this.getClass()==other.getClass();
    189         }
    190         /**
    191          * Recursive method for adding a new (string, value) pair.
    192          * Matches the remaining part of s from start,
    193          * and adds a new node where there is a mismatch.
    194          * @return this or a replacement Node
    195          */
    196         public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) {
    197             return this;
    198         }
    199         /**
    200          * Recursive method for registering unique nodes,
    201          * after all (string, value) pairs have been added.
    202          * Final-value nodes are pre-registered while add()ing (string, value) pairs.
    203          * Other nodes created while add()ing registerNode() themselves later
    204          * and might replace themselves with new types of nodes for write()ing.
    205          * @return The registered version of this node which implements write().
    206          */
    207         public Node register(StringTrieBuilder builder) { return this; }
    208         /**
    209          * Traverses the Node graph and numbers branch edges, with rightmost edges first.
    210          * This is to avoid writing a duplicate node twice.
    211          *
    212          * Branch nodes in this trie data structure are not symmetric.
    213          * Most branch edges "jump" to other nodes but the rightmost branch edges
    214          * just continue without a jump.
    215          * Therefore, write() must write the rightmost branch edge last
    216          * (trie units are written backwards), and must write it at that point even if
    217          * it is a duplicate of a node previously written elsewhere.
    218          *
    219          * This function visits and marks right branch edges first.
    220          * Edges are numbered with increasingly negative values because we share the
    221          * offset field which gets positive values when nodes are written.
    222          * A branch edge also remembers the first number for any of its edges.
    223          *
    224          * When a further-left branch edge has a number in the range of the rightmost
    225          * edge's numbers, then it will be written as part of the required right edge
    226          * and we can avoid writing it first.
    227          *
    228          * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
    229          * edge numbers.
    230          *
    231          * @param edgeNumber The first edge number for this node and its sub-nodes.
    232          * @return An edge number that is at least the maximum-negative
    233          *         of the input edge number and the numbers of this node and all of its sub-nodes.
    234          */
    235         public int markRightEdgesFirst(int edgeNumber) {
    236             if(offset==0) {
    237                 offset=edgeNumber;
    238             }
    239             return edgeNumber;
    240         }
    241         // write() must set the offset to a positive value.
    242         public abstract void write(StringTrieBuilder builder);
    243         // See markRightEdgesFirst.
    244         public final void writeUnlessInsideRightEdge(int firstRight, int lastRight,
    245                                                StringTrieBuilder builder) {
    246             // Note: Edge numbers are negative, lastRight<=firstRight.
    247             // If offset>0 then this node and its sub-nodes have been written already
    248             // and we need not write them again.
    249             // If this node is part of the unwritten right branch edge,
    250             // then we wait until that is written.
    251             if(offset<0 && (offset<lastRight || firstRight<offset)) {
    252                 write(builder);
    253             }
    254         }
    255         public final int getOffset() /*const*/ { return offset; }
    256 
    257         protected int offset;
    258     }
    259 
    260     // Used directly for final values, and as as a superclass for
    261     // match nodes with intermediate values.
    262     private static class ValueNode extends Node {
    263         public ValueNode() {}
    264         public ValueNode(int v) {
    265             hasValue=true;
    266             value=v;
    267         }
    268         public final void setValue(int v) {
    269             assert(!hasValue);
    270             hasValue=true;
    271             value=v;
    272         }
    273         private void setFinalValue(int v) {
    274             hasValue=true;
    275             value=v;
    276         }
    277         @Override
    278         public int hashCode() /*const*/ {
    279             int hash=0x111111;
    280             if(hasValue) {
    281                 hash=hash*37+value;
    282             }
    283             return hash;
    284         }
    285         @Override
    286         public boolean equals(Object other) {
    287             if(this==other) {
    288                 return true;
    289             }
    290             if(!super.equals(other)) {
    291                 return false;
    292             }
    293             ValueNode o=(ValueNode)other;
    294             return hasValue==o.hasValue && (!hasValue || value==o.value);
    295         }
    296         @Override
    297         public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) {
    298             if(start==s.length()) {
    299                 throw new IllegalArgumentException("Duplicate string.");
    300             }
    301             // Replace self with a node for the remaining string suffix and value.
    302             ValueNode node=builder.createSuffixNode(s, start, sValue);
    303             node.setValue(value);
    304             return node;
    305         }
    306         @Override
    307         public void write(StringTrieBuilder builder) {
    308             offset=builder.writeValueAndFinal(value, true);
    309         }
    310 
    311         protected boolean hasValue;
    312         protected int value;
    313     }
    314 
    315     private static final class IntermediateValueNode extends ValueNode {
    316         public IntermediateValueNode(int v, Node nextNode) {
    317             next=nextNode;
    318             setValue(v);
    319         }
    320         @Override
    321         public int hashCode() /*const*/ {
    322             return (0x222222*37+value)*37+next.hashCode();
    323         }
    324         @Override
    325         public boolean equals(Object other) {
    326             if(this==other) {
    327                 return true;
    328             }
    329             if(!super.equals(other)) {
    330                 return false;
    331             }
    332             IntermediateValueNode o=(IntermediateValueNode)other;
    333             return next==o.next;
    334         }
    335         @Override
    336         public int markRightEdgesFirst(int edgeNumber) {
    337             if(offset==0) {
    338                 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber);
    339             }
    340             return edgeNumber;
    341         }
    342         @Override
    343         public void write(StringTrieBuilder builder) {
    344             next.write(builder);
    345             offset=builder.writeValueAndFinal(value, false);
    346         }
    347 
    348         private Node next;
    349     }
    350 
    351     private static final class LinearMatchNode extends ValueNode {
    352         public LinearMatchNode(CharSequence builderStrings, int sOffset, int len, Node nextNode) {
    353             strings=builderStrings;
    354             stringOffset=sOffset;
    355             length=len;
    356             next=nextNode;
    357         }
    358         @Override
    359         public int hashCode() /*const*/ { return hash; }
    360         @Override
    361         public boolean equals(Object other) {
    362             if(this==other) {
    363                 return true;
    364             }
    365             if(!super.equals(other)) {
    366                 return false;
    367             }
    368             LinearMatchNode o=(LinearMatchNode)other;
    369             if(length!=o.length || next!=o.next) {
    370                 return false;
    371             }
    372             for(int i=stringOffset, j=o.stringOffset, limit=stringOffset+length; i<limit; ++i, ++j) {
    373                 if(strings.charAt(i)!=strings.charAt(j)) {
    374                     return false;
    375                 }
    376             }
    377             return true;
    378         }
    379         @Override
    380         public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) {
    381             if(start==s.length()) {
    382                 if(hasValue) {
    383                     throw new IllegalArgumentException("Duplicate string.");
    384                 } else {
    385                     setValue(sValue);
    386                     return this;
    387                 }
    388             }
    389             int limit=stringOffset+length;
    390             for(int i=stringOffset; i<limit; ++i, ++start) {
    391                 if(start==s.length()) {
    392                     // s is a prefix with a new value. Split self into two linear-match nodes.
    393                     int prefixLength=i-stringOffset;
    394                     LinearMatchNode suffixNode=new LinearMatchNode(strings, i, length-prefixLength, next);
    395                     suffixNode.setValue(sValue);
    396                     length=prefixLength;
    397                     next=suffixNode;
    398                     return this;
    399                 }
    400                 char thisChar=strings.charAt(i);
    401                 char newChar=s.charAt(start);
    402                 if(thisChar!=newChar) {
    403                     // Mismatch, insert a branch node.
    404                     DynamicBranchNode branchNode=new DynamicBranchNode();
    405                     // Reuse this node for one of the remaining substrings, if any.
    406                     Node result, thisSuffixNode;
    407                     if(i==stringOffset) {
    408                         // Mismatch on first character, turn this node into a suffix.
    409                         if(hasValue) {
    410                             // Move the value for prefix length "start" to the new node.
    411                             branchNode.setValue(value);
    412                             value=0;
    413                             hasValue=false;
    414                         }
    415                         ++stringOffset;
    416                         --length;
    417                         thisSuffixNode= length>0 ? this : next;
    418                         // C++: if(length==0) { delete this; }
    419                         result=branchNode;
    420                     } else if(i==limit-1) {
    421                         // Mismatch on last character, keep this node for the prefix.
    422                         --length;
    423                         thisSuffixNode=next;
    424                         next=branchNode;
    425                         result=this;
    426                     } else {
    427                         // Mismatch on intermediate character, keep this node for the prefix.
    428                         int prefixLength=i-stringOffset;
    429                         ++i;  // Suffix start offset (after thisChar).
    430                         thisSuffixNode=new LinearMatchNode(
    431                                 strings, i, length-(prefixLength+1), next);
    432                         length=prefixLength;
    433                         next=branchNode;
    434                         result=this;
    435                     }
    436                     ValueNode newSuffixNode=builder.createSuffixNode(s, start+1, sValue);
    437                     branchNode.add(thisChar, thisSuffixNode);
    438                     branchNode.add(newChar, newSuffixNode);
    439                     return result;
    440                 }
    441             }
    442             // s matches all of this node's characters.
    443             next=next.add(builder, s, start, sValue);
    444             return this;
    445         }
    446         @Override
    447         public Node register(StringTrieBuilder builder) {
    448             next=next.register(builder);
    449             // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
    450             int maxLinearMatchLength=builder.getMaxLinearMatchLength();
    451             while(length>maxLinearMatchLength) {
    452                 int nextOffset=stringOffset+length-maxLinearMatchLength;
    453                 length-=maxLinearMatchLength;
    454                 LinearMatchNode suffixNode=
    455                     new LinearMatchNode(strings, nextOffset, maxLinearMatchLength, next);
    456                 suffixNode.setHashCode();
    457                 next=builder.registerNode(suffixNode);
    458             }
    459             Node result;
    460             if(hasValue && !builder.matchNodesCanHaveValues()) {
    461                 int intermediateValue=value;
    462                 value=0;
    463                 hasValue=false;
    464                 setHashCode();
    465                 result=new IntermediateValueNode(intermediateValue, builder.registerNode(this));
    466             } else {
    467                 setHashCode();
    468                 result=this;
    469             }
    470             return builder.registerNode(result);
    471         }
    472         @Override
    473         public int markRightEdgesFirst(int edgeNumber) {
    474             if(offset==0) {
    475                 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber);
    476             }
    477             return edgeNumber;
    478         }
    479         @Override
    480         public void write(StringTrieBuilder builder) {
    481             next.write(builder);
    482             builder.write(stringOffset, length);
    483             offset=builder.writeValueAndType(hasValue, value, builder.getMinLinearMatch()+length-1);
    484         }
    485 
    486         // Must be called just before registerNode(this).
    487         private void setHashCode() /*const*/ {
    488             hash=(0x333333*37+length)*37+next.hashCode();
    489             if(hasValue) {
    490                 hash=hash*37+value;
    491             }
    492             for(int i=stringOffset, limit=stringOffset+length; i<limit; ++i) {
    493                 hash=hash*37+strings.charAt(i);
    494             }
    495         }
    496 
    497         private CharSequence strings;
    498         private int stringOffset;
    499         private int length;
    500         private Node next;
    501         private int hash;
    502     }
    503 
    504     private static final class DynamicBranchNode extends ValueNode {
    505         public DynamicBranchNode() {}
    506         // c must not be in chars yet.
    507         public void add(char c, Node node) {
    508             int i=find(c);
    509             chars.insert(i, c);
    510             equal.add(i, node);
    511         }
    512         @Override
    513         public Node add(StringTrieBuilder builder, CharSequence s, int start, int sValue) {
    514             if(start==s.length()) {
    515                 if(hasValue) {
    516                     throw new IllegalArgumentException("Duplicate string.");
    517                 } else {
    518                     setValue(sValue);
    519                     return this;
    520                 }
    521             }
    522             char c=s.charAt(start++);
    523             int i=find(c);
    524             if(i<chars.length() && c==chars.charAt(i)) {
    525                 equal.set(i, equal.get(i).add(builder, s, start, sValue));
    526             } else {
    527                 chars.insert(i, c);
    528                 equal.add(i, builder.createSuffixNode(s, start, sValue));
    529             }
    530             return this;
    531         }
    532         @Override
    533         public Node register(StringTrieBuilder builder) {
    534             Node subNode=register(builder, 0, chars.length());
    535             BranchHeadNode head=new BranchHeadNode(chars.length(), subNode);
    536             Node result=head;
    537             if(hasValue) {
    538                 if(builder.matchNodesCanHaveValues()) {
    539                     head.setValue(value);
    540                 } else {
    541                     result=new IntermediateValueNode(value, builder.registerNode(head));
    542                 }
    543             }
    544             return builder.registerNode(result);
    545         }
    546         private Node register(StringTrieBuilder builder, int start, int limit) {
    547             int length=limit-start;
    548             if(length>builder.getMaxBranchLinearSubNodeLength()) {
    549                 // Branch on the middle unit.
    550                 int middle=start+length/2;
    551                 return builder.registerNode(
    552                         new SplitBranchNode(
    553                                 chars.charAt(middle),
    554                                 register(builder, start, middle),
    555                                 register(builder, middle, limit)));
    556             }
    557             ListBranchNode listNode=new ListBranchNode(length);
    558             do {
    559                 char c=chars.charAt(start);
    560                 Node node=equal.get(start);
    561                 if(node.getClass()==ValueNode.class) {
    562                     // Final value.
    563                     listNode.add(c, ((ValueNode)node).value);
    564                 } else {
    565                     listNode.add(c, node.register(builder));
    566                 }
    567             } while(++start<limit);
    568             return builder.registerNode(listNode);
    569         }
    570 
    571         private int find(char c) {
    572             int start=0;
    573             int limit=chars.length();
    574             while(start<limit) {
    575                 int i=(start+limit)/2;
    576                 char middleChar=chars.charAt(i);
    577                 if(c<middleChar) {
    578                     limit=i;
    579                 } else if(c==middleChar) {
    580                     return i;
    581                 } else {
    582                     start=i+1;
    583                 }
    584             }
    585             return start;
    586         }
    587 
    588         private StringBuilder chars=new StringBuilder();
    589         private ArrayList<Node> equal=new ArrayList<Node>();
    590     }
    591 
    592     private static abstract class BranchNode extends Node {
    593         public BranchNode() {}
    594         @Override
    595         public int hashCode() /*const*/ { return hash; }
    596 
    597         protected int hash;
    598         protected int firstEdgeNumber;
    599     }
    600 
    601     private static final class ListBranchNode extends BranchNode {
    602         public ListBranchNode(int capacity) {
    603             hash=0x444444*37+capacity;
    604             equal=new Node[capacity];
    605             values=new int[capacity];
    606             units=new char[capacity];
    607         }
    608         @Override
    609         public boolean equals(Object other) {
    610             if(this==other) {
    611                 return true;
    612             }
    613             if(!super.equals(other)) {
    614                 return false;
    615             }
    616             ListBranchNode o=(ListBranchNode)other;
    617             for(int i=0; i<length; ++i) {
    618                 if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
    619                     return false;
    620                 }
    621             }
    622             return true;
    623         }
    624         @Override
    625         public int hashCode() {
    626             return super.hashCode();
    627         }
    628         @Override
    629         public int markRightEdgesFirst(int edgeNumber) {
    630             if(offset==0) {
    631                 firstEdgeNumber=edgeNumber;
    632                 int step=0;
    633                 int i=length;
    634                 do {
    635                     Node edge=equal[--i];
    636                     if(edge!=null) {
    637                         edgeNumber=edge.markRightEdgesFirst(edgeNumber-step);
    638                     }
    639                     // For all but the rightmost edge, decrement the edge number.
    640                     step=1;
    641                 } while(i>0);
    642                 offset=edgeNumber;
    643             }
    644             return edgeNumber;
    645         }
    646         @Override
    647         public void write(StringTrieBuilder builder) {
    648             // Write the sub-nodes in reverse order: The jump lengths are deltas from
    649             // after their own positions, so if we wrote the minUnit sub-node first,
    650             // then its jump delta would be larger.
    651             // Instead we write the minUnit sub-node last, for a shorter delta.
    652             int unitNumber=length-1;
    653             Node rightEdge=equal[unitNumber];
    654             int rightEdgeNumber= rightEdge==null ? firstEdgeNumber : rightEdge.getOffset();
    655             do {
    656                 --unitNumber;
    657                 if(equal[unitNumber]!=null) {
    658                     equal[unitNumber].writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
    659                 }
    660             } while(unitNumber>0);
    661             // The maxUnit sub-node is written as the very last one because we do
    662             // not jump for it at all.
    663             unitNumber=length-1;
    664             if(rightEdge==null) {
    665                 builder.writeValueAndFinal(values[unitNumber], true);
    666             } else {
    667                 rightEdge.write(builder);
    668             }
    669             offset=builder.write(units[unitNumber]);
    670             // Write the rest of this node's unit-value pairs.
    671             while(--unitNumber>=0) {
    672                 int value;
    673                 boolean isFinal;
    674                 if(equal[unitNumber]==null) {
    675                     // Write the final value for the one string ending with this unit.
    676                     value=values[unitNumber];
    677                     isFinal=true;
    678                 } else {
    679                     // Write the delta to the start position of the sub-node.
    680                     assert(equal[unitNumber].getOffset()>0);
    681                     value=offset-equal[unitNumber].getOffset();
    682                     isFinal=false;
    683                 }
    684                 builder.writeValueAndFinal(value, isFinal);
    685                 offset=builder.write(units[unitNumber]);
    686             }
    687         }
    688         // Adds a unit with a final value.
    689         public void add(int c, int value) {
    690             units[length]=(char)c;
    691             equal[length]=null;
    692             values[length]=value;
    693             ++length;
    694             hash=(hash*37+c)*37+value;
    695         }
    696         // Adds a unit which leads to another match node.
    697         public void add(int c, Node node) {
    698             units[length]=(char)c;
    699             equal[length]=node;
    700             values[length]=0;
    701             ++length;
    702             hash=(hash*37+c)*37+node.hashCode();
    703         }
    704 
    705         // Note: We could try to reduce memory allocations
    706         // by replacing these per-node arrays with per-builder ArrayLists and
    707         // (for units) a StringBuilder (or even use its strings for the units too).
    708         // It remains to be seen whether that would improve performance.
    709         private Node[] equal;  // null means "has final value".
    710         private int length;
    711         private int[] values;
    712         private char[] units;
    713     }
    714 
    715     private static final class SplitBranchNode extends BranchNode {
    716         public SplitBranchNode(char middleUnit, Node lessThanNode, Node greaterOrEqualNode) {
    717             hash=((0x555555*37+middleUnit)*37+
    718                     lessThanNode.hashCode())*37+greaterOrEqualNode.hashCode();
    719             unit=middleUnit;
    720             lessThan=lessThanNode;
    721             greaterOrEqual=greaterOrEqualNode;
    722         }
    723         @Override
    724         public boolean equals(Object other) {
    725             if(this==other) {
    726                 return true;
    727             }
    728             if(!super.equals(other)) {
    729                 return false;
    730             }
    731             SplitBranchNode o=(SplitBranchNode)other;
    732             return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
    733         }
    734         @Override
    735         public int hashCode() {
    736             return super.hashCode();
    737         }
    738         @Override
    739         public int markRightEdgesFirst(int edgeNumber) {
    740             if(offset==0) {
    741                 firstEdgeNumber=edgeNumber;
    742                 edgeNumber=greaterOrEqual.markRightEdgesFirst(edgeNumber);
    743                 offset=edgeNumber=lessThan.markRightEdgesFirst(edgeNumber-1);
    744             }
    745             return edgeNumber;
    746         }
    747         @Override
    748         public void write(StringTrieBuilder builder) {
    749             // Encode the less-than branch first.
    750             lessThan.writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual.getOffset(), builder);
    751             // Encode the greater-or-equal branch last because we do not jump for it at all.
    752             greaterOrEqual.write(builder);
    753             // Write this node.
    754             assert(lessThan.getOffset()>0);
    755             builder.writeDeltaTo(lessThan.getOffset());  // less-than
    756             offset=builder.write(unit);
    757         }
    758 
    759         private char unit;
    760         private Node lessThan;
    761         private Node greaterOrEqual;
    762     }
    763 
    764     // Branch head node, for writing the actual node lead unit.
    765     private static final class BranchHeadNode extends ValueNode {
    766         public BranchHeadNode(int len, Node subNode) {
    767             length=len;
    768             next=subNode;
    769         }
    770         @Override
    771         public int hashCode() /*const*/ {
    772             return (0x666666*37+length)*37+next.hashCode();
    773         }
    774         @Override
    775         public boolean equals(Object other) {
    776             if(this==other) {
    777                 return true;
    778             }
    779             if(!super.equals(other)) {
    780                 return false;
    781             }
    782             BranchHeadNode o=(BranchHeadNode)other;
    783             return length==o.length && next==o.next;
    784         }
    785         @Override
    786         public int markRightEdgesFirst(int edgeNumber) {
    787             if(offset==0) {
    788                 offset=edgeNumber=next.markRightEdgesFirst(edgeNumber);
    789             }
    790             return edgeNumber;
    791         }
    792         @Override
    793         public void write(StringTrieBuilder builder) {
    794             next.write(builder);
    795             if(length<=builder.getMinLinearMatch()) {
    796                 offset=builder.writeValueAndType(hasValue, value, length-1);
    797             } else {
    798                 builder.write(length-1);
    799                 offset=builder.writeValueAndType(hasValue, value, 0);
    800             }
    801         }
    802 
    803         private int length;
    804         private Node next;  // A branch sub-node.
    805     }
    806 
    807     private ValueNode createSuffixNode(CharSequence s, int start, int sValue) {
    808         ValueNode node=registerFinalValue(sValue);
    809         if(start<s.length()) {
    810             int offset=strings.length();
    811             strings.append(s, start, s.length());
    812             node=new LinearMatchNode(strings, offset, s.length()-start, node);
    813         }
    814         return node;
    815     }
    816 
    817     /**
    818      * @internal
    819      * @deprecated This API is ICU internal only.
    820      */
    821     @Deprecated
    822     protected abstract boolean matchNodesCanHaveValues() /*const*/;
    823 
    824     /**
    825      * @internal
    826      * @deprecated This API is ICU internal only.
    827      */
    828     @Deprecated
    829     protected abstract int getMaxBranchLinearSubNodeLength() /*const*/;
    830     /**
    831      * @internal
    832      * @deprecated This API is ICU internal only.
    833      */
    834     @Deprecated
    835     protected abstract int getMinLinearMatch() /*const*/;
    836     /**
    837      * @internal
    838      * @deprecated This API is ICU internal only.
    839      */
    840     @Deprecated
    841     protected abstract int getMaxLinearMatchLength() /*const*/;
    842 
    843     /**
    844      * @internal
    845      * @deprecated This API is ICU internal only.
    846      */
    847     @Deprecated
    848     protected abstract int write(int unit);
    849     /**
    850      * @internal
    851      * @deprecated This API is ICU internal only.
    852      */
    853     @Deprecated
    854     protected abstract int write(int offset, int length);
    855     /**
    856      * @internal
    857      * @deprecated This API is ICU internal only.
    858      */
    859     @Deprecated
    860     protected abstract int writeValueAndFinal(int i, boolean isFinal);
    861     /**
    862      * @internal
    863      * @deprecated This API is ICU internal only.
    864      */
    865     @Deprecated
    866     protected abstract int writeValueAndType(boolean hasValue, int value, int node);
    867     /**
    868      * @internal
    869      * @deprecated This API is ICU internal only.
    870      */
    871     @Deprecated
    872     protected abstract int writeDeltaTo(int jumpTarget);
    873 
    874     private enum State {
    875         ADDING, BUILDING_FAST, BUILDING_SMALL, BUILT
    876     }
    877     private State state=State.ADDING;
    878 
    879     // Strings and sub-strings for linear-match nodes.
    880     /**
    881      * @internal
    882      * @deprecated This API is ICU internal only.
    883      */
    884     @Deprecated
    885     protected StringBuilder strings=new StringBuilder();
    886     private Node root;
    887 
    888     // Hash set of nodes, maps from nodes to integer 1.
    889     private HashMap<Node, Node> nodes=new HashMap<Node, Node>();
    890     private ValueNode lookupFinalValueNode=new ValueNode();
    891 }
    892