Home | History | Annotate | Download | only in common
      1 /**
      2  *******************************************************************************
      3  * Copyright (C) 2006-2008, International Business Machines Corporation        *
      4  * and others. All Rights Reserved.                                            *
      5  *******************************************************************************
      6  */
      7 
      8 #include "unicode/utypes.h"
      9 
     10 #if !UCONFIG_NO_BREAK_ITERATION
     11 
     12 #include "triedict.h"
     13 #include "unicode/chariter.h"
     14 #include "unicode/uchriter.h"
     15 #include "unicode/strenum.h"
     16 #include "unicode/uenum.h"
     17 #include "unicode/udata.h"
     18 #include "cmemory.h"
     19 #include "udataswp.h"
     20 #include "uvector.h"
     21 #include "uvectr32.h"
     22 #include "uarrsort.h"
     23 
     24 //#define DEBUG_TRIE_DICT 1
     25 
     26 #ifdef DEBUG_TRIE_DICT
     27 #include <sys/times.h>
     28 #include <limits.h>
     29 #include <stdio.h>
     30 #endif
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 /*******************************************************************
     35  * TrieWordDictionary
     36  */
     37 
     38 TrieWordDictionary::TrieWordDictionary() {
     39 }
     40 
     41 TrieWordDictionary::~TrieWordDictionary() {
     42 }
     43 
     44 /*******************************************************************
     45  * MutableTrieDictionary
     46  */
     47 
     48 // Node structure for the ternary, uncompressed trie
     49 struct TernaryNode : public UMemory {
     50     UChar       ch;         // UTF-16 code unit
     51     uint16_t    flags;      // Flag word
     52     TernaryNode *low;       // Less-than link
     53     TernaryNode *equal;     // Equal link
     54     TernaryNode *high;      // Greater-than link
     55 
     56     TernaryNode(UChar uc);
     57     ~TernaryNode();
     58 };
     59 
     60 enum MutableTrieNodeFlags {
     61     kEndsWord = 0x0001      // This node marks the end of a valid word
     62 };
     63 
     64 inline
     65 TernaryNode::TernaryNode(UChar uc) {
     66     ch = uc;
     67     flags = 0;
     68     low = NULL;
     69     equal = NULL;
     70     high = NULL;
     71 }
     72 
     73 // Not inline since it's recursive
     74 TernaryNode::~TernaryNode() {
     75     delete low;
     76     delete equal;
     77     delete high;
     78 }
     79 
     80 MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status ) {
     81     // Start the trie off with something. Having the root node already present
     82     // cuts a special case out of the search/insertion functions.
     83     // Making it a median character cuts the worse case for searches from
     84     // 4x a balanced trie to 2x a balanced trie. It's best to choose something
     85     // that starts a word that is midway in the list.
     86     fTrie = new TernaryNode(median);
     87     if (fTrie == NULL) {
     88         status = U_MEMORY_ALLOCATION_ERROR;
     89     }
     90     fIter = utext_openUChars(NULL, NULL, 0, &status);
     91     if (U_SUCCESS(status) && fIter == NULL) {
     92         status = U_MEMORY_ALLOCATION_ERROR;
     93     }
     94 }
     95 
     96 MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status ) {
     97     fTrie = NULL;
     98     fIter = utext_openUChars(NULL, NULL, 0, &status);
     99     if (U_SUCCESS(status) && fIter == NULL) {
    100         status = U_MEMORY_ALLOCATION_ERROR;
    101     }
    102 }
    103 
    104 MutableTrieDictionary::~MutableTrieDictionary() {
    105     delete fTrie;
    106     utext_close(fIter);
    107 }
    108 
    109 int32_t
    110 MutableTrieDictionary::search( UText *text,
    111                                    int32_t maxLength,
    112                                    int32_t *lengths,
    113                                    int &count,
    114                                    int limit,
    115                                    TernaryNode *&parent,
    116                                    UBool &pMatched ) const {
    117     // TODO: current implementation works in UTF-16 space
    118     const TernaryNode *up = NULL;
    119     const TernaryNode *p = fTrie;
    120     int mycount = 0;
    121     pMatched = TRUE;
    122     int i;
    123 
    124     UChar uc = utext_current32(text);
    125     for (i = 0; i < maxLength && p != NULL; ++i) {
    126         while (p != NULL) {
    127             if (uc < p->ch) {
    128                 up = p;
    129                 p = p->low;
    130             }
    131             else if (uc == p->ch) {
    132                 break;
    133             }
    134             else {
    135                 up = p;
    136                 p = p->high;
    137             }
    138         }
    139         if (p == NULL) {
    140             pMatched = FALSE;
    141             break;
    142         }
    143         // Must be equal to get here
    144         if (limit > 0 && (p->flags & kEndsWord)) {
    145             lengths[mycount++] = i+1;
    146             --limit;
    147         }
    148         up = p;
    149         p = p->equal;
    150         uc = utext_next32(text);
    151         uc = utext_current32(text);
    152     }
    153 
    154     // Note that there is no way to reach here with up == 0 unless
    155     // maxLength is 0 coming in.
    156     parent = (TernaryNode *)up;
    157     count = mycount;
    158     return i;
    159 }
    160 
    161 void
    162 MutableTrieDictionary::addWord( const UChar *word,
    163                                 int32_t length,
    164                                 UErrorCode &status ) {
    165 #if 0
    166     if (length <= 0) {
    167         status = U_ILLEGAL_ARGUMENT_ERROR;
    168         return;
    169     }
    170 #endif
    171     TernaryNode *parent;
    172     UBool pMatched;
    173     int count;
    174     fIter = utext_openUChars(fIter, word, length, &status);
    175 
    176     int matched;
    177     matched = search(fIter, length, NULL, count, 0, parent, pMatched);
    178 
    179     while (matched++ < length) {
    180         UChar32 uc = utext_next32(fIter);  // TODO:  supplemetary support?
    181         U_ASSERT(uc != U_SENTINEL);
    182         TernaryNode *newNode = new TernaryNode(uc);
    183         if (newNode == NULL) {
    184             status = U_MEMORY_ALLOCATION_ERROR;
    185             return;
    186         }
    187         if (pMatched) {
    188             parent->equal = newNode;
    189         }
    190         else {
    191             pMatched = TRUE;
    192             if (uc < parent->ch) {
    193                 parent->low = newNode;
    194             }
    195             else {
    196                 parent->high = newNode;
    197             }
    198         }
    199         parent = newNode;
    200     }
    201 
    202     parent->flags |= kEndsWord;
    203 }
    204 
    205 #if 0
    206 void
    207 MutableTrieDictionary::addWords( UEnumeration *words,
    208                                   UErrorCode &status ) {
    209     int32_t length;
    210     const UChar *word;
    211     while ((word = uenum_unext(words, &length, &status)) && U_SUCCESS(status)) {
    212         addWord(word, length, status);
    213     }
    214 }
    215 #endif
    216 
    217 int32_t
    218 MutableTrieDictionary::matches( UText *text,
    219                                 int32_t maxLength,
    220                                 int32_t *lengths,
    221                                 int &count,
    222                                 int limit ) const {
    223     TernaryNode *parent;
    224     UBool pMatched;
    225     return search(text, maxLength, lengths, count, limit, parent, pMatched);
    226 }
    227 
    228 // Implementation of iteration for MutableTrieDictionary
    229 class MutableTrieEnumeration : public StringEnumeration {
    230 private:
    231     UStack      fNodeStack;     // Stack of nodes to process
    232     UVector32   fBranchStack;   // Stack of which branch we are working on
    233     TernaryNode *fRoot;         // Root node
    234     enum StackBranch {
    235         kLessThan,
    236         kEqual,
    237         kGreaterThan,
    238         kDone
    239     };
    240 
    241 public:
    242     static UClassID U_EXPORT2 getStaticClassID(void);
    243     virtual UClassID getDynamicClassID(void) const;
    244 public:
    245     MutableTrieEnumeration(TernaryNode *root, UErrorCode &status)
    246         : fNodeStack(status), fBranchStack(status) {
    247         fRoot = root;
    248         fNodeStack.push(root, status);
    249         fBranchStack.push(kLessThan, status);
    250         unistr.remove();
    251     }
    252 
    253     virtual ~MutableTrieEnumeration() {
    254     }
    255 
    256     virtual StringEnumeration *clone() const {
    257         UErrorCode status = U_ZERO_ERROR;
    258         return new MutableTrieEnumeration(fRoot, status);
    259     }
    260 
    261     virtual const UnicodeString *snext(UErrorCode &status) {
    262         if (fNodeStack.empty() || U_FAILURE(status)) {
    263             return NULL;
    264         }
    265         TernaryNode *node = (TernaryNode *) fNodeStack.peek();
    266         StackBranch where = (StackBranch) fBranchStack.peeki();
    267         while (!fNodeStack.empty() && U_SUCCESS(status)) {
    268             UBool emit;
    269             UBool equal;
    270 
    271             switch (where) {
    272             case kLessThan:
    273                 if (node->low != NULL) {
    274                     fBranchStack.setElementAt(kEqual, fBranchStack.size()-1);
    275                     node = (TernaryNode *) fNodeStack.push(node->low, status);
    276                     where = (StackBranch) fBranchStack.push(kLessThan, status);
    277                     break;
    278                 }
    279             case kEqual:
    280                 emit = (node->flags & kEndsWord) != 0;
    281                 equal = (node->equal != NULL);
    282                 // If this node should be part of the next emitted string, append
    283                 // the UChar to the string, and make sure we pop it when we come
    284                 // back to this node. The character should only be in the string
    285                 // for as long as we're traversing the equal subtree of this node
    286                 if (equal || emit) {
    287                     unistr.append(node->ch);
    288                     fBranchStack.setElementAt(kGreaterThan, fBranchStack.size()-1);
    289                 }
    290                 if (equal) {
    291                     node = (TernaryNode *) fNodeStack.push(node->equal, status);
    292                     where = (StackBranch) fBranchStack.push(kLessThan, status);
    293                 }
    294                 if (emit) {
    295                     return &unistr;
    296                 }
    297                 if (equal) {
    298                     break;
    299                 }
    300             case kGreaterThan:
    301                 // If this node's character is in the string, remove it.
    302                 if (node->equal != NULL || (node->flags & kEndsWord)) {
    303                     unistr.truncate(unistr.length()-1);
    304                 }
    305                 if (node->high != NULL) {
    306                     fBranchStack.setElementAt(kDone, fBranchStack.size()-1);
    307                     node = (TernaryNode *) fNodeStack.push(node->high, status);
    308                     where = (StackBranch) fBranchStack.push(kLessThan, status);
    309                     break;
    310                 }
    311             case kDone:
    312                 fNodeStack.pop();
    313                 fBranchStack.popi();
    314                 node = (TernaryNode *) fNodeStack.peek();
    315                 where = (StackBranch) fBranchStack.peeki();
    316                 break;
    317             default:
    318                 return NULL;
    319             }
    320         }
    321         return NULL;
    322     }
    323 
    324     // Very expensive, but this should never be used.
    325     virtual int32_t count(UErrorCode &status) const {
    326         MutableTrieEnumeration counter(fRoot, status);
    327         int32_t result = 0;
    328         while (counter.snext(status) != NULL && U_SUCCESS(status)) {
    329             ++result;
    330         }
    331         return result;
    332     }
    333 
    334     virtual void reset(UErrorCode &status) {
    335         fNodeStack.removeAllElements();
    336         fBranchStack.removeAllElements();
    337         fNodeStack.push(fRoot, status);
    338         fBranchStack.push(kLessThan, status);
    339         unistr.remove();
    340     }
    341 };
    342 
    343 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MutableTrieEnumeration)
    344 
    345 StringEnumeration *
    346 MutableTrieDictionary::openWords( UErrorCode &status ) const {
    347     if (U_FAILURE(status)) {
    348         return NULL;
    349     }
    350     return new MutableTrieEnumeration(fTrie, status);
    351 }
    352 
    353 /*******************************************************************
    354  * CompactTrieDictionary
    355  */
    356 
    357 struct CompactTrieHeader {
    358     uint32_t        size;           // Size of the data in bytes
    359     uint32_t        magic;          // Magic number (including version)
    360     uint16_t        nodeCount;      // Number of entries in offsets[]
    361     uint16_t        root;           // Node number of the root node
    362     uint32_t        offsets[1];      // Offsets to nodes from start of data
    363 };
    364 
    365 // Note that to avoid platform-specific alignment issues, all members of the node
    366 // structures should be the same size, or should contain explicit padding to
    367 // natural alignment boundaries.
    368 
    369 // We can't use a bitfield for the flags+count field, because the layout of those
    370 // is not portable. 12 bits of count allows for up to 4096 entries in a node.
    371 struct CompactTrieNode {
    372     uint16_t        flagscount;     // Count of sub-entries, plus flags
    373 };
    374 
    375 enum CompactTrieNodeFlags {
    376     kVerticalNode   = 0x1000,       // This is a vertical node
    377     kParentEndsWord = 0x2000,       // The node whose equal link points to this ends a word
    378     kReservedFlag1  = 0x4000,
    379     kReservedFlag2  = 0x8000,
    380     kCountMask      = 0x0FFF,       // The count portion of flagscount
    381     kFlagMask       = 0xF000        // The flags portion of flagscount
    382 };
    383 
    384 // The two node types are distinguished by the kVerticalNode flag.
    385 
    386 struct CompactTrieHorizontalEntry {
    387     uint16_t        ch;             // UChar
    388     uint16_t        equal;          // Equal link node index
    389 };
    390 
    391 // We don't use inheritance here because C++ does not guarantee that the
    392 // base class comes first in memory!!
    393 
    394 struct CompactTrieHorizontalNode {
    395     uint16_t        flagscount;     // Count of sub-entries, plus flags
    396     CompactTrieHorizontalEntry      entries[1];
    397 };
    398 
    399 struct CompactTrieVerticalNode {
    400     uint16_t        flagscount;     // Count of sub-entries, plus flags
    401     uint16_t        equal;          // Equal link node index
    402     uint16_t        chars[1];       // Code units
    403 };
    404 
    405 // {'Dic', 1}, version 1
    406 #define COMPACT_TRIE_MAGIC_1 0x44696301
    407 
    408 CompactTrieDictionary::CompactTrieDictionary(UDataMemory *dataObj,
    409                                                 UErrorCode &status )
    410 : fUData(dataObj)
    411 {
    412     fData = (const CompactTrieHeader *) udata_getMemory(dataObj);
    413     fOwnData = FALSE;
    414     if (fData->magic != COMPACT_TRIE_MAGIC_1) {
    415         status = U_ILLEGAL_ARGUMENT_ERROR;
    416         fData = NULL;
    417     }
    418 }
    419 CompactTrieDictionary::CompactTrieDictionary( const void *data,
    420                                                 UErrorCode &status )
    421 : fUData(NULL)
    422 {
    423     fData = (const CompactTrieHeader *) data;
    424     fOwnData = FALSE;
    425     if (fData->magic != COMPACT_TRIE_MAGIC_1) {
    426         status = U_ILLEGAL_ARGUMENT_ERROR;
    427         fData = NULL;
    428     }
    429 }
    430 
    431 CompactTrieDictionary::CompactTrieDictionary( const MutableTrieDictionary &dict,
    432                                                 UErrorCode &status )
    433 : fUData(NULL)
    434 {
    435     fData = compactMutableTrieDictionary(dict, status);
    436     fOwnData = !U_FAILURE(status);
    437 }
    438 
    439 CompactTrieDictionary::~CompactTrieDictionary() {
    440     if (fOwnData) {
    441         uprv_free((void *)fData);
    442     }
    443     if (fUData) {
    444         udata_close(fUData);
    445     }
    446 }
    447 
    448 uint32_t
    449 CompactTrieDictionary::dataSize() const {
    450     return fData->size;
    451 }
    452 
    453 const void *
    454 CompactTrieDictionary::data() const {
    455     return fData;
    456 }
    457 
    458 // This function finds the address of a node for us, given its node ID
    459 static inline const CompactTrieNode *
    460 getCompactNode(const CompactTrieHeader *header, uint16_t node) {
    461     return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]);
    462 }
    463 
    464 int32_t
    465 CompactTrieDictionary::matches( UText *text,
    466                                 int32_t maxLength,
    467                                 int32_t *lengths,
    468                                 int &count,
    469                                 int limit ) const {
    470     // TODO: current implementation works in UTF-16 space
    471     const CompactTrieNode *node = getCompactNode(fData, fData->root);
    472     int mycount = 0;
    473 
    474     UChar uc = utext_current32(text);
    475     int i = 0;
    476 
    477     while (node != NULL) {
    478         // Check if the node we just exited ends a word
    479         if (limit > 0 && (node->flagscount & kParentEndsWord)) {
    480             lengths[mycount++] = i;
    481             --limit;
    482         }
    483         // Check that we haven't exceeded the maximum number of input characters.
    484         // We have to do that here rather than in the while condition so that
    485         // we can check for ending a word, above.
    486         if (i >= maxLength) {
    487             break;
    488         }
    489 
    490         int nodeCount = (node->flagscount & kCountMask);
    491         if (nodeCount == 0) {
    492             // Special terminal node; return now
    493             break;
    494         }
    495         if (node->flagscount & kVerticalNode) {
    496             // Vertical node; check all the characters in it
    497             const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
    498             for (int j = 0; j < nodeCount && i < maxLength; ++j) {
    499                 if (uc != vnode->chars[j]) {
    500                     // We hit a non-equal character; return
    501                     goto exit;
    502                 }
    503                 utext_next32(text);
    504                 uc = utext_current32(text);
    505                 ++i;
    506             }
    507             // To get here we must have come through the whole list successfully;
    508             // go on to the next node. Note that a word cannot end in the middle
    509             // of a vertical node.
    510             node = getCompactNode(fData, vnode->equal);
    511         }
    512         else {
    513             // Horizontal node; do binary search
    514             const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
    515             int low = 0;
    516             int high = nodeCount-1;
    517             int middle;
    518             node = NULL;    // If we don't find a match, we'll fall out of the loop
    519             while (high >= low) {
    520                 middle = (high+low)/2;
    521                 if (uc == hnode->entries[middle].ch) {
    522                     // We hit a match; get the next node and next character
    523                     node = getCompactNode(fData, hnode->entries[middle].equal);
    524                     utext_next32(text);
    525                     uc = utext_current32(text);
    526                     ++i;
    527                     break;
    528                 }
    529                 else if (uc < hnode->entries[middle].ch) {
    530                     high = middle-1;
    531                 }
    532                 else {
    533                     low = middle+1;
    534                 }
    535             }
    536         }
    537     }
    538 exit:
    539     count = mycount;
    540     return i;
    541 }
    542 
    543 // Implementation of iteration for CompactTrieDictionary
    544 class CompactTrieEnumeration : public StringEnumeration {
    545 private:
    546     UVector32               fNodeStack;     // Stack of nodes to process
    547     UVector32               fIndexStack;    // Stack of where in node we are
    548     const CompactTrieHeader *fHeader;       // Trie data
    549 
    550 public:
    551     static UClassID U_EXPORT2 getStaticClassID(void);
    552     virtual UClassID getDynamicClassID(void) const;
    553 public:
    554     CompactTrieEnumeration(const CompactTrieHeader *header, UErrorCode &status)
    555         : fNodeStack(status), fIndexStack(status) {
    556         fHeader = header;
    557         fNodeStack.push(header->root, status);
    558         fIndexStack.push(0, status);
    559         unistr.remove();
    560     }
    561 
    562     virtual ~CompactTrieEnumeration() {
    563     }
    564 
    565     virtual StringEnumeration *clone() const {
    566         UErrorCode status = U_ZERO_ERROR;
    567         return new CompactTrieEnumeration(fHeader, status);
    568     }
    569 
    570     virtual const UnicodeString * snext(UErrorCode &status);
    571 
    572     // Very expensive, but this should never be used.
    573     virtual int32_t count(UErrorCode &status) const {
    574         CompactTrieEnumeration counter(fHeader, status);
    575         int32_t result = 0;
    576         while (counter.snext(status) != NULL && U_SUCCESS(status)) {
    577             ++result;
    578         }
    579         return result;
    580     }
    581 
    582     virtual void reset(UErrorCode &status) {
    583         fNodeStack.removeAllElements();
    584         fIndexStack.removeAllElements();
    585         fNodeStack.push(fHeader->root, status);
    586         fIndexStack.push(0, status);
    587         unistr.remove();
    588     }
    589 };
    590 
    591 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactTrieEnumeration)
    592 
    593 const UnicodeString *
    594 CompactTrieEnumeration::snext(UErrorCode &status) {
    595     if (fNodeStack.empty() || U_FAILURE(status)) {
    596         return NULL;
    597     }
    598     const CompactTrieNode *node = getCompactNode(fHeader, fNodeStack.peeki());
    599     int where = fIndexStack.peeki();
    600     while (!fNodeStack.empty() && U_SUCCESS(status)) {
    601         int nodeCount = (node->flagscount & kCountMask);
    602         UBool goingDown = FALSE;
    603         if (nodeCount == 0) {
    604             // Terminal node; go up immediately
    605             fNodeStack.popi();
    606             fIndexStack.popi();
    607             node = getCompactNode(fHeader, fNodeStack.peeki());
    608             where = fIndexStack.peeki();
    609         }
    610         else if (node->flagscount & kVerticalNode) {
    611             // Vertical node
    612             const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
    613             if (where == 0) {
    614                 // Going down
    615                 unistr.append((const UChar *)vnode->chars, (int32_t) nodeCount);
    616                 fIndexStack.setElementAt(1, fIndexStack.size()-1);
    617                 node = getCompactNode(fHeader, fNodeStack.push(vnode->equal, status));
    618                 where = fIndexStack.push(0, status);
    619                 goingDown = TRUE;
    620             }
    621             else {
    622                 // Going up
    623                 unistr.truncate(unistr.length()-nodeCount);
    624                 fNodeStack.popi();
    625                 fIndexStack.popi();
    626                 node = getCompactNode(fHeader, fNodeStack.peeki());
    627                 where = fIndexStack.peeki();
    628             }
    629         }
    630         else {
    631             // Horizontal node
    632             const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
    633             if (where > 0) {
    634                 // Pop previous char
    635                 unistr.truncate(unistr.length()-1);
    636             }
    637             if (where < nodeCount) {
    638                 // Push on next node
    639                 unistr.append((UChar)hnode->entries[where].ch);
    640                 fIndexStack.setElementAt(where+1, fIndexStack.size()-1);
    641                 node = getCompactNode(fHeader, fNodeStack.push(hnode->entries[where].equal, status));
    642                 where = fIndexStack.push(0, status);
    643                 goingDown = TRUE;
    644             }
    645             else {
    646                 // Going up
    647                 fNodeStack.popi();
    648                 fIndexStack.popi();
    649                 node = getCompactNode(fHeader, fNodeStack.peeki());
    650                 where = fIndexStack.peeki();
    651             }
    652         }
    653         // Check if the parent of the node we've just gone down to ends a
    654         // word. If so, return it.
    655         if (goingDown && (node->flagscount & kParentEndsWord)) {
    656             return &unistr;
    657         }
    658     }
    659     return NULL;
    660 }
    661 
    662 StringEnumeration *
    663 CompactTrieDictionary::openWords( UErrorCode &status ) const {
    664     if (U_FAILURE(status)) {
    665         return NULL;
    666     }
    667     return new CompactTrieEnumeration(fData, status);
    668 }
    669 
    670 //
    671 // Below here is all code related to converting a ternary trie to a compact trie
    672 // and back again
    673 //
    674 
    675 // Helper classes to construct the compact trie
    676 class BuildCompactTrieNode: public UMemory {
    677  public:
    678     UBool           fParentEndsWord;
    679     UBool           fVertical;
    680     UBool           fHasDuplicate;
    681     int32_t         fNodeID;
    682     UnicodeString   fChars;
    683 
    684  public:
    685     BuildCompactTrieNode(UBool parentEndsWord, UBool vertical, UStack &nodes, UErrorCode &status) {
    686         fParentEndsWord = parentEndsWord;
    687         fHasDuplicate = FALSE;
    688         fVertical = vertical;
    689         fNodeID = nodes.size();
    690         nodes.push(this, status);
    691     }
    692 
    693     virtual ~BuildCompactTrieNode() {
    694     }
    695 
    696     virtual uint32_t size() {
    697         return sizeof(uint16_t);
    698     }
    699 
    700     virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &/*translate*/) {
    701         // Write flag/count
    702         *((uint16_t *)(bytes+offset)) = (fChars.length() & kCountMask)
    703             | (fVertical ? kVerticalNode : 0) | (fParentEndsWord ? kParentEndsWord : 0 );
    704         offset += sizeof(uint16_t);
    705     }
    706 };
    707 
    708 class BuildCompactTrieHorizontalNode: public BuildCompactTrieNode {
    709  public:
    710     UStack          fLinks;
    711 
    712  public:
    713     BuildCompactTrieHorizontalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
    714         : BuildCompactTrieNode(parentEndsWord, FALSE, nodes, status), fLinks(status) {
    715     }
    716 
    717     virtual ~BuildCompactTrieHorizontalNode() {
    718     }
    719 
    720     virtual uint32_t size() {
    721         return offsetof(CompactTrieHorizontalNode,entries) +
    722                 (fChars.length()*sizeof(CompactTrieHorizontalEntry));
    723     }
    724 
    725     virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
    726         BuildCompactTrieNode::write(bytes, offset, translate);
    727         int32_t count = fChars.length();
    728         for (int32_t i = 0; i < count; ++i) {
    729             CompactTrieHorizontalEntry *entry = (CompactTrieHorizontalEntry *)(bytes+offset);
    730             entry->ch = fChars[i];
    731             entry->equal = translate.elementAti(((BuildCompactTrieNode *)fLinks[i])->fNodeID);
    732 #ifdef DEBUG_TRIE_DICT
    733             if (entry->equal == 0) {
    734                 fprintf(stderr, "ERROR: horizontal link %d, logical node %d maps to physical node zero\n",
    735                         i, ((BuildCompactTrieNode *)fLinks[i])->fNodeID);
    736             }
    737 #endif
    738             offset += sizeof(CompactTrieHorizontalEntry);
    739         }
    740     }
    741 
    742     void addNode(UChar ch, BuildCompactTrieNode *link, UErrorCode &status) {
    743         fChars.append(ch);
    744         fLinks.push(link, status);
    745     }
    746 };
    747 
    748 class BuildCompactTrieVerticalNode: public BuildCompactTrieNode {
    749  public:
    750     BuildCompactTrieNode    *fEqual;
    751 
    752  public:
    753     BuildCompactTrieVerticalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
    754         : BuildCompactTrieNode(parentEndsWord, TRUE, nodes, status) {
    755         fEqual = NULL;
    756     }
    757 
    758     virtual ~BuildCompactTrieVerticalNode() {
    759     }
    760 
    761     virtual uint32_t size() {
    762         return offsetof(CompactTrieVerticalNode,chars) + (fChars.length()*sizeof(uint16_t));
    763     }
    764 
    765     virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
    766         CompactTrieVerticalNode *node = (CompactTrieVerticalNode *)(bytes+offset);
    767         BuildCompactTrieNode::write(bytes, offset, translate);
    768         node->equal = translate.elementAti(fEqual->fNodeID);
    769         offset += sizeof(node->equal);
    770 #ifdef DEBUG_TRIE_DICT
    771         if (node->equal == 0) {
    772             fprintf(stderr, "ERROR: vertical link, logical node %d maps to physical node zero\n",
    773                     fEqual->fNodeID);
    774         }
    775 #endif
    776         fChars.extract(0, fChars.length(), (UChar *)node->chars);
    777         offset += sizeof(uint16_t)*fChars.length();
    778     }
    779 
    780     void addChar(UChar ch) {
    781         fChars.append(ch);
    782     }
    783 
    784     void setLink(BuildCompactTrieNode *node) {
    785         fEqual = node;
    786     }
    787 };
    788 
    789 // Forward declaration
    790 static void walkHorizontal(const TernaryNode *node,
    791                             BuildCompactTrieHorizontalNode *building,
    792                             UStack &nodes,
    793                             UErrorCode &status);
    794 
    795 // Convert one node. Uses recursion.
    796 
    797 static BuildCompactTrieNode *
    798 compactOneNode(const TernaryNode *node, UBool parentEndsWord, UStack &nodes, UErrorCode &status) {
    799     if (U_FAILURE(status)) {
    800         return NULL;
    801     }
    802     BuildCompactTrieNode *result = NULL;
    803     UBool horizontal = (node->low != NULL || node->high != NULL);
    804     if (horizontal) {
    805         BuildCompactTrieHorizontalNode *hResult =
    806                 new BuildCompactTrieHorizontalNode(parentEndsWord, nodes, status);
    807         if (hResult == NULL) {
    808             status = U_MEMORY_ALLOCATION_ERROR;
    809             return NULL;
    810         }
    811         if (U_SUCCESS(status)) {
    812             walkHorizontal(node, hResult, nodes, status);
    813             result = hResult;
    814         }
    815     }
    816     else {
    817         BuildCompactTrieVerticalNode *vResult =
    818                 new BuildCompactTrieVerticalNode(parentEndsWord, nodes, status);
    819         if (vResult == NULL) {
    820             status = U_MEMORY_ALLOCATION_ERROR;
    821         }
    822         else if (U_SUCCESS(status)) {
    823             UBool   endsWord = FALSE;
    824             // Take up nodes until we end a word, or hit a node with < or > links
    825             do {
    826                 vResult->addChar(node->ch);
    827                 endsWord = (node->flags & kEndsWord) != 0;
    828                 node = node->equal;
    829             }
    830             while(node != NULL && !endsWord && node->low == NULL && node->high == NULL);
    831             if (node == NULL) {
    832                 if (!endsWord) {
    833                     status = U_ILLEGAL_ARGUMENT_ERROR;  // Corrupt input trie
    834                 }
    835                 else {
    836                     vResult->setLink((BuildCompactTrieNode *)nodes[1]);
    837                 }
    838             }
    839             else {
    840                 vResult->setLink(compactOneNode(node, endsWord, nodes, status));
    841             }
    842             result = vResult;
    843         }
    844     }
    845     return result;
    846 }
    847 
    848 // Walk the set of peers at the same level, to build a horizontal node.
    849 // Uses recursion.
    850 
    851 static void walkHorizontal(const TernaryNode *node,
    852                             BuildCompactTrieHorizontalNode *building,
    853                             UStack &nodes,
    854                             UErrorCode &status) {
    855     while (U_SUCCESS(status) && node != NULL) {
    856         if (node->low != NULL) {
    857             walkHorizontal(node->low, building, nodes, status);
    858         }
    859         BuildCompactTrieNode *link = NULL;
    860         if (node->equal != NULL) {
    861             link = compactOneNode(node->equal, (node->flags & kEndsWord) != 0, nodes, status);
    862         }
    863         else if (node->flags & kEndsWord) {
    864             link = (BuildCompactTrieNode *)nodes[1];
    865         }
    866         if (U_SUCCESS(status) && link != NULL) {
    867             building->addNode(node->ch, link, status);
    868         }
    869         // Tail recurse manually instead of leaving it to the compiler.
    870         //if (node->high != NULL) {
    871         //    walkHorizontal(node->high, building, nodes, status);
    872         //}
    873         node = node->high;
    874     }
    875 }
    876 
    877 U_NAMESPACE_END
    878 U_NAMESPACE_USE
    879 U_CDECL_BEGIN
    880 static int32_t U_CALLCONV
    881 _sortBuildNodes(const void * /*context*/, const void *voidl, const void *voidr) {
    882     BuildCompactTrieNode *left = *(BuildCompactTrieNode **)voidl;
    883     BuildCompactTrieNode *right = *(BuildCompactTrieNode **)voidr;
    884     // Check for comparing a node to itself, to avoid spurious duplicates
    885     if (left == right) {
    886         return 0;
    887     }
    888     // Most significant is type of node. Can never coalesce.
    889     if (left->fVertical != right->fVertical) {
    890         return left->fVertical - right->fVertical;
    891     }
    892     // Next, the "parent ends word" flag. If that differs, we cannot coalesce.
    893     if (left->fParentEndsWord != right->fParentEndsWord) {
    894         return left->fParentEndsWord - right->fParentEndsWord;
    895     }
    896     // Next, the string. If that differs, we can never coalesce.
    897     int32_t result = left->fChars.compare(right->fChars);
    898     if (result != 0) {
    899         return result;
    900     }
    901     // We know they're both the same node type, so branch for the two cases.
    902     if (left->fVertical) {
    903         result = ((BuildCompactTrieVerticalNode *)left)->fEqual->fNodeID
    904                             - ((BuildCompactTrieVerticalNode *)right)->fEqual->fNodeID;
    905     }
    906     else {
    907         // We need to compare the links vectors. They should be the
    908         // same size because the strings were equal.
    909         // We compare the node IDs instead of the pointers, to handle
    910         // coalesced nodes.
    911         BuildCompactTrieHorizontalNode *hleft, *hright;
    912         hleft = (BuildCompactTrieHorizontalNode *)left;
    913         hright = (BuildCompactTrieHorizontalNode *)right;
    914         int32_t count = hleft->fLinks.size();
    915         for (int32_t i = 0; i < count && result == 0; ++i) {
    916             result = ((BuildCompactTrieNode *)(hleft->fLinks[i]))->fNodeID -
    917                      ((BuildCompactTrieNode *)(hright->fLinks[i]))->fNodeID;
    918         }
    919     }
    920     // If they are equal to each other, mark them (speeds coalescing)
    921     if (result == 0) {
    922         left->fHasDuplicate = TRUE;
    923         right->fHasDuplicate = TRUE;
    924     }
    925     return result;
    926 }
    927 U_CDECL_END
    928 U_NAMESPACE_BEGIN
    929 
    930 static void coalesceDuplicates(UStack &nodes, UErrorCode &status) {
    931     // We sort the array of nodes to place duplicates next to each other
    932     if (U_FAILURE(status)) {
    933         return;
    934     }
    935     int32_t size = nodes.size();
    936     void **array = (void **)uprv_malloc(sizeof(void *)*size);
    937     if (array == NULL) {
    938         status = U_MEMORY_ALLOCATION_ERROR;
    939         return;
    940     }
    941     (void) nodes.toArray(array);
    942 
    943     // Now repeatedly identify duplicates until there are no more
    944     int32_t dupes = 0;
    945     long    passCount = 0;
    946 #ifdef DEBUG_TRIE_DICT
    947     long    totalDupes = 0;
    948 #endif
    949     do {
    950         BuildCompactTrieNode *node;
    951         BuildCompactTrieNode *first = NULL;
    952         BuildCompactTrieNode **p;
    953         BuildCompactTrieNode **pFirst = NULL;
    954         int32_t counter = size - 2;
    955         // Sort the array, skipping nodes 0 and 1. Use quicksort for the first
    956         // pass for speed. For the second and subsequent passes, we use stable
    957         // (insertion) sort for two reasons:
    958         // 1. The array is already mostly ordered, so we get better performance.
    959         // 2. The way we find one and only one instance of a set of duplicates is to
    960         //    check that the node ID equals the array index. If we used an unstable
    961         //    sort for the second or later passes, it's possible that none of the
    962         //    duplicates would wind up with a node ID equal to its array index.
    963         //    The sort stability guarantees that, because as we coalesce more and
    964         //    more groups, the first element of the resultant group will be one of
    965         //    the first elements of the groups being coalesced.
    966         // To use quicksort for the second and subsequent passes, we would have to
    967         // find the minimum of the node numbers in a group, and set all the nodes
    968         // in the group to that node number.
    969         uprv_sortArray(array+2, counter, sizeof(void *), _sortBuildNodes, NULL, (passCount > 0), &status);
    970         dupes = 0;
    971         for (p = (BuildCompactTrieNode **)array + 2; counter > 0; --counter, ++p) {
    972             node = *p;
    973             if (node->fHasDuplicate) {
    974                 if (first == NULL) {
    975                     first = node;
    976                     pFirst = p;
    977                 }
    978                 else if (_sortBuildNodes(NULL, pFirst, p) != 0) {
    979                     // Starting a new run of dupes
    980                     first = node;
    981                     pFirst = p;
    982                 }
    983                 else if (node->fNodeID != first->fNodeID) {
    984                     // Slave one to the other, note duplicate
    985                     node->fNodeID = first->fNodeID;
    986                     dupes += 1;
    987                 }
    988             }
    989             else {
    990                 // This node has no dupes
    991                 first = NULL;
    992                 pFirst = NULL;
    993             }
    994         }
    995         passCount += 1;
    996 #ifdef DEBUG_TRIE_DICT
    997         totalDupes += dupes;
    998         fprintf(stderr, "Trie node dupe removal, pass %d: %d nodes tagged\n", passCount, dupes);
    999 #endif
   1000     }
   1001     while (dupes > 0);
   1002 #ifdef DEBUG_TRIE_DICT
   1003     fprintf(stderr, "Trie node dupe removal complete: %d tagged in %d passes\n", totalDupes, passCount);
   1004 #endif
   1005 
   1006     // We no longer need the temporary array, as the nodes have all been marked appropriately.
   1007     uprv_free(array);
   1008 }
   1009 
   1010 U_NAMESPACE_END
   1011 U_CDECL_BEGIN
   1012 static void U_CALLCONV _deleteBuildNode(void *obj) {
   1013     delete (BuildCompactTrieNode *) obj;
   1014 }
   1015 U_CDECL_END
   1016 U_NAMESPACE_BEGIN
   1017 
   1018 CompactTrieHeader *
   1019 CompactTrieDictionary::compactMutableTrieDictionary( const MutableTrieDictionary &dict,
   1020                                 UErrorCode &status ) {
   1021     if (U_FAILURE(status)) {
   1022         return NULL;
   1023     }
   1024 #ifdef DEBUG_TRIE_DICT
   1025     struct tms timing;
   1026     struct tms previous;
   1027     (void) ::times(&previous);
   1028 #endif
   1029     UStack nodes(_deleteBuildNode, NULL, status);      // Index of nodes
   1030 
   1031     // Add node 0, used as the NULL pointer/sentinel.
   1032     nodes.addElement((int32_t)0, status);
   1033 
   1034     // Start by creating the special empty node we use to indicate that the parent
   1035     // terminates a word. This must be node 1, because the builder assumes
   1036     // that.
   1037     if (U_FAILURE(status)) {
   1038         return NULL;
   1039     }
   1040     BuildCompactTrieNode *terminal = new BuildCompactTrieNode(TRUE, FALSE, nodes, status);
   1041     if (terminal == NULL) {
   1042         status = U_MEMORY_ALLOCATION_ERROR;
   1043     }
   1044 
   1045     // This call does all the work of building the new trie structure. The root
   1046     // will be node 2.
   1047     BuildCompactTrieNode *root = compactOneNode(dict.fTrie, FALSE, nodes, status);
   1048 #ifdef DEBUG_TRIE_DICT
   1049     (void) ::times(&timing);
   1050     fprintf(stderr, "Compact trie built, %d nodes, time user %f system %f\n",
   1051         nodes.size(), (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
   1052         (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
   1053     previous = timing;
   1054 #endif
   1055 
   1056     // Now coalesce all duplicate nodes.
   1057     coalesceDuplicates(nodes, status);
   1058 #ifdef DEBUG_TRIE_DICT
   1059     (void) ::times(&timing);
   1060     fprintf(stderr, "Duplicates coalesced, time user %f system %f\n",
   1061         (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
   1062         (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
   1063     previous = timing;
   1064 #endif
   1065 
   1066     // Next, build the output trie.
   1067     // First we compute all the sizes and build the node ID translation table.
   1068     uint32_t totalSize = offsetof(CompactTrieHeader,offsets);
   1069     int32_t count = nodes.size();
   1070     int32_t nodeCount = 1;              // The sentinel node we already have
   1071     BuildCompactTrieNode *node;
   1072     int32_t i;
   1073     UVector32 translate(count, status); // Should be no growth needed after this
   1074     translate.push(0, status);          // The sentinel node
   1075 
   1076     if (U_FAILURE(status)) {
   1077         return NULL;
   1078     }
   1079 
   1080     for (i = 1; i < count; ++i) {
   1081         node = (BuildCompactTrieNode *)nodes[i];
   1082         if (node->fNodeID == i) {
   1083             // Only one node out of each duplicate set is used
   1084             if (i >= translate.size()) {
   1085                 // Logically extend the mapping table
   1086                 translate.setSize(i+1);
   1087             }
   1088             translate.setElementAt(nodeCount++, i);
   1089             totalSize += node->size();
   1090         }
   1091     }
   1092 
   1093     // Check for overflowing 16 bits worth of nodes.
   1094     if (nodeCount > 0x10000) {
   1095         status = U_ILLEGAL_ARGUMENT_ERROR;
   1096         return NULL;
   1097     }
   1098 
   1099     // Add enough room for the offsets.
   1100     totalSize += nodeCount*sizeof(uint32_t);
   1101 #ifdef DEBUG_TRIE_DICT
   1102     (void) ::times(&timing);
   1103     fprintf(stderr, "Sizes/mapping done, time user %f system %f\n",
   1104         (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
   1105         (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
   1106     previous = timing;
   1107     fprintf(stderr, "%d nodes, %d unique, %d bytes\n", nodes.size(), nodeCount, totalSize);
   1108 #endif
   1109     uint8_t *bytes = (uint8_t *)uprv_malloc(totalSize);
   1110     if (bytes == NULL) {
   1111         status = U_MEMORY_ALLOCATION_ERROR;
   1112         return NULL;
   1113     }
   1114 
   1115     CompactTrieHeader *header = (CompactTrieHeader *)bytes;
   1116     header->size = totalSize;
   1117     header->nodeCount = nodeCount;
   1118     header->offsets[0] = 0;                     // Sentinel
   1119     header->root = translate.elementAti(root->fNodeID);
   1120 #ifdef DEBUG_TRIE_DICT
   1121     if (header->root == 0) {
   1122         fprintf(stderr, "ERROR: root node %d translate to physical zero\n", root->fNodeID);
   1123     }
   1124 #endif
   1125     uint32_t offset = offsetof(CompactTrieHeader,offsets)+(nodeCount*sizeof(uint32_t));
   1126     nodeCount = 1;
   1127     // Now write the data
   1128     for (i = 1; i < count; ++i) {
   1129         node = (BuildCompactTrieNode *)nodes[i];
   1130         if (node->fNodeID == i) {
   1131             header->offsets[nodeCount++] = offset;
   1132             node->write(bytes, offset, translate);
   1133         }
   1134     }
   1135 #ifdef DEBUG_TRIE_DICT
   1136     (void) ::times(&timing);
   1137     fprintf(stderr, "Trie built, time user %f system %f\n",
   1138         (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
   1139         (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
   1140     previous = timing;
   1141     fprintf(stderr, "Final offset is %d\n", offset);
   1142 
   1143     // Collect statistics on node types and sizes
   1144     int hCount = 0;
   1145     int vCount = 0;
   1146     size_t hSize = 0;
   1147     size_t vSize = 0;
   1148     size_t hItemCount = 0;
   1149     size_t vItemCount = 0;
   1150     uint32_t previousOff = offset;
   1151     for (uint16_t nodeIdx = nodeCount-1; nodeIdx >= 2; --nodeIdx) {
   1152         const CompactTrieNode *node = getCompactNode(header, nodeIdx);
   1153         if (node->flagscount & kVerticalNode) {
   1154             vCount += 1;
   1155             vItemCount += (node->flagscount & kCountMask);
   1156             vSize += previousOff-header->offsets[nodeIdx];
   1157         }
   1158         else {
   1159             hCount += 1;
   1160             hItemCount += (node->flagscount & kCountMask);
   1161             hSize += previousOff-header->offsets[nodeIdx];
   1162         }
   1163         previousOff = header->offsets[nodeIdx];
   1164     }
   1165     fprintf(stderr, "Horizontal nodes: %d total, average %f bytes with %f items\n", hCount,
   1166                 (double)hSize/hCount, (double)hItemCount/hCount);
   1167     fprintf(stderr, "Vertical nodes: %d total, average %f bytes with %f items\n", vCount,
   1168                 (double)vSize/vCount, (double)vItemCount/vCount);
   1169 #endif
   1170 
   1171     if (U_FAILURE(status)) {
   1172         uprv_free(bytes);
   1173         header = NULL;
   1174     }
   1175     else {
   1176         header->magic = COMPACT_TRIE_MAGIC_1;
   1177     }
   1178     return header;
   1179 }
   1180 
   1181 // Forward declaration
   1182 static TernaryNode *
   1183 unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status );
   1184 
   1185 
   1186 // Convert a horizontal node (or subarray thereof) into a ternary subtrie
   1187 static TernaryNode *
   1188 unpackHorizontalArray( const CompactTrieHeader *header, const CompactTrieHorizontalEntry *array,
   1189                             int low, int high, UErrorCode &status ) {
   1190     if (U_FAILURE(status) || low > high) {
   1191         return NULL;
   1192     }
   1193     int middle = (low+high)/2;
   1194     TernaryNode *result = new TernaryNode(array[middle].ch);
   1195     if (result == NULL) {
   1196         status = U_MEMORY_ALLOCATION_ERROR;
   1197         return NULL;
   1198     }
   1199     const CompactTrieNode *equal = getCompactNode(header, array[middle].equal);
   1200     if (equal->flagscount & kParentEndsWord) {
   1201         result->flags |= kEndsWord;
   1202     }
   1203     result->low = unpackHorizontalArray(header, array, low, middle-1, status);
   1204     result->high = unpackHorizontalArray(header, array, middle+1, high, status);
   1205     result->equal = unpackOneNode(header, equal, status);
   1206     return result;
   1207 }
   1208 
   1209 // Convert one compact trie node into a ternary subtrie
   1210 static TernaryNode *
   1211 unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status ) {
   1212     int nodeCount = (node->flagscount & kCountMask);
   1213     if (nodeCount == 0 || U_FAILURE(status)) {
   1214         // Failure, or terminal node
   1215         return NULL;
   1216     }
   1217     if (node->flagscount & kVerticalNode) {
   1218         const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
   1219         TernaryNode *head = NULL;
   1220         TernaryNode *previous = NULL;
   1221         TernaryNode *latest = NULL;
   1222         for (int i = 0; i < nodeCount; ++i) {
   1223             latest = new TernaryNode(vnode->chars[i]);
   1224             if (latest == NULL) {
   1225                 status = U_MEMORY_ALLOCATION_ERROR;
   1226                 break;
   1227             }
   1228             if (head == NULL) {
   1229                 head = latest;
   1230             }
   1231             if (previous != NULL) {
   1232                 previous->equal = latest;
   1233             }
   1234             previous = latest;
   1235         }
   1236         if (latest != NULL) {
   1237             const CompactTrieNode *equal = getCompactNode(header, vnode->equal);
   1238             if (equal->flagscount & kParentEndsWord) {
   1239                 latest->flags |= kEndsWord;
   1240             }
   1241             latest->equal = unpackOneNode(header, equal, status);
   1242         }
   1243         return head;
   1244     }
   1245     else {
   1246         // Horizontal node
   1247         const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
   1248         return unpackHorizontalArray(header, &hnode->entries[0], 0, nodeCount-1, status);
   1249     }
   1250 }
   1251 
   1252 MutableTrieDictionary *
   1253 CompactTrieDictionary::cloneMutable( UErrorCode &status ) const {
   1254     MutableTrieDictionary *result = new MutableTrieDictionary( status );
   1255     if (result == NULL) {
   1256         status = U_MEMORY_ALLOCATION_ERROR;
   1257         return NULL;
   1258     }
   1259     TernaryNode *root = unpackOneNode(fData, getCompactNode(fData, fData->root), status);
   1260     if (U_FAILURE(status)) {
   1261         delete root;    // Clean up
   1262         delete result;
   1263         return NULL;
   1264     }
   1265     result->fTrie = root;
   1266     return result;
   1267 }
   1268 
   1269 U_NAMESPACE_END
   1270 
   1271 U_CAPI int32_t U_EXPORT2
   1272 triedict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
   1273            UErrorCode *status) {
   1274 
   1275     if (status == NULL || U_FAILURE(*status)) {
   1276         return 0;
   1277     }
   1278     if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
   1279         *status=U_ILLEGAL_ARGUMENT_ERROR;
   1280         return 0;
   1281     }
   1282 
   1283     //
   1284     //  Check that the data header is for for dictionary data.
   1285     //    (Header contents are defined in genxxx.cpp)
   1286     //
   1287     const UDataInfo *pInfo = (const UDataInfo *)((const uint8_t *)inData+4);
   1288     if(!(  pInfo->dataFormat[0]==0x54 &&   /* dataFormat="TrDc" */
   1289            pInfo->dataFormat[1]==0x72 &&
   1290            pInfo->dataFormat[2]==0x44 &&
   1291            pInfo->dataFormat[3]==0x63 &&
   1292            pInfo->formatVersion[0]==1  )) {
   1293         udata_printError(ds, "triedict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
   1294                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   1295                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   1296                          pInfo->formatVersion[0]);
   1297         *status=U_UNSUPPORTED_ERROR;
   1298         return 0;
   1299     }
   1300 
   1301     //
   1302     // Swap the data header.  (This is the generic ICU Data Header, not the
   1303     //                         CompactTrieHeader).  This swap also conveniently gets us
   1304     //                         the size of the ICU d.h., which lets us locate the start
   1305     //                         of the RBBI specific data.
   1306     //
   1307     int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
   1308 
   1309     //
   1310     // Get the CompactTrieHeader, and check that it appears to be OK.
   1311     //
   1312     const uint8_t  *inBytes =(const uint8_t *)inData+headerSize;
   1313     const CompactTrieHeader *header = (const CompactTrieHeader *)inBytes;
   1314     if (ds->readUInt32(header->magic) != COMPACT_TRIE_MAGIC_1
   1315             || ds->readUInt32(header->size) < sizeof(CompactTrieHeader))
   1316     {
   1317         udata_printError(ds, "triedict_swap(): CompactTrieHeader is invalid.\n");
   1318         *status=U_UNSUPPORTED_ERROR;
   1319         return 0;
   1320     }
   1321 
   1322     //
   1323     // Prefight operation?  Just return the size
   1324     //
   1325     uint32_t totalSize = ds->readUInt32(header->size);
   1326     int32_t sizeWithUData = (int32_t)totalSize + headerSize;
   1327     if (length < 0) {
   1328         return sizeWithUData;
   1329     }
   1330 
   1331     //
   1332     // Check that length passed in is consistent with length from RBBI data header.
   1333     //
   1334     if (length < sizeWithUData) {
   1335         udata_printError(ds, "triedict_swap(): too few bytes (%d after ICU Data header) for trie data.\n",
   1336                             totalSize);
   1337         *status=U_INDEX_OUTOFBOUNDS_ERROR;
   1338         return 0;
   1339         }
   1340 
   1341     //
   1342     // Swap the Data.  Do the data itself first, then the CompactTrieHeader, because
   1343     //                 we need to reference the header to locate the data, and an
   1344     //                 inplace swap of the header leaves it unusable.
   1345     //
   1346     uint8_t             *outBytes = (uint8_t *)outData + headerSize;
   1347     CompactTrieHeader   *outputHeader = (CompactTrieHeader *)outBytes;
   1348 
   1349 #if 0
   1350     //
   1351     // If not swapping in place, zero out the output buffer before starting.
   1352     //
   1353     if (inBytes != outBytes) {
   1354         uprv_memset(outBytes, 0, totalSize);
   1355     }
   1356 
   1357     // We need to loop through all the nodes in the offset table, and swap each one.
   1358     uint16_t nodeCount = ds->readUInt16(header->nodeCount);
   1359     // Skip node 0, which should always be 0.
   1360     for (int i = 1; i < nodeCount; ++i) {
   1361         uint32_t nodeOff = ds->readUInt32(header->offsets[i]);
   1362         const CompactTrieNode *inNode = (const CompactTrieNode *)(inBytes + nodeOff);
   1363         CompactTrieNode *outNode = (CompactTrieNode *)(outBytes + nodeOff);
   1364         uint16_t flagscount = ds->readUInt16(inNode->flagscount);
   1365         uint16_t itemCount = flagscount & kCountMask;
   1366         ds->writeUInt16(&outNode->flagscount, flagscount);
   1367         if (itemCount > 0) {
   1368             if (flagscount & kVerticalNode) {
   1369                 ds->swapArray16(ds, inBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars),
   1370                                     itemCount*sizeof(uint16_t),
   1371                                     outBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars), status);
   1372                 uint16_t equal = ds->readUInt16(inBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal);
   1373                 ds->writeUInt16(outBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal));
   1374             }
   1375             else {
   1376                 const CompactTrieHorizontalNode *inHNode = (const CompactTrieHorizontalNode *)inNode;
   1377                 CompactTrieHorizontalNode *outHNode = (CompactTrieHorizontalNode *)outNode;
   1378                 for (int j = 0; j < itemCount; ++j) {
   1379                     uint16_t word = ds->readUInt16(inHNode->entries[j].ch);
   1380                     ds->writeUInt16(&outHNode->entries[j].ch, word);
   1381                     word = ds->readUInt16(inHNode->entries[j].equal);
   1382                     ds->writeUInt16(&outHNode->entries[j].equal, word);
   1383                 }
   1384             }
   1385         }
   1386     }
   1387 #endif
   1388 
   1389     // All the data in all the nodes consist of 16 bit items. Swap them all at once.
   1390     uint16_t nodeCount = ds->readUInt16(header->nodeCount);
   1391     uint32_t nodesOff = offsetof(CompactTrieHeader,offsets)+((uint32_t)nodeCount*sizeof(uint32_t));
   1392     ds->swapArray16(ds, inBytes+nodesOff, totalSize-nodesOff, outBytes+nodesOff, status);
   1393 
   1394     // Swap the header
   1395     ds->writeUInt32(&outputHeader->size, totalSize);
   1396     uint32_t magic = ds->readUInt32(header->magic);
   1397     ds->writeUInt32(&outputHeader->magic, magic);
   1398     ds->writeUInt16(&outputHeader->nodeCount, nodeCount);
   1399     uint16_t root = ds->readUInt16(header->root);
   1400     ds->writeUInt16(&outputHeader->root, root);
   1401     ds->swapArray32(ds, inBytes+offsetof(CompactTrieHeader,offsets),
   1402             sizeof(uint32_t)*(int32_t)nodeCount,
   1403             outBytes+offsetof(CompactTrieHeader,offsets), status);
   1404 
   1405     return sizeWithUData;
   1406 }
   1407 
   1408 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
   1409