1 /** 2 ******************************************************************************* 3 * Copyright (C) 2006-2008, International Business Machines Corporation * 4 * and others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 8 #include "unicode/utypes.h" 9 10 #if !UCONFIG_NO_BREAK_ITERATION 11 12 #include "triedict.h" 13 #include "unicode/chariter.h" 14 #include "unicode/uchriter.h" 15 #include "unicode/strenum.h" 16 #include "unicode/uenum.h" 17 #include "unicode/udata.h" 18 #include "cmemory.h" 19 #include "udataswp.h" 20 #include "uvector.h" 21 #include "uvectr32.h" 22 #include "uarrsort.h" 23 24 //#define DEBUG_TRIE_DICT 1 25 26 #ifdef DEBUG_TRIE_DICT 27 #include <sys/times.h> 28 #include <limits.h> 29 #include <stdio.h> 30 #endif 31 32 U_NAMESPACE_BEGIN 33 34 /******************************************************************* 35 * TrieWordDictionary 36 */ 37 38 TrieWordDictionary::TrieWordDictionary() { 39 } 40 41 TrieWordDictionary::~TrieWordDictionary() { 42 } 43 44 /******************************************************************* 45 * MutableTrieDictionary 46 */ 47 48 // Node structure for the ternary, uncompressed trie 49 struct TernaryNode : public UMemory { 50 UChar ch; // UTF-16 code unit 51 uint16_t flags; // Flag word 52 TernaryNode *low; // Less-than link 53 TernaryNode *equal; // Equal link 54 TernaryNode *high; // Greater-than link 55 56 TernaryNode(UChar uc); 57 ~TernaryNode(); 58 }; 59 60 enum MutableTrieNodeFlags { 61 kEndsWord = 0x0001 // This node marks the end of a valid word 62 }; 63 64 inline 65 TernaryNode::TernaryNode(UChar uc) { 66 ch = uc; 67 flags = 0; 68 low = NULL; 69 equal = NULL; 70 high = NULL; 71 } 72 73 // Not inline since it's recursive 74 TernaryNode::~TernaryNode() { 75 delete low; 76 delete equal; 77 delete high; 78 } 79 80 MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status ) { 81 // Start the trie off with something. Having the root node already present 82 // cuts a special case out of the search/insertion functions. 83 // Making it a median character cuts the worse case for searches from 84 // 4x a balanced trie to 2x a balanced trie. It's best to choose something 85 // that starts a word that is midway in the list. 86 fTrie = new TernaryNode(median); 87 if (fTrie == NULL) { 88 status = U_MEMORY_ALLOCATION_ERROR; 89 } 90 fIter = utext_openUChars(NULL, NULL, 0, &status); 91 if (U_SUCCESS(status) && fIter == NULL) { 92 status = U_MEMORY_ALLOCATION_ERROR; 93 } 94 } 95 96 MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status ) { 97 fTrie = NULL; 98 fIter = utext_openUChars(NULL, NULL, 0, &status); 99 if (U_SUCCESS(status) && fIter == NULL) { 100 status = U_MEMORY_ALLOCATION_ERROR; 101 } 102 } 103 104 MutableTrieDictionary::~MutableTrieDictionary() { 105 delete fTrie; 106 utext_close(fIter); 107 } 108 109 int32_t 110 MutableTrieDictionary::search( UText *text, 111 int32_t maxLength, 112 int32_t *lengths, 113 int &count, 114 int limit, 115 TernaryNode *&parent, 116 UBool &pMatched ) const { 117 // TODO: current implementation works in UTF-16 space 118 const TernaryNode *up = NULL; 119 const TernaryNode *p = fTrie; 120 int mycount = 0; 121 pMatched = TRUE; 122 int i; 123 124 UChar uc = utext_current32(text); 125 for (i = 0; i < maxLength && p != NULL; ++i) { 126 while (p != NULL) { 127 if (uc < p->ch) { 128 up = p; 129 p = p->low; 130 } 131 else if (uc == p->ch) { 132 break; 133 } 134 else { 135 up = p; 136 p = p->high; 137 } 138 } 139 if (p == NULL) { 140 pMatched = FALSE; 141 break; 142 } 143 // Must be equal to get here 144 if (limit > 0 && (p->flags & kEndsWord)) { 145 lengths[mycount++] = i+1; 146 --limit; 147 } 148 up = p; 149 p = p->equal; 150 uc = utext_next32(text); 151 uc = utext_current32(text); 152 } 153 154 // Note that there is no way to reach here with up == 0 unless 155 // maxLength is 0 coming in. 156 parent = (TernaryNode *)up; 157 count = mycount; 158 return i; 159 } 160 161 void 162 MutableTrieDictionary::addWord( const UChar *word, 163 int32_t length, 164 UErrorCode &status ) { 165 #if 0 166 if (length <= 0) { 167 status = U_ILLEGAL_ARGUMENT_ERROR; 168 return; 169 } 170 #endif 171 TernaryNode *parent; 172 UBool pMatched; 173 int count; 174 fIter = utext_openUChars(fIter, word, length, &status); 175 176 int matched; 177 matched = search(fIter, length, NULL, count, 0, parent, pMatched); 178 179 while (matched++ < length) { 180 UChar32 uc = utext_next32(fIter); // TODO: supplemetary support? 181 U_ASSERT(uc != U_SENTINEL); 182 TernaryNode *newNode = new TernaryNode(uc); 183 if (newNode == NULL) { 184 status = U_MEMORY_ALLOCATION_ERROR; 185 return; 186 } 187 if (pMatched) { 188 parent->equal = newNode; 189 } 190 else { 191 pMatched = TRUE; 192 if (uc < parent->ch) { 193 parent->low = newNode; 194 } 195 else { 196 parent->high = newNode; 197 } 198 } 199 parent = newNode; 200 } 201 202 parent->flags |= kEndsWord; 203 } 204 205 #if 0 206 void 207 MutableTrieDictionary::addWords( UEnumeration *words, 208 UErrorCode &status ) { 209 int32_t length; 210 const UChar *word; 211 while ((word = uenum_unext(words, &length, &status)) && U_SUCCESS(status)) { 212 addWord(word, length, status); 213 } 214 } 215 #endif 216 217 int32_t 218 MutableTrieDictionary::matches( UText *text, 219 int32_t maxLength, 220 int32_t *lengths, 221 int &count, 222 int limit ) const { 223 TernaryNode *parent; 224 UBool pMatched; 225 return search(text, maxLength, lengths, count, limit, parent, pMatched); 226 } 227 228 // Implementation of iteration for MutableTrieDictionary 229 class MutableTrieEnumeration : public StringEnumeration { 230 private: 231 UStack fNodeStack; // Stack of nodes to process 232 UVector32 fBranchStack; // Stack of which branch we are working on 233 TernaryNode *fRoot; // Root node 234 enum StackBranch { 235 kLessThan, 236 kEqual, 237 kGreaterThan, 238 kDone 239 }; 240 241 public: 242 static UClassID U_EXPORT2 getStaticClassID(void); 243 virtual UClassID getDynamicClassID(void) const; 244 public: 245 MutableTrieEnumeration(TernaryNode *root, UErrorCode &status) 246 : fNodeStack(status), fBranchStack(status) { 247 fRoot = root; 248 fNodeStack.push(root, status); 249 fBranchStack.push(kLessThan, status); 250 unistr.remove(); 251 } 252 253 virtual ~MutableTrieEnumeration() { 254 } 255 256 virtual StringEnumeration *clone() const { 257 UErrorCode status = U_ZERO_ERROR; 258 return new MutableTrieEnumeration(fRoot, status); 259 } 260 261 virtual const UnicodeString *snext(UErrorCode &status) { 262 if (fNodeStack.empty() || U_FAILURE(status)) { 263 return NULL; 264 } 265 TernaryNode *node = (TernaryNode *) fNodeStack.peek(); 266 StackBranch where = (StackBranch) fBranchStack.peeki(); 267 while (!fNodeStack.empty() && U_SUCCESS(status)) { 268 UBool emit; 269 UBool equal; 270 271 switch (where) { 272 case kLessThan: 273 if (node->low != NULL) { 274 fBranchStack.setElementAt(kEqual, fBranchStack.size()-1); 275 node = (TernaryNode *) fNodeStack.push(node->low, status); 276 where = (StackBranch) fBranchStack.push(kLessThan, status); 277 break; 278 } 279 case kEqual: 280 emit = (node->flags & kEndsWord) != 0; 281 equal = (node->equal != NULL); 282 // If this node should be part of the next emitted string, append 283 // the UChar to the string, and make sure we pop it when we come 284 // back to this node. The character should only be in the string 285 // for as long as we're traversing the equal subtree of this node 286 if (equal || emit) { 287 unistr.append(node->ch); 288 fBranchStack.setElementAt(kGreaterThan, fBranchStack.size()-1); 289 } 290 if (equal) { 291 node = (TernaryNode *) fNodeStack.push(node->equal, status); 292 where = (StackBranch) fBranchStack.push(kLessThan, status); 293 } 294 if (emit) { 295 return &unistr; 296 } 297 if (equal) { 298 break; 299 } 300 case kGreaterThan: 301 // If this node's character is in the string, remove it. 302 if (node->equal != NULL || (node->flags & kEndsWord)) { 303 unistr.truncate(unistr.length()-1); 304 } 305 if (node->high != NULL) { 306 fBranchStack.setElementAt(kDone, fBranchStack.size()-1); 307 node = (TernaryNode *) fNodeStack.push(node->high, status); 308 where = (StackBranch) fBranchStack.push(kLessThan, status); 309 break; 310 } 311 case kDone: 312 fNodeStack.pop(); 313 fBranchStack.popi(); 314 node = (TernaryNode *) fNodeStack.peek(); 315 where = (StackBranch) fBranchStack.peeki(); 316 break; 317 default: 318 return NULL; 319 } 320 } 321 return NULL; 322 } 323 324 // Very expensive, but this should never be used. 325 virtual int32_t count(UErrorCode &status) const { 326 MutableTrieEnumeration counter(fRoot, status); 327 int32_t result = 0; 328 while (counter.snext(status) != NULL && U_SUCCESS(status)) { 329 ++result; 330 } 331 return result; 332 } 333 334 virtual void reset(UErrorCode &status) { 335 fNodeStack.removeAllElements(); 336 fBranchStack.removeAllElements(); 337 fNodeStack.push(fRoot, status); 338 fBranchStack.push(kLessThan, status); 339 unistr.remove(); 340 } 341 }; 342 343 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MutableTrieEnumeration) 344 345 StringEnumeration * 346 MutableTrieDictionary::openWords( UErrorCode &status ) const { 347 if (U_FAILURE(status)) { 348 return NULL; 349 } 350 return new MutableTrieEnumeration(fTrie, status); 351 } 352 353 /******************************************************************* 354 * CompactTrieDictionary 355 */ 356 357 struct CompactTrieHeader { 358 uint32_t size; // Size of the data in bytes 359 uint32_t magic; // Magic number (including version) 360 uint16_t nodeCount; // Number of entries in offsets[] 361 uint16_t root; // Node number of the root node 362 uint32_t offsets[1]; // Offsets to nodes from start of data 363 }; 364 365 // Note that to avoid platform-specific alignment issues, all members of the node 366 // structures should be the same size, or should contain explicit padding to 367 // natural alignment boundaries. 368 369 // We can't use a bitfield for the flags+count field, because the layout of those 370 // is not portable. 12 bits of count allows for up to 4096 entries in a node. 371 struct CompactTrieNode { 372 uint16_t flagscount; // Count of sub-entries, plus flags 373 }; 374 375 enum CompactTrieNodeFlags { 376 kVerticalNode = 0x1000, // This is a vertical node 377 kParentEndsWord = 0x2000, // The node whose equal link points to this ends a word 378 kReservedFlag1 = 0x4000, 379 kReservedFlag2 = 0x8000, 380 kCountMask = 0x0FFF, // The count portion of flagscount 381 kFlagMask = 0xF000 // The flags portion of flagscount 382 }; 383 384 // The two node types are distinguished by the kVerticalNode flag. 385 386 struct CompactTrieHorizontalEntry { 387 uint16_t ch; // UChar 388 uint16_t equal; // Equal link node index 389 }; 390 391 // We don't use inheritance here because C++ does not guarantee that the 392 // base class comes first in memory!! 393 394 struct CompactTrieHorizontalNode { 395 uint16_t flagscount; // Count of sub-entries, plus flags 396 CompactTrieHorizontalEntry entries[1]; 397 }; 398 399 struct CompactTrieVerticalNode { 400 uint16_t flagscount; // Count of sub-entries, plus flags 401 uint16_t equal; // Equal link node index 402 uint16_t chars[1]; // Code units 403 }; 404 405 // {'Dic', 1}, version 1 406 #define COMPACT_TRIE_MAGIC_1 0x44696301 407 408 CompactTrieDictionary::CompactTrieDictionary(UDataMemory *dataObj, 409 UErrorCode &status ) 410 : fUData(dataObj) 411 { 412 fData = (const CompactTrieHeader *) udata_getMemory(dataObj); 413 fOwnData = FALSE; 414 if (fData->magic != COMPACT_TRIE_MAGIC_1) { 415 status = U_ILLEGAL_ARGUMENT_ERROR; 416 fData = NULL; 417 } 418 } 419 CompactTrieDictionary::CompactTrieDictionary( const void *data, 420 UErrorCode &status ) 421 : fUData(NULL) 422 { 423 fData = (const CompactTrieHeader *) data; 424 fOwnData = FALSE; 425 if (fData->magic != COMPACT_TRIE_MAGIC_1) { 426 status = U_ILLEGAL_ARGUMENT_ERROR; 427 fData = NULL; 428 } 429 } 430 431 CompactTrieDictionary::CompactTrieDictionary( const MutableTrieDictionary &dict, 432 UErrorCode &status ) 433 : fUData(NULL) 434 { 435 fData = compactMutableTrieDictionary(dict, status); 436 fOwnData = !U_FAILURE(status); 437 } 438 439 CompactTrieDictionary::~CompactTrieDictionary() { 440 if (fOwnData) { 441 uprv_free((void *)fData); 442 } 443 if (fUData) { 444 udata_close(fUData); 445 } 446 } 447 448 uint32_t 449 CompactTrieDictionary::dataSize() const { 450 return fData->size; 451 } 452 453 const void * 454 CompactTrieDictionary::data() const { 455 return fData; 456 } 457 458 // This function finds the address of a node for us, given its node ID 459 static inline const CompactTrieNode * 460 getCompactNode(const CompactTrieHeader *header, uint16_t node) { 461 return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]); 462 } 463 464 int32_t 465 CompactTrieDictionary::matches( UText *text, 466 int32_t maxLength, 467 int32_t *lengths, 468 int &count, 469 int limit ) const { 470 // TODO: current implementation works in UTF-16 space 471 const CompactTrieNode *node = getCompactNode(fData, fData->root); 472 int mycount = 0; 473 474 UChar uc = utext_current32(text); 475 int i = 0; 476 477 while (node != NULL) { 478 // Check if the node we just exited ends a word 479 if (limit > 0 && (node->flagscount & kParentEndsWord)) { 480 lengths[mycount++] = i; 481 --limit; 482 } 483 // Check that we haven't exceeded the maximum number of input characters. 484 // We have to do that here rather than in the while condition so that 485 // we can check for ending a word, above. 486 if (i >= maxLength) { 487 break; 488 } 489 490 int nodeCount = (node->flagscount & kCountMask); 491 if (nodeCount == 0) { 492 // Special terminal node; return now 493 break; 494 } 495 if (node->flagscount & kVerticalNode) { 496 // Vertical node; check all the characters in it 497 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node; 498 for (int j = 0; j < nodeCount && i < maxLength; ++j) { 499 if (uc != vnode->chars[j]) { 500 // We hit a non-equal character; return 501 goto exit; 502 } 503 utext_next32(text); 504 uc = utext_current32(text); 505 ++i; 506 } 507 // To get here we must have come through the whole list successfully; 508 // go on to the next node. Note that a word cannot end in the middle 509 // of a vertical node. 510 node = getCompactNode(fData, vnode->equal); 511 } 512 else { 513 // Horizontal node; do binary search 514 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node; 515 int low = 0; 516 int high = nodeCount-1; 517 int middle; 518 node = NULL; // If we don't find a match, we'll fall out of the loop 519 while (high >= low) { 520 middle = (high+low)/2; 521 if (uc == hnode->entries[middle].ch) { 522 // We hit a match; get the next node and next character 523 node = getCompactNode(fData, hnode->entries[middle].equal); 524 utext_next32(text); 525 uc = utext_current32(text); 526 ++i; 527 break; 528 } 529 else if (uc < hnode->entries[middle].ch) { 530 high = middle-1; 531 } 532 else { 533 low = middle+1; 534 } 535 } 536 } 537 } 538 exit: 539 count = mycount; 540 return i; 541 } 542 543 // Implementation of iteration for CompactTrieDictionary 544 class CompactTrieEnumeration : public StringEnumeration { 545 private: 546 UVector32 fNodeStack; // Stack of nodes to process 547 UVector32 fIndexStack; // Stack of where in node we are 548 const CompactTrieHeader *fHeader; // Trie data 549 550 public: 551 static UClassID U_EXPORT2 getStaticClassID(void); 552 virtual UClassID getDynamicClassID(void) const; 553 public: 554 CompactTrieEnumeration(const CompactTrieHeader *header, UErrorCode &status) 555 : fNodeStack(status), fIndexStack(status) { 556 fHeader = header; 557 fNodeStack.push(header->root, status); 558 fIndexStack.push(0, status); 559 unistr.remove(); 560 } 561 562 virtual ~CompactTrieEnumeration() { 563 } 564 565 virtual StringEnumeration *clone() const { 566 UErrorCode status = U_ZERO_ERROR; 567 return new CompactTrieEnumeration(fHeader, status); 568 } 569 570 virtual const UnicodeString * snext(UErrorCode &status); 571 572 // Very expensive, but this should never be used. 573 virtual int32_t count(UErrorCode &status) const { 574 CompactTrieEnumeration counter(fHeader, status); 575 int32_t result = 0; 576 while (counter.snext(status) != NULL && U_SUCCESS(status)) { 577 ++result; 578 } 579 return result; 580 } 581 582 virtual void reset(UErrorCode &status) { 583 fNodeStack.removeAllElements(); 584 fIndexStack.removeAllElements(); 585 fNodeStack.push(fHeader->root, status); 586 fIndexStack.push(0, status); 587 unistr.remove(); 588 } 589 }; 590 591 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactTrieEnumeration) 592 593 const UnicodeString * 594 CompactTrieEnumeration::snext(UErrorCode &status) { 595 if (fNodeStack.empty() || U_FAILURE(status)) { 596 return NULL; 597 } 598 const CompactTrieNode *node = getCompactNode(fHeader, fNodeStack.peeki()); 599 int where = fIndexStack.peeki(); 600 while (!fNodeStack.empty() && U_SUCCESS(status)) { 601 int nodeCount = (node->flagscount & kCountMask); 602 UBool goingDown = FALSE; 603 if (nodeCount == 0) { 604 // Terminal node; go up immediately 605 fNodeStack.popi(); 606 fIndexStack.popi(); 607 node = getCompactNode(fHeader, fNodeStack.peeki()); 608 where = fIndexStack.peeki(); 609 } 610 else if (node->flagscount & kVerticalNode) { 611 // Vertical node 612 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node; 613 if (where == 0) { 614 // Going down 615 unistr.append((const UChar *)vnode->chars, (int32_t) nodeCount); 616 fIndexStack.setElementAt(1, fIndexStack.size()-1); 617 node = getCompactNode(fHeader, fNodeStack.push(vnode->equal, status)); 618 where = fIndexStack.push(0, status); 619 goingDown = TRUE; 620 } 621 else { 622 // Going up 623 unistr.truncate(unistr.length()-nodeCount); 624 fNodeStack.popi(); 625 fIndexStack.popi(); 626 node = getCompactNode(fHeader, fNodeStack.peeki()); 627 where = fIndexStack.peeki(); 628 } 629 } 630 else { 631 // Horizontal node 632 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node; 633 if (where > 0) { 634 // Pop previous char 635 unistr.truncate(unistr.length()-1); 636 } 637 if (where < nodeCount) { 638 // Push on next node 639 unistr.append((UChar)hnode->entries[where].ch); 640 fIndexStack.setElementAt(where+1, fIndexStack.size()-1); 641 node = getCompactNode(fHeader, fNodeStack.push(hnode->entries[where].equal, status)); 642 where = fIndexStack.push(0, status); 643 goingDown = TRUE; 644 } 645 else { 646 // Going up 647 fNodeStack.popi(); 648 fIndexStack.popi(); 649 node = getCompactNode(fHeader, fNodeStack.peeki()); 650 where = fIndexStack.peeki(); 651 } 652 } 653 // Check if the parent of the node we've just gone down to ends a 654 // word. If so, return it. 655 if (goingDown && (node->flagscount & kParentEndsWord)) { 656 return &unistr; 657 } 658 } 659 return NULL; 660 } 661 662 StringEnumeration * 663 CompactTrieDictionary::openWords( UErrorCode &status ) const { 664 if (U_FAILURE(status)) { 665 return NULL; 666 } 667 return new CompactTrieEnumeration(fData, status); 668 } 669 670 // 671 // Below here is all code related to converting a ternary trie to a compact trie 672 // and back again 673 // 674 675 // Helper classes to construct the compact trie 676 class BuildCompactTrieNode: public UMemory { 677 public: 678 UBool fParentEndsWord; 679 UBool fVertical; 680 UBool fHasDuplicate; 681 int32_t fNodeID; 682 UnicodeString fChars; 683 684 public: 685 BuildCompactTrieNode(UBool parentEndsWord, UBool vertical, UStack &nodes, UErrorCode &status) { 686 fParentEndsWord = parentEndsWord; 687 fHasDuplicate = FALSE; 688 fVertical = vertical; 689 fNodeID = nodes.size(); 690 nodes.push(this, status); 691 } 692 693 virtual ~BuildCompactTrieNode() { 694 } 695 696 virtual uint32_t size() { 697 return sizeof(uint16_t); 698 } 699 700 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &/*translate*/) { 701 // Write flag/count 702 *((uint16_t *)(bytes+offset)) = (fChars.length() & kCountMask) 703 | (fVertical ? kVerticalNode : 0) | (fParentEndsWord ? kParentEndsWord : 0 ); 704 offset += sizeof(uint16_t); 705 } 706 }; 707 708 class BuildCompactTrieHorizontalNode: public BuildCompactTrieNode { 709 public: 710 UStack fLinks; 711 712 public: 713 BuildCompactTrieHorizontalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status) 714 : BuildCompactTrieNode(parentEndsWord, FALSE, nodes, status), fLinks(status) { 715 } 716 717 virtual ~BuildCompactTrieHorizontalNode() { 718 } 719 720 virtual uint32_t size() { 721 return offsetof(CompactTrieHorizontalNode,entries) + 722 (fChars.length()*sizeof(CompactTrieHorizontalEntry)); 723 } 724 725 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) { 726 BuildCompactTrieNode::write(bytes, offset, translate); 727 int32_t count = fChars.length(); 728 for (int32_t i = 0; i < count; ++i) { 729 CompactTrieHorizontalEntry *entry = (CompactTrieHorizontalEntry *)(bytes+offset); 730 entry->ch = fChars[i]; 731 entry->equal = translate.elementAti(((BuildCompactTrieNode *)fLinks[i])->fNodeID); 732 #ifdef DEBUG_TRIE_DICT 733 if (entry->equal == 0) { 734 fprintf(stderr, "ERROR: horizontal link %d, logical node %d maps to physical node zero\n", 735 i, ((BuildCompactTrieNode *)fLinks[i])->fNodeID); 736 } 737 #endif 738 offset += sizeof(CompactTrieHorizontalEntry); 739 } 740 } 741 742 void addNode(UChar ch, BuildCompactTrieNode *link, UErrorCode &status) { 743 fChars.append(ch); 744 fLinks.push(link, status); 745 } 746 }; 747 748 class BuildCompactTrieVerticalNode: public BuildCompactTrieNode { 749 public: 750 BuildCompactTrieNode *fEqual; 751 752 public: 753 BuildCompactTrieVerticalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status) 754 : BuildCompactTrieNode(parentEndsWord, TRUE, nodes, status) { 755 fEqual = NULL; 756 } 757 758 virtual ~BuildCompactTrieVerticalNode() { 759 } 760 761 virtual uint32_t size() { 762 return offsetof(CompactTrieVerticalNode,chars) + (fChars.length()*sizeof(uint16_t)); 763 } 764 765 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) { 766 CompactTrieVerticalNode *node = (CompactTrieVerticalNode *)(bytes+offset); 767 BuildCompactTrieNode::write(bytes, offset, translate); 768 node->equal = translate.elementAti(fEqual->fNodeID); 769 offset += sizeof(node->equal); 770 #ifdef DEBUG_TRIE_DICT 771 if (node->equal == 0) { 772 fprintf(stderr, "ERROR: vertical link, logical node %d maps to physical node zero\n", 773 fEqual->fNodeID); 774 } 775 #endif 776 fChars.extract(0, fChars.length(), (UChar *)node->chars); 777 offset += sizeof(uint16_t)*fChars.length(); 778 } 779 780 void addChar(UChar ch) { 781 fChars.append(ch); 782 } 783 784 void setLink(BuildCompactTrieNode *node) { 785 fEqual = node; 786 } 787 }; 788 789 // Forward declaration 790 static void walkHorizontal(const TernaryNode *node, 791 BuildCompactTrieHorizontalNode *building, 792 UStack &nodes, 793 UErrorCode &status); 794 795 // Convert one node. Uses recursion. 796 797 static BuildCompactTrieNode * 798 compactOneNode(const TernaryNode *node, UBool parentEndsWord, UStack &nodes, UErrorCode &status) { 799 if (U_FAILURE(status)) { 800 return NULL; 801 } 802 BuildCompactTrieNode *result = NULL; 803 UBool horizontal = (node->low != NULL || node->high != NULL); 804 if (horizontal) { 805 BuildCompactTrieHorizontalNode *hResult = 806 new BuildCompactTrieHorizontalNode(parentEndsWord, nodes, status); 807 if (hResult == NULL) { 808 status = U_MEMORY_ALLOCATION_ERROR; 809 return NULL; 810 } 811 if (U_SUCCESS(status)) { 812 walkHorizontal(node, hResult, nodes, status); 813 result = hResult; 814 } 815 } 816 else { 817 BuildCompactTrieVerticalNode *vResult = 818 new BuildCompactTrieVerticalNode(parentEndsWord, nodes, status); 819 if (vResult == NULL) { 820 status = U_MEMORY_ALLOCATION_ERROR; 821 } 822 else if (U_SUCCESS(status)) { 823 UBool endsWord = FALSE; 824 // Take up nodes until we end a word, or hit a node with < or > links 825 do { 826 vResult->addChar(node->ch); 827 endsWord = (node->flags & kEndsWord) != 0; 828 node = node->equal; 829 } 830 while(node != NULL && !endsWord && node->low == NULL && node->high == NULL); 831 if (node == NULL) { 832 if (!endsWord) { 833 status = U_ILLEGAL_ARGUMENT_ERROR; // Corrupt input trie 834 } 835 else { 836 vResult->setLink((BuildCompactTrieNode *)nodes[1]); 837 } 838 } 839 else { 840 vResult->setLink(compactOneNode(node, endsWord, nodes, status)); 841 } 842 result = vResult; 843 } 844 } 845 return result; 846 } 847 848 // Walk the set of peers at the same level, to build a horizontal node. 849 // Uses recursion. 850 851 static void walkHorizontal(const TernaryNode *node, 852 BuildCompactTrieHorizontalNode *building, 853 UStack &nodes, 854 UErrorCode &status) { 855 while (U_SUCCESS(status) && node != NULL) { 856 if (node->low != NULL) { 857 walkHorizontal(node->low, building, nodes, status); 858 } 859 BuildCompactTrieNode *link = NULL; 860 if (node->equal != NULL) { 861 link = compactOneNode(node->equal, (node->flags & kEndsWord) != 0, nodes, status); 862 } 863 else if (node->flags & kEndsWord) { 864 link = (BuildCompactTrieNode *)nodes[1]; 865 } 866 if (U_SUCCESS(status) && link != NULL) { 867 building->addNode(node->ch, link, status); 868 } 869 // Tail recurse manually instead of leaving it to the compiler. 870 //if (node->high != NULL) { 871 // walkHorizontal(node->high, building, nodes, status); 872 //} 873 node = node->high; 874 } 875 } 876 877 U_NAMESPACE_END 878 U_NAMESPACE_USE 879 U_CDECL_BEGIN 880 static int32_t U_CALLCONV 881 _sortBuildNodes(const void * /*context*/, const void *voidl, const void *voidr) { 882 BuildCompactTrieNode *left = *(BuildCompactTrieNode **)voidl; 883 BuildCompactTrieNode *right = *(BuildCompactTrieNode **)voidr; 884 // Check for comparing a node to itself, to avoid spurious duplicates 885 if (left == right) { 886 return 0; 887 } 888 // Most significant is type of node. Can never coalesce. 889 if (left->fVertical != right->fVertical) { 890 return left->fVertical - right->fVertical; 891 } 892 // Next, the "parent ends word" flag. If that differs, we cannot coalesce. 893 if (left->fParentEndsWord != right->fParentEndsWord) { 894 return left->fParentEndsWord - right->fParentEndsWord; 895 } 896 // Next, the string. If that differs, we can never coalesce. 897 int32_t result = left->fChars.compare(right->fChars); 898 if (result != 0) { 899 return result; 900 } 901 // We know they're both the same node type, so branch for the two cases. 902 if (left->fVertical) { 903 result = ((BuildCompactTrieVerticalNode *)left)->fEqual->fNodeID 904 - ((BuildCompactTrieVerticalNode *)right)->fEqual->fNodeID; 905 } 906 else { 907 // We need to compare the links vectors. They should be the 908 // same size because the strings were equal. 909 // We compare the node IDs instead of the pointers, to handle 910 // coalesced nodes. 911 BuildCompactTrieHorizontalNode *hleft, *hright; 912 hleft = (BuildCompactTrieHorizontalNode *)left; 913 hright = (BuildCompactTrieHorizontalNode *)right; 914 int32_t count = hleft->fLinks.size(); 915 for (int32_t i = 0; i < count && result == 0; ++i) { 916 result = ((BuildCompactTrieNode *)(hleft->fLinks[i]))->fNodeID - 917 ((BuildCompactTrieNode *)(hright->fLinks[i]))->fNodeID; 918 } 919 } 920 // If they are equal to each other, mark them (speeds coalescing) 921 if (result == 0) { 922 left->fHasDuplicate = TRUE; 923 right->fHasDuplicate = TRUE; 924 } 925 return result; 926 } 927 U_CDECL_END 928 U_NAMESPACE_BEGIN 929 930 static void coalesceDuplicates(UStack &nodes, UErrorCode &status) { 931 // We sort the array of nodes to place duplicates next to each other 932 if (U_FAILURE(status)) { 933 return; 934 } 935 int32_t size = nodes.size(); 936 void **array = (void **)uprv_malloc(sizeof(void *)*size); 937 if (array == NULL) { 938 status = U_MEMORY_ALLOCATION_ERROR; 939 return; 940 } 941 (void) nodes.toArray(array); 942 943 // Now repeatedly identify duplicates until there are no more 944 int32_t dupes = 0; 945 long passCount = 0; 946 #ifdef DEBUG_TRIE_DICT 947 long totalDupes = 0; 948 #endif 949 do { 950 BuildCompactTrieNode *node; 951 BuildCompactTrieNode *first = NULL; 952 BuildCompactTrieNode **p; 953 BuildCompactTrieNode **pFirst = NULL; 954 int32_t counter = size - 2; 955 // Sort the array, skipping nodes 0 and 1. Use quicksort for the first 956 // pass for speed. For the second and subsequent passes, we use stable 957 // (insertion) sort for two reasons: 958 // 1. The array is already mostly ordered, so we get better performance. 959 // 2. The way we find one and only one instance of a set of duplicates is to 960 // check that the node ID equals the array index. If we used an unstable 961 // sort for the second or later passes, it's possible that none of the 962 // duplicates would wind up with a node ID equal to its array index. 963 // The sort stability guarantees that, because as we coalesce more and 964 // more groups, the first element of the resultant group will be one of 965 // the first elements of the groups being coalesced. 966 // To use quicksort for the second and subsequent passes, we would have to 967 // find the minimum of the node numbers in a group, and set all the nodes 968 // in the group to that node number. 969 uprv_sortArray(array+2, counter, sizeof(void *), _sortBuildNodes, NULL, (passCount > 0), &status); 970 dupes = 0; 971 for (p = (BuildCompactTrieNode **)array + 2; counter > 0; --counter, ++p) { 972 node = *p; 973 if (node->fHasDuplicate) { 974 if (first == NULL) { 975 first = node; 976 pFirst = p; 977 } 978 else if (_sortBuildNodes(NULL, pFirst, p) != 0) { 979 // Starting a new run of dupes 980 first = node; 981 pFirst = p; 982 } 983 else if (node->fNodeID != first->fNodeID) { 984 // Slave one to the other, note duplicate 985 node->fNodeID = first->fNodeID; 986 dupes += 1; 987 } 988 } 989 else { 990 // This node has no dupes 991 first = NULL; 992 pFirst = NULL; 993 } 994 } 995 passCount += 1; 996 #ifdef DEBUG_TRIE_DICT 997 totalDupes += dupes; 998 fprintf(stderr, "Trie node dupe removal, pass %d: %d nodes tagged\n", passCount, dupes); 999 #endif 1000 } 1001 while (dupes > 0); 1002 #ifdef DEBUG_TRIE_DICT 1003 fprintf(stderr, "Trie node dupe removal complete: %d tagged in %d passes\n", totalDupes, passCount); 1004 #endif 1005 1006 // We no longer need the temporary array, as the nodes have all been marked appropriately. 1007 uprv_free(array); 1008 } 1009 1010 U_NAMESPACE_END 1011 U_CDECL_BEGIN 1012 static void U_CALLCONV _deleteBuildNode(void *obj) { 1013 delete (BuildCompactTrieNode *) obj; 1014 } 1015 U_CDECL_END 1016 U_NAMESPACE_BEGIN 1017 1018 CompactTrieHeader * 1019 CompactTrieDictionary::compactMutableTrieDictionary( const MutableTrieDictionary &dict, 1020 UErrorCode &status ) { 1021 if (U_FAILURE(status)) { 1022 return NULL; 1023 } 1024 #ifdef DEBUG_TRIE_DICT 1025 struct tms timing; 1026 struct tms previous; 1027 (void) ::times(&previous); 1028 #endif 1029 UStack nodes(_deleteBuildNode, NULL, status); // Index of nodes 1030 1031 // Add node 0, used as the NULL pointer/sentinel. 1032 nodes.addElement((int32_t)0, status); 1033 1034 // Start by creating the special empty node we use to indicate that the parent 1035 // terminates a word. This must be node 1, because the builder assumes 1036 // that. 1037 if (U_FAILURE(status)) { 1038 return NULL; 1039 } 1040 BuildCompactTrieNode *terminal = new BuildCompactTrieNode(TRUE, FALSE, nodes, status); 1041 if (terminal == NULL) { 1042 status = U_MEMORY_ALLOCATION_ERROR; 1043 } 1044 1045 // This call does all the work of building the new trie structure. The root 1046 // will be node 2. 1047 BuildCompactTrieNode *root = compactOneNode(dict.fTrie, FALSE, nodes, status); 1048 #ifdef DEBUG_TRIE_DICT 1049 (void) ::times(&timing); 1050 fprintf(stderr, "Compact trie built, %d nodes, time user %f system %f\n", 1051 nodes.size(), (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK, 1052 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK); 1053 previous = timing; 1054 #endif 1055 1056 // Now coalesce all duplicate nodes. 1057 coalesceDuplicates(nodes, status); 1058 #ifdef DEBUG_TRIE_DICT 1059 (void) ::times(&timing); 1060 fprintf(stderr, "Duplicates coalesced, time user %f system %f\n", 1061 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK, 1062 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK); 1063 previous = timing; 1064 #endif 1065 1066 // Next, build the output trie. 1067 // First we compute all the sizes and build the node ID translation table. 1068 uint32_t totalSize = offsetof(CompactTrieHeader,offsets); 1069 int32_t count = nodes.size(); 1070 int32_t nodeCount = 1; // The sentinel node we already have 1071 BuildCompactTrieNode *node; 1072 int32_t i; 1073 UVector32 translate(count, status); // Should be no growth needed after this 1074 translate.push(0, status); // The sentinel node 1075 1076 if (U_FAILURE(status)) { 1077 return NULL; 1078 } 1079 1080 for (i = 1; i < count; ++i) { 1081 node = (BuildCompactTrieNode *)nodes[i]; 1082 if (node->fNodeID == i) { 1083 // Only one node out of each duplicate set is used 1084 if (i >= translate.size()) { 1085 // Logically extend the mapping table 1086 translate.setSize(i+1); 1087 } 1088 translate.setElementAt(nodeCount++, i); 1089 totalSize += node->size(); 1090 } 1091 } 1092 1093 // Check for overflowing 16 bits worth of nodes. 1094 if (nodeCount > 0x10000) { 1095 status = U_ILLEGAL_ARGUMENT_ERROR; 1096 return NULL; 1097 } 1098 1099 // Add enough room for the offsets. 1100 totalSize += nodeCount*sizeof(uint32_t); 1101 #ifdef DEBUG_TRIE_DICT 1102 (void) ::times(&timing); 1103 fprintf(stderr, "Sizes/mapping done, time user %f system %f\n", 1104 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK, 1105 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK); 1106 previous = timing; 1107 fprintf(stderr, "%d nodes, %d unique, %d bytes\n", nodes.size(), nodeCount, totalSize); 1108 #endif 1109 uint8_t *bytes = (uint8_t *)uprv_malloc(totalSize); 1110 if (bytes == NULL) { 1111 status = U_MEMORY_ALLOCATION_ERROR; 1112 return NULL; 1113 } 1114 1115 CompactTrieHeader *header = (CompactTrieHeader *)bytes; 1116 header->size = totalSize; 1117 header->nodeCount = nodeCount; 1118 header->offsets[0] = 0; // Sentinel 1119 header->root = translate.elementAti(root->fNodeID); 1120 #ifdef DEBUG_TRIE_DICT 1121 if (header->root == 0) { 1122 fprintf(stderr, "ERROR: root node %d translate to physical zero\n", root->fNodeID); 1123 } 1124 #endif 1125 uint32_t offset = offsetof(CompactTrieHeader,offsets)+(nodeCount*sizeof(uint32_t)); 1126 nodeCount = 1; 1127 // Now write the data 1128 for (i = 1; i < count; ++i) { 1129 node = (BuildCompactTrieNode *)nodes[i]; 1130 if (node->fNodeID == i) { 1131 header->offsets[nodeCount++] = offset; 1132 node->write(bytes, offset, translate); 1133 } 1134 } 1135 #ifdef DEBUG_TRIE_DICT 1136 (void) ::times(&timing); 1137 fprintf(stderr, "Trie built, time user %f system %f\n", 1138 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK, 1139 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK); 1140 previous = timing; 1141 fprintf(stderr, "Final offset is %d\n", offset); 1142 1143 // Collect statistics on node types and sizes 1144 int hCount = 0; 1145 int vCount = 0; 1146 size_t hSize = 0; 1147 size_t vSize = 0; 1148 size_t hItemCount = 0; 1149 size_t vItemCount = 0; 1150 uint32_t previousOff = offset; 1151 for (uint16_t nodeIdx = nodeCount-1; nodeIdx >= 2; --nodeIdx) { 1152 const CompactTrieNode *node = getCompactNode(header, nodeIdx); 1153 if (node->flagscount & kVerticalNode) { 1154 vCount += 1; 1155 vItemCount += (node->flagscount & kCountMask); 1156 vSize += previousOff-header->offsets[nodeIdx]; 1157 } 1158 else { 1159 hCount += 1; 1160 hItemCount += (node->flagscount & kCountMask); 1161 hSize += previousOff-header->offsets[nodeIdx]; 1162 } 1163 previousOff = header->offsets[nodeIdx]; 1164 } 1165 fprintf(stderr, "Horizontal nodes: %d total, average %f bytes with %f items\n", hCount, 1166 (double)hSize/hCount, (double)hItemCount/hCount); 1167 fprintf(stderr, "Vertical nodes: %d total, average %f bytes with %f items\n", vCount, 1168 (double)vSize/vCount, (double)vItemCount/vCount); 1169 #endif 1170 1171 if (U_FAILURE(status)) { 1172 uprv_free(bytes); 1173 header = NULL; 1174 } 1175 else { 1176 header->magic = COMPACT_TRIE_MAGIC_1; 1177 } 1178 return header; 1179 } 1180 1181 // Forward declaration 1182 static TernaryNode * 1183 unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status ); 1184 1185 1186 // Convert a horizontal node (or subarray thereof) into a ternary subtrie 1187 static TernaryNode * 1188 unpackHorizontalArray( const CompactTrieHeader *header, const CompactTrieHorizontalEntry *array, 1189 int low, int high, UErrorCode &status ) { 1190 if (U_FAILURE(status) || low > high) { 1191 return NULL; 1192 } 1193 int middle = (low+high)/2; 1194 TernaryNode *result = new TernaryNode(array[middle].ch); 1195 if (result == NULL) { 1196 status = U_MEMORY_ALLOCATION_ERROR; 1197 return NULL; 1198 } 1199 const CompactTrieNode *equal = getCompactNode(header, array[middle].equal); 1200 if (equal->flagscount & kParentEndsWord) { 1201 result->flags |= kEndsWord; 1202 } 1203 result->low = unpackHorizontalArray(header, array, low, middle-1, status); 1204 result->high = unpackHorizontalArray(header, array, middle+1, high, status); 1205 result->equal = unpackOneNode(header, equal, status); 1206 return result; 1207 } 1208 1209 // Convert one compact trie node into a ternary subtrie 1210 static TernaryNode * 1211 unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status ) { 1212 int nodeCount = (node->flagscount & kCountMask); 1213 if (nodeCount == 0 || U_FAILURE(status)) { 1214 // Failure, or terminal node 1215 return NULL; 1216 } 1217 if (node->flagscount & kVerticalNode) { 1218 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node; 1219 TernaryNode *head = NULL; 1220 TernaryNode *previous = NULL; 1221 TernaryNode *latest = NULL; 1222 for (int i = 0; i < nodeCount; ++i) { 1223 latest = new TernaryNode(vnode->chars[i]); 1224 if (latest == NULL) { 1225 status = U_MEMORY_ALLOCATION_ERROR; 1226 break; 1227 } 1228 if (head == NULL) { 1229 head = latest; 1230 } 1231 if (previous != NULL) { 1232 previous->equal = latest; 1233 } 1234 previous = latest; 1235 } 1236 if (latest != NULL) { 1237 const CompactTrieNode *equal = getCompactNode(header, vnode->equal); 1238 if (equal->flagscount & kParentEndsWord) { 1239 latest->flags |= kEndsWord; 1240 } 1241 latest->equal = unpackOneNode(header, equal, status); 1242 } 1243 return head; 1244 } 1245 else { 1246 // Horizontal node 1247 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node; 1248 return unpackHorizontalArray(header, &hnode->entries[0], 0, nodeCount-1, status); 1249 } 1250 } 1251 1252 MutableTrieDictionary * 1253 CompactTrieDictionary::cloneMutable( UErrorCode &status ) const { 1254 MutableTrieDictionary *result = new MutableTrieDictionary( status ); 1255 if (result == NULL) { 1256 status = U_MEMORY_ALLOCATION_ERROR; 1257 return NULL; 1258 } 1259 TernaryNode *root = unpackOneNode(fData, getCompactNode(fData, fData->root), status); 1260 if (U_FAILURE(status)) { 1261 delete root; // Clean up 1262 delete result; 1263 return NULL; 1264 } 1265 result->fTrie = root; 1266 return result; 1267 } 1268 1269 U_NAMESPACE_END 1270 1271 U_CAPI int32_t U_EXPORT2 1272 triedict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, 1273 UErrorCode *status) { 1274 1275 if (status == NULL || U_FAILURE(*status)) { 1276 return 0; 1277 } 1278 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { 1279 *status=U_ILLEGAL_ARGUMENT_ERROR; 1280 return 0; 1281 } 1282 1283 // 1284 // Check that the data header is for for dictionary data. 1285 // (Header contents are defined in genxxx.cpp) 1286 // 1287 const UDataInfo *pInfo = (const UDataInfo *)((const uint8_t *)inData+4); 1288 if(!( pInfo->dataFormat[0]==0x54 && /* dataFormat="TrDc" */ 1289 pInfo->dataFormat[1]==0x72 && 1290 pInfo->dataFormat[2]==0x44 && 1291 pInfo->dataFormat[3]==0x63 && 1292 pInfo->formatVersion[0]==1 )) { 1293 udata_printError(ds, "triedict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", 1294 pInfo->dataFormat[0], pInfo->dataFormat[1], 1295 pInfo->dataFormat[2], pInfo->dataFormat[3], 1296 pInfo->formatVersion[0]); 1297 *status=U_UNSUPPORTED_ERROR; 1298 return 0; 1299 } 1300 1301 // 1302 // Swap the data header. (This is the generic ICU Data Header, not the 1303 // CompactTrieHeader). This swap also conveniently gets us 1304 // the size of the ICU d.h., which lets us locate the start 1305 // of the RBBI specific data. 1306 // 1307 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status); 1308 1309 // 1310 // Get the CompactTrieHeader, and check that it appears to be OK. 1311 // 1312 const uint8_t *inBytes =(const uint8_t *)inData+headerSize; 1313 const CompactTrieHeader *header = (const CompactTrieHeader *)inBytes; 1314 if (ds->readUInt32(header->magic) != COMPACT_TRIE_MAGIC_1 1315 || ds->readUInt32(header->size) < sizeof(CompactTrieHeader)) 1316 { 1317 udata_printError(ds, "triedict_swap(): CompactTrieHeader is invalid.\n"); 1318 *status=U_UNSUPPORTED_ERROR; 1319 return 0; 1320 } 1321 1322 // 1323 // Prefight operation? Just return the size 1324 // 1325 uint32_t totalSize = ds->readUInt32(header->size); 1326 int32_t sizeWithUData = (int32_t)totalSize + headerSize; 1327 if (length < 0) { 1328 return sizeWithUData; 1329 } 1330 1331 // 1332 // Check that length passed in is consistent with length from RBBI data header. 1333 // 1334 if (length < sizeWithUData) { 1335 udata_printError(ds, "triedict_swap(): too few bytes (%d after ICU Data header) for trie data.\n", 1336 totalSize); 1337 *status=U_INDEX_OUTOFBOUNDS_ERROR; 1338 return 0; 1339 } 1340 1341 // 1342 // Swap the Data. Do the data itself first, then the CompactTrieHeader, because 1343 // we need to reference the header to locate the data, and an 1344 // inplace swap of the header leaves it unusable. 1345 // 1346 uint8_t *outBytes = (uint8_t *)outData + headerSize; 1347 CompactTrieHeader *outputHeader = (CompactTrieHeader *)outBytes; 1348 1349 #if 0 1350 // 1351 // If not swapping in place, zero out the output buffer before starting. 1352 // 1353 if (inBytes != outBytes) { 1354 uprv_memset(outBytes, 0, totalSize); 1355 } 1356 1357 // We need to loop through all the nodes in the offset table, and swap each one. 1358 uint16_t nodeCount = ds->readUInt16(header->nodeCount); 1359 // Skip node 0, which should always be 0. 1360 for (int i = 1; i < nodeCount; ++i) { 1361 uint32_t nodeOff = ds->readUInt32(header->offsets[i]); 1362 const CompactTrieNode *inNode = (const CompactTrieNode *)(inBytes + nodeOff); 1363 CompactTrieNode *outNode = (CompactTrieNode *)(outBytes + nodeOff); 1364 uint16_t flagscount = ds->readUInt16(inNode->flagscount); 1365 uint16_t itemCount = flagscount & kCountMask; 1366 ds->writeUInt16(&outNode->flagscount, flagscount); 1367 if (itemCount > 0) { 1368 if (flagscount & kVerticalNode) { 1369 ds->swapArray16(ds, inBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars), 1370 itemCount*sizeof(uint16_t), 1371 outBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars), status); 1372 uint16_t equal = ds->readUInt16(inBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal); 1373 ds->writeUInt16(outBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal)); 1374 } 1375 else { 1376 const CompactTrieHorizontalNode *inHNode = (const CompactTrieHorizontalNode *)inNode; 1377 CompactTrieHorizontalNode *outHNode = (CompactTrieHorizontalNode *)outNode; 1378 for (int j = 0; j < itemCount; ++j) { 1379 uint16_t word = ds->readUInt16(inHNode->entries[j].ch); 1380 ds->writeUInt16(&outHNode->entries[j].ch, word); 1381 word = ds->readUInt16(inHNode->entries[j].equal); 1382 ds->writeUInt16(&outHNode->entries[j].equal, word); 1383 } 1384 } 1385 } 1386 } 1387 #endif 1388 1389 // All the data in all the nodes consist of 16 bit items. Swap them all at once. 1390 uint16_t nodeCount = ds->readUInt16(header->nodeCount); 1391 uint32_t nodesOff = offsetof(CompactTrieHeader,offsets)+((uint32_t)nodeCount*sizeof(uint32_t)); 1392 ds->swapArray16(ds, inBytes+nodesOff, totalSize-nodesOff, outBytes+nodesOff, status); 1393 1394 // Swap the header 1395 ds->writeUInt32(&outputHeader->size, totalSize); 1396 uint32_t magic = ds->readUInt32(header->magic); 1397 ds->writeUInt32(&outputHeader->magic, magic); 1398 ds->writeUInt16(&outputHeader->nodeCount, nodeCount); 1399 uint16_t root = ds->readUInt16(header->root); 1400 ds->writeUInt16(&outputHeader->root, root); 1401 ds->swapArray32(ds, inBytes+offsetof(CompactTrieHeader,offsets), 1402 sizeof(uint32_t)*(int32_t)nodeCount, 1403 outBytes+offsetof(CompactTrieHeader,offsets), status); 1404 1405 return sizeWithUData; 1406 } 1407 1408 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1409