1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2001-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: utrie2.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2008aug16 (starting from a copy of utrie.c) 14 * created by: Markus W. Scherer 15 * 16 * This is a common implementation of a Unicode trie. 17 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with 18 * Unicode code points (0..0x10ffff). 19 * This is the second common version of a Unicode trie (hence the name UTrie2). 20 * See utrie2.h for a comparison. 21 * 22 * This file contains only the runtime and enumeration code, for read-only access. 23 * See utrie2_builder.c for the builder code. 24 */ 25 #ifdef UTRIE2_DEBUG 26 # include <stdio.h> 27 #endif 28 29 #include "unicode/utypes.h" 30 #include "unicode/utf.h" 31 #include "unicode/utf8.h" 32 #include "unicode/utf16.h" 33 #include "cmemory.h" 34 #include "utrie2.h" 35 #include "utrie2_impl.h" 36 #include "uassert.h" 37 38 /* Public UTrie2 API implementation ----------------------------------------- */ 39 40 static uint32_t 41 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { 42 int32_t i2, block; 43 44 if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { 45 return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; 46 } 47 48 if(U_IS_LEAD(c) && fromLSCP) { 49 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ 50 (c>>UTRIE2_SHIFT_2); 51 } else { 52 i2=trie->index1[c>>UTRIE2_SHIFT_1]+ 53 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); 54 } 55 block=trie->index2[i2]; 56 return trie->data[block+(c&UTRIE2_DATA_MASK)]; 57 } 58 59 U_CAPI uint32_t U_EXPORT2 60 utrie2_get32(const UTrie2 *trie, UChar32 c) { 61 if(trie->data16!=NULL) { 62 return UTRIE2_GET16(trie, c); 63 } else if(trie->data32!=NULL) { 64 return UTRIE2_GET32(trie, c); 65 } else if((uint32_t)c>0x10ffff) { 66 return trie->errorValue; 67 } else { 68 return get32(trie->newTrie, c, TRUE); 69 } 70 } 71 72 U_CAPI uint32_t U_EXPORT2 73 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { 74 if(!U_IS_LEAD(c)) { 75 return trie->errorValue; 76 } 77 if(trie->data16!=NULL) { 78 return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); 79 } else if(trie->data32!=NULL) { 80 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); 81 } else { 82 return get32(trie->newTrie, c, FALSE); 83 } 84 } 85 86 static inline int32_t 87 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { 88 int32_t idx= 89 _UTRIE2_INDEX_FROM_CP( 90 trie, 91 trie->data32==NULL ? trie->indexLength : 0, 92 c); 93 return (idx<<3)|i; 94 } 95 96 U_CAPI int32_t U_EXPORT2 97 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, 98 const uint8_t *src, const uint8_t *limit) { 99 int32_t i, length; 100 i=0; 101 /* support 64-bit pointers by avoiding cast of arbitrary difference */ 102 if((limit-src)<=7) { 103 length=(int32_t)(limit-src); 104 } else { 105 length=7; 106 } 107 c=utf8_nextCharSafeBody(src, &i, length, c, -1); 108 return u8Index(trie, c, i); 109 } 110 111 U_CAPI int32_t U_EXPORT2 112 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, 113 const uint8_t *start, const uint8_t *src) { 114 int32_t i, length; 115 /* support 64-bit pointers by avoiding cast of arbitrary difference */ 116 if((src-start)<=7) { 117 i=length=(int32_t)(src-start); 118 } else { 119 i=length=7; 120 start=src-7; 121 } 122 c=utf8_prevCharSafeBody(start, 0, &i, c, -1); 123 i=length-i; /* number of bytes read backward from src */ 124 return u8Index(trie, c, i); 125 } 126 127 U_CAPI UTrie2 * U_EXPORT2 128 utrie2_openFromSerialized(UTrie2ValueBits valueBits, 129 const void *data, int32_t length, int32_t *pActualLength, 130 UErrorCode *pErrorCode) { 131 const UTrie2Header *header; 132 const uint16_t *p16; 133 int32_t actualLength; 134 135 UTrie2 tempTrie; 136 UTrie2 *trie; 137 138 if(U_FAILURE(*pErrorCode)) { 139 return 0; 140 } 141 142 if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || 143 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits 144 ) { 145 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 146 return 0; 147 } 148 149 /* enough data for a trie header? */ 150 if(length<(int32_t)sizeof(UTrie2Header)) { 151 *pErrorCode=U_INVALID_FORMAT_ERROR; 152 return 0; 153 } 154 155 /* check the signature */ 156 header=(const UTrie2Header *)data; 157 if(header->signature!=UTRIE2_SIG) { 158 *pErrorCode=U_INVALID_FORMAT_ERROR; 159 return 0; 160 } 161 162 /* get the options */ 163 if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { 164 *pErrorCode=U_INVALID_FORMAT_ERROR; 165 return 0; 166 } 167 168 /* get the length values and offsets */ 169 uprv_memset(&tempTrie, 0, sizeof(tempTrie)); 170 tempTrie.indexLength=header->indexLength; 171 tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; 172 tempTrie.index2NullOffset=header->index2NullOffset; 173 tempTrie.dataNullOffset=header->dataNullOffset; 174 175 tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; 176 tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; 177 if(valueBits==UTRIE2_16_VALUE_BITS) { 178 tempTrie.highValueIndex+=tempTrie.indexLength; 179 } 180 181 /* calculate the actual length */ 182 actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; 183 if(valueBits==UTRIE2_16_VALUE_BITS) { 184 actualLength+=tempTrie.dataLength*2; 185 } else { 186 actualLength+=tempTrie.dataLength*4; 187 } 188 if(length<actualLength) { 189 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ 190 return 0; 191 } 192 193 /* allocate the trie */ 194 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 195 if(trie==NULL) { 196 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 197 return 0; 198 } 199 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); 200 trie->memory=(uint32_t *)data; 201 trie->length=actualLength; 202 trie->isMemoryOwned=FALSE; 203 204 /* set the pointers to its index and data arrays */ 205 p16=(const uint16_t *)(header+1); 206 trie->index=p16; 207 p16+=trie->indexLength; 208 209 /* get the data */ 210 switch(valueBits) { 211 case UTRIE2_16_VALUE_BITS: 212 trie->data16=p16; 213 trie->data32=NULL; 214 trie->initialValue=trie->index[trie->dataNullOffset]; 215 trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; 216 break; 217 case UTRIE2_32_VALUE_BITS: 218 trie->data16=NULL; 219 trie->data32=(const uint32_t *)p16; 220 trie->initialValue=trie->data32[trie->dataNullOffset]; 221 trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; 222 break; 223 default: 224 *pErrorCode=U_INVALID_FORMAT_ERROR; 225 return 0; 226 } 227 228 if(pActualLength!=NULL) { 229 *pActualLength=actualLength; 230 } 231 return trie; 232 } 233 234 U_CAPI UTrie2 * U_EXPORT2 235 utrie2_openDummy(UTrie2ValueBits valueBits, 236 uint32_t initialValue, uint32_t errorValue, 237 UErrorCode *pErrorCode) { 238 UTrie2 *trie; 239 UTrie2Header *header; 240 uint32_t *p; 241 uint16_t *dest16; 242 int32_t indexLength, dataLength, length, i; 243 int32_t dataMove; /* >0 if the data is moved to the end of the index array */ 244 245 if(U_FAILURE(*pErrorCode)) { 246 return 0; 247 } 248 249 if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { 250 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 251 return 0; 252 } 253 254 /* calculate the total length of the dummy trie data */ 255 indexLength=UTRIE2_INDEX_1_OFFSET; 256 dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; 257 length=(int32_t)sizeof(UTrie2Header)+indexLength*2; 258 if(valueBits==UTRIE2_16_VALUE_BITS) { 259 length+=dataLength*2; 260 } else { 261 length+=dataLength*4; 262 } 263 264 /* allocate the trie */ 265 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 266 if(trie==NULL) { 267 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 268 return 0; 269 } 270 uprv_memset(trie, 0, sizeof(UTrie2)); 271 trie->memory=uprv_malloc(length); 272 if(trie->memory==NULL) { 273 uprv_free(trie); 274 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 275 return 0; 276 } 277 trie->length=length; 278 trie->isMemoryOwned=TRUE; 279 280 /* set the UTrie2 fields */ 281 if(valueBits==UTRIE2_16_VALUE_BITS) { 282 dataMove=indexLength; 283 } else { 284 dataMove=0; 285 } 286 287 trie->indexLength=indexLength; 288 trie->dataLength=dataLength; 289 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; 290 trie->dataNullOffset=(uint16_t)dataMove; 291 trie->initialValue=initialValue; 292 trie->errorValue=errorValue; 293 trie->highStart=0; 294 trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; 295 296 /* set the header fields */ 297 header=(UTrie2Header *)trie->memory; 298 299 header->signature=UTRIE2_SIG; /* "Tri2" */ 300 header->options=(uint16_t)valueBits; 301 302 header->indexLength=(uint16_t)indexLength; 303 header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); 304 header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; 305 header->dataNullOffset=(uint16_t)dataMove; 306 header->shiftedHighStart=0; 307 308 /* fill the index and data arrays */ 309 dest16=(uint16_t *)(header+1); 310 trie->index=dest16; 311 312 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ 313 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { 314 *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */ 315 } 316 317 /* write UTF-8 2-byte index-2 values, not right-shifted */ 318 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ 319 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); 320 } 321 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ 322 *dest16++=(uint16_t)dataMove; 323 } 324 325 /* write the 16/32-bit data array */ 326 switch(valueBits) { 327 case UTRIE2_16_VALUE_BITS: 328 /* write 16-bit data values */ 329 trie->data16=dest16; 330 trie->data32=NULL; 331 for(i=0; i<0x80; ++i) { 332 *dest16++=(uint16_t)initialValue; 333 } 334 for(; i<0xc0; ++i) { 335 *dest16++=(uint16_t)errorValue; 336 } 337 /* highValue and reserved values */ 338 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { 339 *dest16++=(uint16_t)initialValue; 340 } 341 break; 342 case UTRIE2_32_VALUE_BITS: 343 /* write 32-bit data values */ 344 p=(uint32_t *)dest16; 345 trie->data16=NULL; 346 trie->data32=p; 347 for(i=0; i<0x80; ++i) { 348 *p++=initialValue; 349 } 350 for(; i<0xc0; ++i) { 351 *p++=errorValue; 352 } 353 /* highValue and reserved values */ 354 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { 355 *p++=initialValue; 356 } 357 break; 358 default: 359 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 360 return 0; 361 } 362 363 return trie; 364 } 365 366 U_CAPI void U_EXPORT2 367 utrie2_close(UTrie2 *trie) { 368 if(trie!=NULL) { 369 if(trie->isMemoryOwned) { 370 uprv_free(trie->memory); 371 } 372 if(trie->newTrie!=NULL) { 373 uprv_free(trie->newTrie->data); 374 uprv_free(trie->newTrie); 375 } 376 uprv_free(trie); 377 } 378 } 379 380 U_CAPI int32_t U_EXPORT2 381 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { 382 uint32_t signature; 383 if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { 384 return 0; 385 } 386 signature=*(const uint32_t *)data; 387 if(signature==UTRIE2_SIG) { 388 return 2; 389 } 390 if(anyEndianOk && signature==UTRIE2_OE_SIG) { 391 return 2; 392 } 393 if(signature==UTRIE_SIG) { 394 return 1; 395 } 396 if(anyEndianOk && signature==UTRIE_OE_SIG) { 397 return 1; 398 } 399 return 0; 400 } 401 402 U_CAPI UBool U_EXPORT2 403 utrie2_isFrozen(const UTrie2 *trie) { 404 return (UBool)(trie->newTrie==NULL); 405 } 406 407 U_CAPI int32_t U_EXPORT2 408 utrie2_serialize(const UTrie2 *trie, 409 void *data, int32_t capacity, 410 UErrorCode *pErrorCode) { 411 /* argument check */ 412 if(U_FAILURE(*pErrorCode)) { 413 return 0; 414 } 415 416 if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL || 417 capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0))) 418 ) { 419 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 420 return 0; 421 } 422 423 if(capacity>=trie->length) { 424 uprv_memcpy(data, trie->memory, trie->length); 425 } else { 426 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 427 } 428 return trie->length; 429 } 430 431 U_CAPI int32_t U_EXPORT2 432 utrie2_swap(const UDataSwapper *ds, 433 const void *inData, int32_t length, void *outData, 434 UErrorCode *pErrorCode) { 435 const UTrie2Header *inTrie; 436 UTrie2Header trie; 437 int32_t dataLength, size; 438 UTrie2ValueBits valueBits; 439 440 if(U_FAILURE(*pErrorCode)) { 441 return 0; 442 } 443 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { 444 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 445 return 0; 446 } 447 448 /* setup and swapping */ 449 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { 450 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 451 return 0; 452 } 453 454 inTrie=(const UTrie2Header *)inData; 455 trie.signature=ds->readUInt32(inTrie->signature); 456 trie.options=ds->readUInt16(inTrie->options); 457 trie.indexLength=ds->readUInt16(inTrie->indexLength); 458 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); 459 460 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); 461 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; 462 463 if( trie.signature!=UTRIE2_SIG || 464 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || 465 trie.indexLength<UTRIE2_INDEX_1_OFFSET || 466 dataLength<UTRIE2_DATA_START_OFFSET 467 ) { 468 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ 469 return 0; 470 } 471 472 size=sizeof(UTrie2Header)+trie.indexLength*2; 473 switch(valueBits) { 474 case UTRIE2_16_VALUE_BITS: 475 size+=dataLength*2; 476 break; 477 case UTRIE2_32_VALUE_BITS: 478 size+=dataLength*4; 479 break; 480 default: 481 *pErrorCode=U_INVALID_FORMAT_ERROR; 482 return 0; 483 } 484 485 if(length>=0) { 486 UTrie2Header *outTrie; 487 488 if(length<size) { 489 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 490 return 0; 491 } 492 493 outTrie=(UTrie2Header *)outData; 494 495 /* swap the header */ 496 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); 497 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); 498 499 /* swap the index and the data */ 500 switch(valueBits) { 501 case UTRIE2_16_VALUE_BITS: 502 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); 503 break; 504 case UTRIE2_32_VALUE_BITS: 505 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); 506 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, 507 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); 508 break; 509 default: 510 *pErrorCode=U_INVALID_FORMAT_ERROR; 511 return 0; 512 } 513 } 514 515 return size; 516 } 517 518 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c 519 // to avoid a dependency from utrie2.cpp on utrie.c. 520 521 /* enumeration -------------------------------------------------------------- */ 522 523 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) 524 525 /* default UTrie2EnumValue() returns the input value itself */ 526 static uint32_t U_CALLCONV 527 enumSameValue(const void * /*context*/, uint32_t value) { 528 return value; 529 } 530 531 /** 532 * Enumerate all ranges of code points with the same relevant values. 533 * The values are transformed from the raw trie entries by the enumValue function. 534 * 535 * Currently requires start<limit and both start and limit must be multiples 536 * of UTRIE2_DATA_BLOCK_LENGTH. 537 * 538 * Optimizations: 539 * - Skip a whole block if we know that it is filled with a single value, 540 * and it is the same as we visited just before. 541 * - Handle the null block specially because we know a priori that it is filled 542 * with a single value. 543 */ 544 static void 545 enumEitherTrie(const UTrie2 *trie, 546 UChar32 start, UChar32 limit, 547 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { 548 const uint32_t *data32; 549 const uint16_t *idx; 550 551 uint32_t value, prevValue, initialValue; 552 UChar32 c, prev, highStart; 553 int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; 554 555 if(enumRange==NULL) { 556 return; 557 } 558 if(enumValue==NULL) { 559 enumValue=enumSameValue; 560 } 561 562 if(trie->newTrie==NULL) { 563 /* frozen trie */ 564 idx=trie->index; 565 U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ 566 data32=trie->data32; 567 568 index2NullOffset=trie->index2NullOffset; 569 nullBlock=trie->dataNullOffset; 570 } else { 571 /* unfrozen, mutable trie */ 572 idx=NULL; 573 data32=trie->newTrie->data; 574 U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ 575 576 index2NullOffset=trie->newTrie->index2NullOffset; 577 nullBlock=trie->newTrie->dataNullOffset; 578 } 579 580 highStart=trie->highStart; 581 582 /* get the enumeration value that corresponds to an initial-value trie data entry */ 583 initialValue=enumValue(context, trie->initialValue); 584 585 /* set variables for previous range */ 586 prevI2Block=-1; 587 prevBlock=-1; 588 prev=start; 589 prevValue=0; 590 591 /* enumerate index-2 blocks */ 592 for(c=start; c<limit && c<highStart;) { 593 /* Code point limit for iterating inside this i2Block. */ 594 UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; 595 if(limit<tempLimit) { 596 tempLimit=limit; 597 } 598 if(c<=0xffff) { 599 if(!U_IS_SURROGATE(c)) { 600 i2Block=c>>UTRIE2_SHIFT_2; 601 } else if(U_IS_SURROGATE_LEAD(c)) { 602 /* 603 * Enumerate values for lead surrogate code points, not code units: 604 * This special block has half the normal length. 605 */ 606 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; 607 tempLimit=MIN_VALUE(0xdc00, limit); 608 } else { 609 /* 610 * Switch back to the normal part of the index-2 table. 611 * Enumerate the second half of the surrogates block. 612 */ 613 i2Block=0xd800>>UTRIE2_SHIFT_2; 614 tempLimit=MIN_VALUE(0xe000, limit); 615 } 616 } else { 617 /* supplementary code points */ 618 if(idx!=NULL) { 619 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ 620 (c>>UTRIE2_SHIFT_1)]; 621 } else { 622 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; 623 } 624 if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { 625 /* 626 * The index-2 block is the same as the previous one, and filled with prevValue. 627 * Only possible for supplementary code points because the linear-BMP index-2 628 * table creates unique i2Block values. 629 */ 630 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; 631 continue; 632 } 633 } 634 prevI2Block=i2Block; 635 if(i2Block==index2NullOffset) { 636 /* this is the null index-2 block */ 637 if(prevValue!=initialValue) { 638 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 639 return; 640 } 641 prevBlock=nullBlock; 642 prev=c; 643 prevValue=initialValue; 644 } 645 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; 646 } else { 647 /* enumerate data blocks for one index-2 block */ 648 int32_t i2, i2Limit; 649 i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 650 if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { 651 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 652 } else { 653 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; 654 } 655 for(; i2<i2Limit; ++i2) { 656 if(idx!=NULL) { 657 block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; 658 } else { 659 block=trie->newTrie->index2[i2Block+i2]; 660 } 661 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { 662 /* the block is the same as the previous one, and filled with prevValue */ 663 c+=UTRIE2_DATA_BLOCK_LENGTH; 664 continue; 665 } 666 prevBlock=block; 667 if(block==nullBlock) { 668 /* this is the null data block */ 669 if(prevValue!=initialValue) { 670 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 671 return; 672 } 673 prev=c; 674 prevValue=initialValue; 675 } 676 c+=UTRIE2_DATA_BLOCK_LENGTH; 677 } else { 678 for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { 679 value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); 680 if(value!=prevValue) { 681 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 682 return; 683 } 684 prev=c; 685 prevValue=value; 686 } 687 ++c; 688 } 689 } 690 } 691 } 692 } 693 694 if(c>limit) { 695 c=limit; /* could be higher if in the index2NullOffset */ 696 } else if(c<limit) { 697 /* c==highStart<limit */ 698 uint32_t highValue; 699 if(idx!=NULL) { 700 highValue= 701 data32!=NULL ? 702 data32[trie->highValueIndex] : 703 idx[trie->highValueIndex]; 704 } else { 705 highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; 706 } 707 value=enumValue(context, highValue); 708 if(value!=prevValue) { 709 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 710 return; 711 } 712 prev=c; 713 prevValue=value; 714 } 715 c=limit; 716 } 717 718 /* deliver last range */ 719 enumRange(context, prev, c-1, prevValue); 720 } 721 722 U_CAPI void U_EXPORT2 723 utrie2_enum(const UTrie2 *trie, 724 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { 725 enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); 726 } 727 728 U_CAPI void U_EXPORT2 729 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, 730 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, 731 const void *context) { 732 if(!U16_IS_LEAD(lead)) { 733 return; 734 } 735 lead=(lead-0xd7c0)<<10; /* start code point */ 736 enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); 737 } 738 739 /* C++ convenience wrappers ------------------------------------------------- */ 740 741 U_NAMESPACE_BEGIN 742 743 uint16_t BackwardUTrie2StringIterator::previous16() { 744 codePointLimit=codePointStart; 745 if(start>=codePointStart) { 746 codePoint=U_SENTINEL; 747 return 0; 748 } 749 uint16_t result; 750 UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); 751 return result; 752 } 753 754 uint16_t ForwardUTrie2StringIterator::next16() { 755 codePointStart=codePointLimit; 756 if(codePointLimit==limit) { 757 codePoint=U_SENTINEL; 758 return 0; 759 } 760 uint16_t result; 761 UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); 762 return result; 763 } 764 765 U_NAMESPACE_END 766