1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 2001-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * file name: utrie2.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2008aug16 (starting from a copy of utrie.c) 16 * created by: Markus W. Scherer 17 * 18 * This is a common implementation of a Unicode trie. 19 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with 20 * Unicode code points (0..0x10ffff). 21 * This is the second common version of a Unicode trie (hence the name UTrie2). 22 * See utrie2.h for a comparison. 23 * 24 * This file contains only the runtime and enumeration code, for read-only access. 25 * See utrie2_builder.c for the builder code. 26 */ 27 #ifdef UTRIE2_DEBUG 28 # include <stdio.h> 29 #endif 30 31 #include "unicode/utypes.h" 32 #include "unicode/utf.h" 33 #include "unicode/utf8.h" 34 #include "unicode/utf16.h" 35 #include "cmemory.h" 36 #include "utrie2.h" 37 #include "utrie2_impl.h" 38 #include "uassert.h" 39 40 /* Public UTrie2 API implementation ----------------------------------------- */ 41 42 static uint32_t 43 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { 44 int32_t i2, block; 45 46 if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { 47 return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; 48 } 49 50 if(U_IS_LEAD(c) && fromLSCP) { 51 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ 52 (c>>UTRIE2_SHIFT_2); 53 } else { 54 i2=trie->index1[c>>UTRIE2_SHIFT_1]+ 55 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); 56 } 57 block=trie->index2[i2]; 58 return trie->data[block+(c&UTRIE2_DATA_MASK)]; 59 } 60 61 U_CAPI uint32_t U_EXPORT2 62 utrie2_get32(const UTrie2 *trie, UChar32 c) { 63 if(trie->data16!=NULL) { 64 return UTRIE2_GET16(trie, c); 65 } else if(trie->data32!=NULL) { 66 return UTRIE2_GET32(trie, c); 67 } else if((uint32_t)c>0x10ffff) { 68 return trie->errorValue; 69 } else { 70 return get32(trie->newTrie, c, TRUE); 71 } 72 } 73 74 U_CAPI uint32_t U_EXPORT2 75 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { 76 if(!U_IS_LEAD(c)) { 77 return trie->errorValue; 78 } 79 if(trie->data16!=NULL) { 80 return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); 81 } else if(trie->data32!=NULL) { 82 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); 83 } else { 84 return get32(trie->newTrie, c, FALSE); 85 } 86 } 87 88 static inline int32_t 89 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { 90 int32_t idx= 91 _UTRIE2_INDEX_FROM_CP( 92 trie, 93 trie->data32==NULL ? trie->indexLength : 0, 94 c); 95 return (idx<<3)|i; 96 } 97 98 U_CAPI int32_t U_EXPORT2 99 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, 100 const uint8_t *src, const uint8_t *limit) { 101 int32_t i, length; 102 i=0; 103 /* support 64-bit pointers by avoiding cast of arbitrary difference */ 104 if((limit-src)<=7) { 105 length=(int32_t)(limit-src); 106 } else { 107 length=7; 108 } 109 c=utf8_nextCharSafeBody(src, &i, length, c, -1); 110 return u8Index(trie, c, i); 111 } 112 113 U_CAPI int32_t U_EXPORT2 114 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, 115 const uint8_t *start, const uint8_t *src) { 116 int32_t i, length; 117 /* support 64-bit pointers by avoiding cast of arbitrary difference */ 118 if((src-start)<=7) { 119 i=length=(int32_t)(src-start); 120 } else { 121 i=length=7; 122 start=src-7; 123 } 124 c=utf8_prevCharSafeBody(start, 0, &i, c, -1); 125 i=length-i; /* number of bytes read backward from src */ 126 return u8Index(trie, c, i); 127 } 128 129 U_CAPI UTrie2 * U_EXPORT2 130 utrie2_openFromSerialized(UTrie2ValueBits valueBits, 131 const void *data, int32_t length, int32_t *pActualLength, 132 UErrorCode *pErrorCode) { 133 const UTrie2Header *header; 134 const uint16_t *p16; 135 int32_t actualLength; 136 137 UTrie2 tempTrie; 138 UTrie2 *trie; 139 140 if(U_FAILURE(*pErrorCode)) { 141 return 0; 142 } 143 144 if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || 145 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits 146 ) { 147 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 148 return 0; 149 } 150 151 /* enough data for a trie header? */ 152 if(length<(int32_t)sizeof(UTrie2Header)) { 153 *pErrorCode=U_INVALID_FORMAT_ERROR; 154 return 0; 155 } 156 157 /* check the signature */ 158 header=(const UTrie2Header *)data; 159 if(header->signature!=UTRIE2_SIG) { 160 *pErrorCode=U_INVALID_FORMAT_ERROR; 161 return 0; 162 } 163 164 /* get the options */ 165 if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { 166 *pErrorCode=U_INVALID_FORMAT_ERROR; 167 return 0; 168 } 169 170 /* get the length values and offsets */ 171 uprv_memset(&tempTrie, 0, sizeof(tempTrie)); 172 tempTrie.indexLength=header->indexLength; 173 tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; 174 tempTrie.index2NullOffset=header->index2NullOffset; 175 tempTrie.dataNullOffset=header->dataNullOffset; 176 177 tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; 178 tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; 179 if(valueBits==UTRIE2_16_VALUE_BITS) { 180 tempTrie.highValueIndex+=tempTrie.indexLength; 181 } 182 183 /* calculate the actual length */ 184 actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; 185 if(valueBits==UTRIE2_16_VALUE_BITS) { 186 actualLength+=tempTrie.dataLength*2; 187 } else { 188 actualLength+=tempTrie.dataLength*4; 189 } 190 if(length<actualLength) { 191 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ 192 return 0; 193 } 194 195 /* allocate the trie */ 196 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 197 if(trie==NULL) { 198 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 199 return 0; 200 } 201 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); 202 trie->memory=(uint32_t *)data; 203 trie->length=actualLength; 204 trie->isMemoryOwned=FALSE; 205 206 /* set the pointers to its index and data arrays */ 207 p16=(const uint16_t *)(header+1); 208 trie->index=p16; 209 p16+=trie->indexLength; 210 211 /* get the data */ 212 switch(valueBits) { 213 case UTRIE2_16_VALUE_BITS: 214 trie->data16=p16; 215 trie->data32=NULL; 216 trie->initialValue=trie->index[trie->dataNullOffset]; 217 trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; 218 break; 219 case UTRIE2_32_VALUE_BITS: 220 trie->data16=NULL; 221 trie->data32=(const uint32_t *)p16; 222 trie->initialValue=trie->data32[trie->dataNullOffset]; 223 trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; 224 break; 225 default: 226 *pErrorCode=U_INVALID_FORMAT_ERROR; 227 return 0; 228 } 229 230 if(pActualLength!=NULL) { 231 *pActualLength=actualLength; 232 } 233 return trie; 234 } 235 236 U_CAPI UTrie2 * U_EXPORT2 237 utrie2_openDummy(UTrie2ValueBits valueBits, 238 uint32_t initialValue, uint32_t errorValue, 239 UErrorCode *pErrorCode) { 240 UTrie2 *trie; 241 UTrie2Header *header; 242 uint32_t *p; 243 uint16_t *dest16; 244 int32_t indexLength, dataLength, length, i; 245 int32_t dataMove; /* >0 if the data is moved to the end of the index array */ 246 247 if(U_FAILURE(*pErrorCode)) { 248 return 0; 249 } 250 251 if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { 252 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 253 return 0; 254 } 255 256 /* calculate the total length of the dummy trie data */ 257 indexLength=UTRIE2_INDEX_1_OFFSET; 258 dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; 259 length=(int32_t)sizeof(UTrie2Header)+indexLength*2; 260 if(valueBits==UTRIE2_16_VALUE_BITS) { 261 length+=dataLength*2; 262 } else { 263 length+=dataLength*4; 264 } 265 266 /* allocate the trie */ 267 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 268 if(trie==NULL) { 269 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 270 return 0; 271 } 272 uprv_memset(trie, 0, sizeof(UTrie2)); 273 trie->memory=uprv_malloc(length); 274 if(trie->memory==NULL) { 275 uprv_free(trie); 276 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 277 return 0; 278 } 279 trie->length=length; 280 trie->isMemoryOwned=TRUE; 281 282 /* set the UTrie2 fields */ 283 if(valueBits==UTRIE2_16_VALUE_BITS) { 284 dataMove=indexLength; 285 } else { 286 dataMove=0; 287 } 288 289 trie->indexLength=indexLength; 290 trie->dataLength=dataLength; 291 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; 292 trie->dataNullOffset=(uint16_t)dataMove; 293 trie->initialValue=initialValue; 294 trie->errorValue=errorValue; 295 trie->highStart=0; 296 trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; 297 298 /* set the header fields */ 299 header=(UTrie2Header *)trie->memory; 300 301 header->signature=UTRIE2_SIG; /* "Tri2" */ 302 header->options=(uint16_t)valueBits; 303 304 header->indexLength=(uint16_t)indexLength; 305 header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); 306 header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; 307 header->dataNullOffset=(uint16_t)dataMove; 308 header->shiftedHighStart=0; 309 310 /* fill the index and data arrays */ 311 dest16=(uint16_t *)(header+1); 312 trie->index=dest16; 313 314 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ 315 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { 316 *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */ 317 } 318 319 /* write UTF-8 2-byte index-2 values, not right-shifted */ 320 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ 321 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); 322 } 323 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ 324 *dest16++=(uint16_t)dataMove; 325 } 326 327 /* write the 16/32-bit data array */ 328 switch(valueBits) { 329 case UTRIE2_16_VALUE_BITS: 330 /* write 16-bit data values */ 331 trie->data16=dest16; 332 trie->data32=NULL; 333 for(i=0; i<0x80; ++i) { 334 *dest16++=(uint16_t)initialValue; 335 } 336 for(; i<0xc0; ++i) { 337 *dest16++=(uint16_t)errorValue; 338 } 339 /* highValue and reserved values */ 340 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { 341 *dest16++=(uint16_t)initialValue; 342 } 343 break; 344 case UTRIE2_32_VALUE_BITS: 345 /* write 32-bit data values */ 346 p=(uint32_t *)dest16; 347 trie->data16=NULL; 348 trie->data32=p; 349 for(i=0; i<0x80; ++i) { 350 *p++=initialValue; 351 } 352 for(; i<0xc0; ++i) { 353 *p++=errorValue; 354 } 355 /* highValue and reserved values */ 356 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { 357 *p++=initialValue; 358 } 359 break; 360 default: 361 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 362 return 0; 363 } 364 365 return trie; 366 } 367 368 U_CAPI void U_EXPORT2 369 utrie2_close(UTrie2 *trie) { 370 if(trie!=NULL) { 371 if(trie->isMemoryOwned) { 372 uprv_free(trie->memory); 373 } 374 if(trie->newTrie!=NULL) { 375 uprv_free(trie->newTrie->data); 376 uprv_free(trie->newTrie); 377 } 378 uprv_free(trie); 379 } 380 } 381 382 U_CAPI int32_t U_EXPORT2 383 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { 384 uint32_t signature; 385 if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { 386 return 0; 387 } 388 signature=*(const uint32_t *)data; 389 if(signature==UTRIE2_SIG) { 390 return 2; 391 } 392 if(anyEndianOk && signature==UTRIE2_OE_SIG) { 393 return 2; 394 } 395 if(signature==UTRIE_SIG) { 396 return 1; 397 } 398 if(anyEndianOk && signature==UTRIE_OE_SIG) { 399 return 1; 400 } 401 return 0; 402 } 403 404 U_CAPI UBool U_EXPORT2 405 utrie2_isFrozen(const UTrie2 *trie) { 406 return (UBool)(trie->newTrie==NULL); 407 } 408 409 U_CAPI int32_t U_EXPORT2 410 utrie2_serialize(const UTrie2 *trie, 411 void *data, int32_t capacity, 412 UErrorCode *pErrorCode) { 413 /* argument check */ 414 if(U_FAILURE(*pErrorCode)) { 415 return 0; 416 } 417 418 if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL || 419 capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0))) 420 ) { 421 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 422 return 0; 423 } 424 425 if(capacity>=trie->length) { 426 uprv_memcpy(data, trie->memory, trie->length); 427 } else { 428 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 429 } 430 return trie->length; 431 } 432 433 U_CAPI int32_t U_EXPORT2 434 utrie2_swap(const UDataSwapper *ds, 435 const void *inData, int32_t length, void *outData, 436 UErrorCode *pErrorCode) { 437 const UTrie2Header *inTrie; 438 UTrie2Header trie; 439 int32_t dataLength, size; 440 UTrie2ValueBits valueBits; 441 442 if(U_FAILURE(*pErrorCode)) { 443 return 0; 444 } 445 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { 446 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 447 return 0; 448 } 449 450 /* setup and swapping */ 451 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { 452 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 453 return 0; 454 } 455 456 inTrie=(const UTrie2Header *)inData; 457 trie.signature=ds->readUInt32(inTrie->signature); 458 trie.options=ds->readUInt16(inTrie->options); 459 trie.indexLength=ds->readUInt16(inTrie->indexLength); 460 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); 461 462 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); 463 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; 464 465 if( trie.signature!=UTRIE2_SIG || 466 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || 467 trie.indexLength<UTRIE2_INDEX_1_OFFSET || 468 dataLength<UTRIE2_DATA_START_OFFSET 469 ) { 470 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ 471 return 0; 472 } 473 474 size=sizeof(UTrie2Header)+trie.indexLength*2; 475 switch(valueBits) { 476 case UTRIE2_16_VALUE_BITS: 477 size+=dataLength*2; 478 break; 479 case UTRIE2_32_VALUE_BITS: 480 size+=dataLength*4; 481 break; 482 default: 483 *pErrorCode=U_INVALID_FORMAT_ERROR; 484 return 0; 485 } 486 487 if(length>=0) { 488 UTrie2Header *outTrie; 489 490 if(length<size) { 491 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 492 return 0; 493 } 494 495 outTrie=(UTrie2Header *)outData; 496 497 /* swap the header */ 498 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); 499 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); 500 501 /* swap the index and the data */ 502 switch(valueBits) { 503 case UTRIE2_16_VALUE_BITS: 504 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); 505 break; 506 case UTRIE2_32_VALUE_BITS: 507 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); 508 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, 509 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); 510 break; 511 default: 512 *pErrorCode=U_INVALID_FORMAT_ERROR; 513 return 0; 514 } 515 } 516 517 return size; 518 } 519 520 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c 521 // to avoid a dependency from utrie2.cpp on utrie.c. 522 523 /* enumeration -------------------------------------------------------------- */ 524 525 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) 526 527 /* default UTrie2EnumValue() returns the input value itself */ 528 static uint32_t U_CALLCONV 529 enumSameValue(const void * /*context*/, uint32_t value) { 530 return value; 531 } 532 533 /** 534 * Enumerate all ranges of code points with the same relevant values. 535 * The values are transformed from the raw trie entries by the enumValue function. 536 * 537 * Currently requires start<limit and both start and limit must be multiples 538 * of UTRIE2_DATA_BLOCK_LENGTH. 539 * 540 * Optimizations: 541 * - Skip a whole block if we know that it is filled with a single value, 542 * and it is the same as we visited just before. 543 * - Handle the null block specially because we know a priori that it is filled 544 * with a single value. 545 */ 546 static void 547 enumEitherTrie(const UTrie2 *trie, 548 UChar32 start, UChar32 limit, 549 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { 550 const uint32_t *data32; 551 const uint16_t *idx; 552 553 uint32_t value, prevValue, initialValue; 554 UChar32 c, prev, highStart; 555 int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; 556 557 if(enumRange==NULL) { 558 return; 559 } 560 if(enumValue==NULL) { 561 enumValue=enumSameValue; 562 } 563 564 if(trie->newTrie==NULL) { 565 /* frozen trie */ 566 idx=trie->index; 567 U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ 568 data32=trie->data32; 569 570 index2NullOffset=trie->index2NullOffset; 571 nullBlock=trie->dataNullOffset; 572 } else { 573 /* unfrozen, mutable trie */ 574 idx=NULL; 575 data32=trie->newTrie->data; 576 U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ 577 578 index2NullOffset=trie->newTrie->index2NullOffset; 579 nullBlock=trie->newTrie->dataNullOffset; 580 } 581 582 highStart=trie->highStart; 583 584 /* get the enumeration value that corresponds to an initial-value trie data entry */ 585 initialValue=enumValue(context, trie->initialValue); 586 587 /* set variables for previous range */ 588 prevI2Block=-1; 589 prevBlock=-1; 590 prev=start; 591 prevValue=0; 592 593 /* enumerate index-2 blocks */ 594 for(c=start; c<limit && c<highStart;) { 595 /* Code point limit for iterating inside this i2Block. */ 596 UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; 597 if(limit<tempLimit) { 598 tempLimit=limit; 599 } 600 if(c<=0xffff) { 601 if(!U_IS_SURROGATE(c)) { 602 i2Block=c>>UTRIE2_SHIFT_2; 603 } else if(U_IS_SURROGATE_LEAD(c)) { 604 /* 605 * Enumerate values for lead surrogate code points, not code units: 606 * This special block has half the normal length. 607 */ 608 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; 609 tempLimit=MIN_VALUE(0xdc00, limit); 610 } else { 611 /* 612 * Switch back to the normal part of the index-2 table. 613 * Enumerate the second half of the surrogates block. 614 */ 615 i2Block=0xd800>>UTRIE2_SHIFT_2; 616 tempLimit=MIN_VALUE(0xe000, limit); 617 } 618 } else { 619 /* supplementary code points */ 620 if(idx!=NULL) { 621 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ 622 (c>>UTRIE2_SHIFT_1)]; 623 } else { 624 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; 625 } 626 if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { 627 /* 628 * The index-2 block is the same as the previous one, and filled with prevValue. 629 * Only possible for supplementary code points because the linear-BMP index-2 630 * table creates unique i2Block values. 631 */ 632 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; 633 continue; 634 } 635 } 636 prevI2Block=i2Block; 637 if(i2Block==index2NullOffset) { 638 /* this is the null index-2 block */ 639 if(prevValue!=initialValue) { 640 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 641 return; 642 } 643 prevBlock=nullBlock; 644 prev=c; 645 prevValue=initialValue; 646 } 647 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; 648 } else { 649 /* enumerate data blocks for one index-2 block */ 650 int32_t i2, i2Limit; 651 i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 652 if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { 653 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 654 } else { 655 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; 656 } 657 for(; i2<i2Limit; ++i2) { 658 if(idx!=NULL) { 659 block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; 660 } else { 661 block=trie->newTrie->index2[i2Block+i2]; 662 } 663 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { 664 /* the block is the same as the previous one, and filled with prevValue */ 665 c+=UTRIE2_DATA_BLOCK_LENGTH; 666 continue; 667 } 668 prevBlock=block; 669 if(block==nullBlock) { 670 /* this is the null data block */ 671 if(prevValue!=initialValue) { 672 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 673 return; 674 } 675 prev=c; 676 prevValue=initialValue; 677 } 678 c+=UTRIE2_DATA_BLOCK_LENGTH; 679 } else { 680 for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { 681 value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); 682 if(value!=prevValue) { 683 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 684 return; 685 } 686 prev=c; 687 prevValue=value; 688 } 689 ++c; 690 } 691 } 692 } 693 } 694 } 695 696 if(c>limit) { 697 c=limit; /* could be higher if in the index2NullOffset */ 698 } else if(c<limit) { 699 /* c==highStart<limit */ 700 uint32_t highValue; 701 if(idx!=NULL) { 702 highValue= 703 data32!=NULL ? 704 data32[trie->highValueIndex] : 705 idx[trie->highValueIndex]; 706 } else { 707 highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; 708 } 709 value=enumValue(context, highValue); 710 if(value!=prevValue) { 711 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 712 return; 713 } 714 prev=c; 715 prevValue=value; 716 } 717 c=limit; 718 } 719 720 /* deliver last range */ 721 enumRange(context, prev, c-1, prevValue); 722 } 723 724 U_CAPI void U_EXPORT2 725 utrie2_enum(const UTrie2 *trie, 726 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { 727 enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); 728 } 729 730 U_CAPI void U_EXPORT2 731 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, 732 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, 733 const void *context) { 734 if(!U16_IS_LEAD(lead)) { 735 return; 736 } 737 lead=(lead-0xd7c0)<<10; /* start code point */ 738 enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); 739 } 740 741 /* C++ convenience wrappers ------------------------------------------------- */ 742 743 U_NAMESPACE_BEGIN 744 745 uint16_t BackwardUTrie2StringIterator::previous16() { 746 codePointLimit=codePointStart; 747 if(start>=codePointStart) { 748 codePoint=U_SENTINEL; 749 return trie->errorValue; 750 } 751 uint16_t result; 752 UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); 753 return result; 754 } 755 756 uint16_t ForwardUTrie2StringIterator::next16() { 757 codePointStart=codePointLimit; 758 if(codePointLimit==limit) { 759 codePoint=U_SENTINEL; 760 return trie->errorValue; 761 } 762 uint16_t result; 763 UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); 764 return result; 765 } 766 767 U_NAMESPACE_END 768