1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 2001-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: utrie2.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2008aug16 (starting from a copy of utrie.c) 14 * created by: Markus W. Scherer 15 * 16 * This is a common implementation of a Unicode trie. 17 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with 18 * Unicode code points (0..0x10ffff). 19 * This is the second common version of a Unicode trie (hence the name UTrie2). 20 * See utrie2.h for a comparison. 21 * 22 * This file contains only the runtime and enumeration code, for read-only access. 23 * See utrie2_builder.c for the builder code. 24 */ 25 #ifdef UTRIE2_DEBUG 26 # include <stdio.h> 27 #endif 28 29 #include "unicode/utypes.h" 30 #include "unicode/utf.h" 31 #include "unicode/utf8.h" 32 #include "unicode/utf16.h" 33 #include "cmemory.h" 34 #include "utrie2.h" 35 #include "utrie2_impl.h" 36 #include "uassert.h" 37 38 /* Public UTrie2 API implementation ----------------------------------------- */ 39 40 static uint32_t 41 get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { 42 int32_t i2, block; 43 44 if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { 45 return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; 46 } 47 48 if(U_IS_LEAD(c) && fromLSCP) { 49 i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ 50 (c>>UTRIE2_SHIFT_2); 51 } else { 52 i2=trie->index1[c>>UTRIE2_SHIFT_1]+ 53 ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); 54 } 55 block=trie->index2[i2]; 56 return trie->data[block+(c&UTRIE2_DATA_MASK)]; 57 } 58 59 U_CAPI uint32_t U_EXPORT2 60 utrie2_get32(const UTrie2 *trie, UChar32 c) { 61 if(trie->data16!=NULL) { 62 return UTRIE2_GET16(trie, c); 63 } else if(trie->data32!=NULL) { 64 return UTRIE2_GET32(trie, c); 65 } else if((uint32_t)c>0x10ffff) { 66 return trie->errorValue; 67 } else { 68 return get32(trie->newTrie, c, TRUE); 69 } 70 } 71 72 U_CAPI uint32_t U_EXPORT2 73 utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { 74 if(!U_IS_LEAD(c)) { 75 return trie->errorValue; 76 } 77 if(trie->data16!=NULL) { 78 return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); 79 } else if(trie->data32!=NULL) { 80 return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); 81 } else { 82 return get32(trie->newTrie, c, FALSE); 83 } 84 } 85 86 static inline int32_t 87 u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { 88 int32_t idx= 89 _UTRIE2_INDEX_FROM_CP( 90 trie, 91 trie->data32==NULL ? trie->indexLength : 0, 92 c); 93 return (idx<<3)|i; 94 } 95 96 U_CAPI int32_t U_EXPORT2 97 utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, 98 const uint8_t *src, const uint8_t *limit) { 99 int32_t i, length; 100 i=0; 101 /* support 64-bit pointers by avoiding cast of arbitrary difference */ 102 if((limit-src)<=7) { 103 length=(int32_t)(limit-src); 104 } else { 105 length=7; 106 } 107 c=utf8_nextCharSafeBody(src, &i, length, c, -1); 108 return u8Index(trie, c, i); 109 } 110 111 U_CAPI int32_t U_EXPORT2 112 utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, 113 const uint8_t *start, const uint8_t *src) { 114 int32_t i, length; 115 /* support 64-bit pointers by avoiding cast of arbitrary difference */ 116 if((src-start)<=7) { 117 i=length=(int32_t)(src-start); 118 } else { 119 i=length=7; 120 start=src-7; 121 } 122 c=utf8_prevCharSafeBody(start, 0, &i, c, -1); 123 i=length-i; /* number of bytes read backward from src */ 124 return u8Index(trie, c, i); 125 } 126 127 U_CAPI UTrie2 * U_EXPORT2 128 utrie2_openFromSerialized(UTrie2ValueBits valueBits, 129 const void *data, int32_t length, int32_t *pActualLength, 130 UErrorCode *pErrorCode) { 131 const UTrie2Header *header; 132 const uint16_t *p16; 133 int32_t actualLength; 134 135 UTrie2 tempTrie; 136 UTrie2 *trie; 137 138 if(U_FAILURE(*pErrorCode)) { 139 return 0; 140 } 141 142 if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || 143 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits 144 ) { 145 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 146 return 0; 147 } 148 149 /* enough data for a trie header? */ 150 if(length<(int32_t)sizeof(UTrie2Header)) { 151 *pErrorCode=U_INVALID_FORMAT_ERROR; 152 return 0; 153 } 154 155 /* check the signature */ 156 header=(const UTrie2Header *)data; 157 if(header->signature!=UTRIE2_SIG) { 158 *pErrorCode=U_INVALID_FORMAT_ERROR; 159 return 0; 160 } 161 162 /* get the options */ 163 if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { 164 *pErrorCode=U_INVALID_FORMAT_ERROR; 165 return 0; 166 } 167 168 /* get the length values and offsets */ 169 uprv_memset(&tempTrie, 0, sizeof(tempTrie)); 170 tempTrie.indexLength=header->indexLength; 171 tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; 172 tempTrie.index2NullOffset=header->index2NullOffset; 173 tempTrie.dataNullOffset=header->dataNullOffset; 174 175 tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; 176 tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; 177 if(valueBits==UTRIE2_16_VALUE_BITS) { 178 tempTrie.highValueIndex+=tempTrie.indexLength; 179 } 180 181 /* calculate the actual length */ 182 actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; 183 if(valueBits==UTRIE2_16_VALUE_BITS) { 184 actualLength+=tempTrie.dataLength*2; 185 } else { 186 actualLength+=tempTrie.dataLength*4; 187 } 188 if(length<actualLength) { 189 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ 190 return 0; 191 } 192 193 /* allocate the trie */ 194 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 195 if(trie==NULL) { 196 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 197 return 0; 198 } 199 uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); 200 trie->memory=(uint32_t *)data; 201 trie->length=actualLength; 202 trie->isMemoryOwned=FALSE; 203 204 /* set the pointers to its index and data arrays */ 205 p16=(const uint16_t *)(header+1); 206 trie->index=p16; 207 p16+=trie->indexLength; 208 209 /* get the data */ 210 switch(valueBits) { 211 case UTRIE2_16_VALUE_BITS: 212 trie->data16=p16; 213 trie->data32=NULL; 214 trie->initialValue=trie->index[trie->dataNullOffset]; 215 trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; 216 break; 217 case UTRIE2_32_VALUE_BITS: 218 trie->data16=NULL; 219 trie->data32=(const uint32_t *)p16; 220 trie->initialValue=trie->data32[trie->dataNullOffset]; 221 trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; 222 break; 223 default: 224 *pErrorCode=U_INVALID_FORMAT_ERROR; 225 return 0; 226 } 227 228 if(pActualLength!=NULL) { 229 *pActualLength=actualLength; 230 } 231 return trie; 232 } 233 234 U_CAPI UTrie2 * U_EXPORT2 235 utrie2_openDummy(UTrie2ValueBits valueBits, 236 uint32_t initialValue, uint32_t errorValue, 237 UErrorCode *pErrorCode) { 238 UTrie2 *trie; 239 UTrie2Header *header; 240 uint32_t *p; 241 uint16_t *dest16; 242 int32_t indexLength, dataLength, length, i; 243 int32_t dataMove; /* >0 if the data is moved to the end of the index array */ 244 245 if(U_FAILURE(*pErrorCode)) { 246 return 0; 247 } 248 249 if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { 250 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 251 return 0; 252 } 253 254 /* calculate the total length of the dummy trie data */ 255 indexLength=UTRIE2_INDEX_1_OFFSET; 256 dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; 257 length=(int32_t)sizeof(UTrie2Header)+indexLength*2; 258 if(valueBits==UTRIE2_16_VALUE_BITS) { 259 length+=dataLength*2; 260 } else { 261 length+=dataLength*4; 262 } 263 264 /* allocate the trie */ 265 trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 266 if(trie==NULL) { 267 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 268 return 0; 269 } 270 uprv_memset(trie, 0, sizeof(UTrie2)); 271 trie->memory=uprv_malloc(length); 272 if(trie->memory==NULL) { 273 uprv_free(trie); 274 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 275 return 0; 276 } 277 trie->length=length; 278 trie->isMemoryOwned=TRUE; 279 280 /* set the UTrie2 fields */ 281 if(valueBits==UTRIE2_16_VALUE_BITS) { 282 dataMove=indexLength; 283 } else { 284 dataMove=0; 285 } 286 287 trie->indexLength=indexLength; 288 trie->dataLength=dataLength; 289 trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; 290 trie->dataNullOffset=(uint16_t)dataMove; 291 trie->initialValue=initialValue; 292 trie->errorValue=errorValue; 293 trie->highStart=0; 294 trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; 295 296 /* set the header fields */ 297 header=(UTrie2Header *)trie->memory; 298 299 header->signature=UTRIE2_SIG; /* "Tri2" */ 300 header->options=(uint16_t)valueBits; 301 302 header->indexLength=(uint16_t)indexLength; 303 header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); 304 header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; 305 header->dataNullOffset=(uint16_t)dataMove; 306 header->shiftedHighStart=0; 307 308 /* fill the index and data arrays */ 309 dest16=(uint16_t *)(header+1); 310 trie->index=dest16; 311 312 /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ 313 for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { 314 *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */ 315 } 316 317 /* write UTF-8 2-byte index-2 values, not right-shifted */ 318 for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ 319 *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); 320 } 321 for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ 322 *dest16++=(uint16_t)dataMove; 323 } 324 325 /* write the 16/32-bit data array */ 326 switch(valueBits) { 327 case UTRIE2_16_VALUE_BITS: 328 /* write 16-bit data values */ 329 trie->data16=dest16; 330 trie->data32=NULL; 331 for(i=0; i<0x80; ++i) { 332 *dest16++=(uint16_t)initialValue; 333 } 334 for(; i<0xc0; ++i) { 335 *dest16++=(uint16_t)errorValue; 336 } 337 /* highValue and reserved values */ 338 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { 339 *dest16++=(uint16_t)initialValue; 340 } 341 break; 342 case UTRIE2_32_VALUE_BITS: 343 /* write 32-bit data values */ 344 p=(uint32_t *)dest16; 345 trie->data16=NULL; 346 trie->data32=p; 347 for(i=0; i<0x80; ++i) { 348 *p++=initialValue; 349 } 350 for(; i<0xc0; ++i) { 351 *p++=errorValue; 352 } 353 /* highValue and reserved values */ 354 for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { 355 *p++=initialValue; 356 } 357 break; 358 default: 359 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 360 return 0; 361 } 362 363 return trie; 364 } 365 366 U_CAPI void U_EXPORT2 367 utrie2_close(UTrie2 *trie) { 368 if(trie!=NULL) { 369 if(trie->isMemoryOwned) { 370 uprv_free(trie->memory); 371 } 372 if(trie->newTrie!=NULL) { 373 uprv_free(trie->newTrie->data); 374 uprv_free(trie->newTrie); 375 } 376 uprv_free(trie); 377 } 378 } 379 380 U_CAPI int32_t U_EXPORT2 381 utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { 382 uint32_t signature; 383 if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { 384 return 0; 385 } 386 signature=*(const uint32_t *)data; 387 if(signature==UTRIE2_SIG) { 388 return 2; 389 } 390 if(anyEndianOk && signature==UTRIE2_OE_SIG) { 391 return 2; 392 } 393 if(signature==UTRIE_SIG) { 394 return 1; 395 } 396 if(anyEndianOk && signature==UTRIE_OE_SIG) { 397 return 1; 398 } 399 return 0; 400 } 401 402 U_CAPI int32_t U_EXPORT2 403 utrie2_swap(const UDataSwapper *ds, 404 const void *inData, int32_t length, void *outData, 405 UErrorCode *pErrorCode) { 406 const UTrie2Header *inTrie; 407 UTrie2Header trie; 408 int32_t dataLength, size; 409 UTrie2ValueBits valueBits; 410 411 if(U_FAILURE(*pErrorCode)) { 412 return 0; 413 } 414 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { 415 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 416 return 0; 417 } 418 419 /* setup and swapping */ 420 if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { 421 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 422 return 0; 423 } 424 425 inTrie=(const UTrie2Header *)inData; 426 trie.signature=ds->readUInt32(inTrie->signature); 427 trie.options=ds->readUInt16(inTrie->options); 428 trie.indexLength=ds->readUInt16(inTrie->indexLength); 429 trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); 430 431 valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); 432 dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; 433 434 if( trie.signature!=UTRIE2_SIG || 435 valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || 436 trie.indexLength<UTRIE2_INDEX_1_OFFSET || 437 dataLength<UTRIE2_DATA_START_OFFSET 438 ) { 439 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ 440 return 0; 441 } 442 443 size=sizeof(UTrie2Header)+trie.indexLength*2; 444 switch(valueBits) { 445 case UTRIE2_16_VALUE_BITS: 446 size+=dataLength*2; 447 break; 448 case UTRIE2_32_VALUE_BITS: 449 size+=dataLength*4; 450 break; 451 default: 452 *pErrorCode=U_INVALID_FORMAT_ERROR; 453 return 0; 454 } 455 456 if(length>=0) { 457 UTrie2Header *outTrie; 458 459 if(length<size) { 460 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 461 return 0; 462 } 463 464 outTrie=(UTrie2Header *)outData; 465 466 /* swap the header */ 467 ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); 468 ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); 469 470 /* swap the index and the data */ 471 switch(valueBits) { 472 case UTRIE2_16_VALUE_BITS: 473 ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); 474 break; 475 case UTRIE2_32_VALUE_BITS: 476 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); 477 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, 478 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); 479 break; 480 default: 481 *pErrorCode=U_INVALID_FORMAT_ERROR; 482 return 0; 483 } 484 } 485 486 return size; 487 } 488 489 // utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c 490 // to avoid a dependency from utrie2.cpp on utrie.c. 491 492 /* enumeration -------------------------------------------------------------- */ 493 494 #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) 495 496 /* default UTrie2EnumValue() returns the input value itself */ 497 static uint32_t U_CALLCONV 498 enumSameValue(const void * /*context*/, uint32_t value) { 499 return value; 500 } 501 502 /** 503 * Enumerate all ranges of code points with the same relevant values. 504 * The values are transformed from the raw trie entries by the enumValue function. 505 * 506 * Currently requires start<limit and both start and limit must be multiples 507 * of UTRIE2_DATA_BLOCK_LENGTH. 508 * 509 * Optimizations: 510 * - Skip a whole block if we know that it is filled with a single value, 511 * and it is the same as we visited just before. 512 * - Handle the null block specially because we know a priori that it is filled 513 * with a single value. 514 */ 515 static void 516 enumEitherTrie(const UTrie2 *trie, 517 UChar32 start, UChar32 limit, 518 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { 519 const uint32_t *data32; 520 const uint16_t *idx; 521 522 uint32_t value, prevValue, initialValue; 523 UChar32 c, prev, highStart; 524 int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; 525 526 if(enumRange==NULL) { 527 return; 528 } 529 if(enumValue==NULL) { 530 enumValue=enumSameValue; 531 } 532 533 if(trie->newTrie==NULL) { 534 /* frozen trie */ 535 idx=trie->index; 536 U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ 537 data32=trie->data32; 538 539 index2NullOffset=trie->index2NullOffset; 540 nullBlock=trie->dataNullOffset; 541 } else { 542 /* unfrozen, mutable trie */ 543 idx=NULL; 544 data32=trie->newTrie->data; 545 U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ 546 547 index2NullOffset=trie->newTrie->index2NullOffset; 548 nullBlock=trie->newTrie->dataNullOffset; 549 } 550 551 highStart=trie->highStart; 552 553 /* get the enumeration value that corresponds to an initial-value trie data entry */ 554 initialValue=enumValue(context, trie->initialValue); 555 556 /* set variables for previous range */ 557 prevI2Block=-1; 558 prevBlock=-1; 559 prev=start; 560 prevValue=0; 561 562 /* enumerate index-2 blocks */ 563 for(c=start; c<limit && c<highStart;) { 564 /* Code point limit for iterating inside this i2Block. */ 565 UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; 566 if(limit<tempLimit) { 567 tempLimit=limit; 568 } 569 if(c<=0xffff) { 570 if(!U_IS_SURROGATE(c)) { 571 i2Block=c>>UTRIE2_SHIFT_2; 572 } else if(U_IS_SURROGATE_LEAD(c)) { 573 /* 574 * Enumerate values for lead surrogate code points, not code units: 575 * This special block has half the normal length. 576 */ 577 i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; 578 tempLimit=MIN_VALUE(0xdc00, limit); 579 } else { 580 /* 581 * Switch back to the normal part of the index-2 table. 582 * Enumerate the second half of the surrogates block. 583 */ 584 i2Block=0xd800>>UTRIE2_SHIFT_2; 585 tempLimit=MIN_VALUE(0xe000, limit); 586 } 587 } else { 588 /* supplementary code points */ 589 if(idx!=NULL) { 590 i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ 591 (c>>UTRIE2_SHIFT_1)]; 592 } else { 593 i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; 594 } 595 if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { 596 /* 597 * The index-2 block is the same as the previous one, and filled with prevValue. 598 * Only possible for supplementary code points because the linear-BMP index-2 599 * table creates unique i2Block values. 600 */ 601 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; 602 continue; 603 } 604 } 605 prevI2Block=i2Block; 606 if(i2Block==index2NullOffset) { 607 /* this is the null index-2 block */ 608 if(prevValue!=initialValue) { 609 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 610 return; 611 } 612 prevBlock=nullBlock; 613 prev=c; 614 prevValue=initialValue; 615 } 616 c+=UTRIE2_CP_PER_INDEX_1_ENTRY; 617 } else { 618 /* enumerate data blocks for one index-2 block */ 619 int32_t i2, i2Limit; 620 i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 621 if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { 622 i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 623 } else { 624 i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; 625 } 626 for(; i2<i2Limit; ++i2) { 627 if(idx!=NULL) { 628 block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; 629 } else { 630 block=trie->newTrie->index2[i2Block+i2]; 631 } 632 if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { 633 /* the block is the same as the previous one, and filled with prevValue */ 634 c+=UTRIE2_DATA_BLOCK_LENGTH; 635 continue; 636 } 637 prevBlock=block; 638 if(block==nullBlock) { 639 /* this is the null data block */ 640 if(prevValue!=initialValue) { 641 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 642 return; 643 } 644 prev=c; 645 prevValue=initialValue; 646 } 647 c+=UTRIE2_DATA_BLOCK_LENGTH; 648 } else { 649 for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { 650 value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); 651 if(value!=prevValue) { 652 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 653 return; 654 } 655 prev=c; 656 prevValue=value; 657 } 658 ++c; 659 } 660 } 661 } 662 } 663 } 664 665 if(c>limit) { 666 c=limit; /* could be higher if in the index2NullOffset */ 667 } else if(c<limit) { 668 /* c==highStart<limit */ 669 uint32_t highValue; 670 if(idx!=NULL) { 671 highValue= 672 data32!=NULL ? 673 data32[trie->highValueIndex] : 674 idx[trie->highValueIndex]; 675 } else { 676 highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; 677 } 678 value=enumValue(context, highValue); 679 if(value!=prevValue) { 680 if(prev<c && !enumRange(context, prev, c-1, prevValue)) { 681 return; 682 } 683 prev=c; 684 prevValue=value; 685 } 686 c=limit; 687 } 688 689 /* deliver last range */ 690 enumRange(context, prev, c-1, prevValue); 691 } 692 693 U_CAPI void U_EXPORT2 694 utrie2_enum(const UTrie2 *trie, 695 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { 696 enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); 697 } 698 699 U_CAPI void U_EXPORT2 700 utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, 701 UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, 702 const void *context) { 703 if(!U16_IS_LEAD(lead)) { 704 return; 705 } 706 lead=(lead-0xd7c0)<<10; /* start code point */ 707 enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); 708 } 709 710 /* C++ convenience wrappers ------------------------------------------------- */ 711 712 U_NAMESPACE_BEGIN 713 714 uint16_t BackwardUTrie2StringIterator::previous16() { 715 codePointLimit=codePointStart; 716 if(start>=codePointStart) { 717 codePoint=U_SENTINEL; 718 return 0; 719 } 720 uint16_t result; 721 UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); 722 return result; 723 } 724 725 uint16_t ForwardUTrie2StringIterator::next16() { 726 codePointStart=codePointLimit; 727 if(codePointLimit==limit) { 728 codePoint=U_SENTINEL; 729 return 0; 730 } 731 uint16_t result; 732 UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); 733 return result; 734 } 735 736 UTrie2 *UTrie2Singleton::getInstance(InstantiatorFn *instantiator, const void *context, 737 UErrorCode &errorCode) { 738 void *duplicate; 739 UTrie2 *instance=(UTrie2 *)singleton.getInstance(instantiator, context, duplicate, errorCode); 740 utrie2_close((UTrie2 *)duplicate); 741 return instance; 742 } 743 744 U_NAMESPACE_END 745