1 /* 2 ******************************************************************************** 3 * Copyright (C) 1996-2010, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************** 6 * 7 * File UCHAR.C 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 04/02/97 aliu Creation. 13 * 4/15/99 Madhu Updated all the function definitions for C Implementation 14 * 5/20/99 Madhu Added the function u_getVersion() 15 * 8/19/1999 srl Upgraded scripts to Unicode3.0 16 * 11/11/1999 weiv added u_isalnum(), cleaned comments 17 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. 18 * 06/20/2000 helena OS/400 port changes; mostly typecast. 19 ****************************************************************************** 20 */ 21 22 #include "unicode/utypes.h" 23 #include "unicode/uchar.h" 24 #include "unicode/uscript.h" 25 #include "unicode/udata.h" 26 #include "umutex.h" 27 #include "cmemory.h" 28 #include "ucln_cmn.h" 29 #include "utrie2.h" 30 #include "udataswp.h" 31 #include "uprops.h" 32 #include "ustr_imp.h" 33 34 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 35 36 /* uchar_props_data.c is machine-generated by genprops --csource */ 37 #include "uchar_props_data.c" 38 39 /* constants and macros for access to the data ------------------------------ */ 40 41 /* getting a uint32_t properties word from the data */ 42 #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); 43 44 U_CFUNC UBool 45 uprv_haveProperties(UErrorCode *pErrorCode) { 46 if(U_FAILURE(*pErrorCode)) { 47 return FALSE; 48 } 49 return TRUE; 50 } 51 52 /* API functions ------------------------------------------------------------ */ 53 54 /* Gets the Unicode character's general category.*/ 55 U_CAPI int8_t U_EXPORT2 56 u_charType(UChar32 c) { 57 uint32_t props; 58 GET_PROPS(c, props); 59 return (int8_t)GET_CATEGORY(props); 60 } 61 62 /* Enumerate all code points with their general categories. */ 63 struct _EnumTypeCallback { 64 UCharEnumTypeRange *enumRange; 65 const void *context; 66 }; 67 68 static uint32_t U_CALLCONV 69 _enumTypeValue(const void *context, uint32_t value) { 70 return GET_CATEGORY(value); 71 } 72 73 static UBool U_CALLCONV 74 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 75 /* just cast the value to UCharCategory */ 76 return ((struct _EnumTypeCallback *)context)-> 77 enumRange(((struct _EnumTypeCallback *)context)->context, 78 start, end+1, (UCharCategory)value); 79 } 80 81 U_CAPI void U_EXPORT2 82 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { 83 struct _EnumTypeCallback callback; 84 85 if(enumRange==NULL) { 86 return; 87 } 88 89 callback.enumRange=enumRange; 90 callback.context=context; 91 utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); 92 } 93 94 /* Checks if ch is a lower case letter.*/ 95 U_CAPI UBool U_EXPORT2 96 u_islower(UChar32 c) { 97 uint32_t props; 98 GET_PROPS(c, props); 99 return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); 100 } 101 102 /* Checks if ch is an upper case letter.*/ 103 U_CAPI UBool U_EXPORT2 104 u_isupper(UChar32 c) { 105 uint32_t props; 106 GET_PROPS(c, props); 107 return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); 108 } 109 110 /* Checks if ch is a title case letter; usually upper case letters.*/ 111 U_CAPI UBool U_EXPORT2 112 u_istitle(UChar32 c) { 113 uint32_t props; 114 GET_PROPS(c, props); 115 return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); 116 } 117 118 /* Checks if ch is a decimal digit. */ 119 U_CAPI UBool U_EXPORT2 120 u_isdigit(UChar32 c) { 121 uint32_t props; 122 GET_PROPS(c, props); 123 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 124 } 125 126 U_CAPI UBool U_EXPORT2 127 u_isxdigit(UChar32 c) { 128 uint32_t props; 129 130 /* check ASCII and Fullwidth ASCII a-fA-F */ 131 if( 132 (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || 133 (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) 134 ) { 135 return TRUE; 136 } 137 138 GET_PROPS(c, props); 139 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 140 } 141 142 /* Checks if the Unicode character is a letter.*/ 143 U_CAPI UBool U_EXPORT2 144 u_isalpha(UChar32 c) { 145 uint32_t props; 146 GET_PROPS(c, props); 147 return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); 148 } 149 150 U_CAPI UBool U_EXPORT2 151 u_isUAlphabetic(UChar32 c) { 152 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; 153 } 154 155 /* Checks if c is a letter or a decimal digit */ 156 U_CAPI UBool U_EXPORT2 157 u_isalnum(UChar32 c) { 158 uint32_t props; 159 GET_PROPS(c, props); 160 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); 161 } 162 163 /** 164 * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 165 * @internal 166 */ 167 U_CFUNC UBool 168 u_isalnumPOSIX(UChar32 c) { 169 return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); 170 } 171 172 /* Checks if ch is a unicode character with assigned character type.*/ 173 U_CAPI UBool U_EXPORT2 174 u_isdefined(UChar32 c) { 175 uint32_t props; 176 GET_PROPS(c, props); 177 return (UBool)(GET_CATEGORY(props)!=0); 178 } 179 180 /* Checks if the Unicode character is a base form character that can take a diacritic.*/ 181 U_CAPI UBool U_EXPORT2 182 u_isbase(UChar32 c) { 183 uint32_t props; 184 GET_PROPS(c, props); 185 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); 186 } 187 188 /* Checks if the Unicode character is a control character.*/ 189 U_CAPI UBool U_EXPORT2 190 u_iscntrl(UChar32 c) { 191 uint32_t props; 192 GET_PROPS(c, props); 193 return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); 194 } 195 196 U_CAPI UBool U_EXPORT2 197 u_isISOControl(UChar32 c) { 198 return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); 199 } 200 201 /* Some control characters that are used as space. */ 202 #define IS_THAT_CONTROL_SPACE(c) \ 203 (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) 204 205 /* Java has decided that U+0085 New Line is not whitespace any more. */ 206 #define IS_THAT_ASCII_CONTROL_SPACE(c) \ 207 (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) 208 209 /* Checks if the Unicode character is a space character.*/ 210 U_CAPI UBool U_EXPORT2 211 u_isspace(UChar32 c) { 212 uint32_t props; 213 GET_PROPS(c, props); 214 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); 215 } 216 217 U_CAPI UBool U_EXPORT2 218 u_isJavaSpaceChar(UChar32 c) { 219 uint32_t props; 220 GET_PROPS(c, props); 221 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); 222 } 223 224 /* Checks if the Unicode character is a whitespace character.*/ 225 U_CAPI UBool U_EXPORT2 226 u_isWhitespace(UChar32 c) { 227 uint32_t props; 228 GET_PROPS(c, props); 229 return (UBool)( 230 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && 231 c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ 232 IS_THAT_ASCII_CONTROL_SPACE(c) 233 ); 234 } 235 236 U_CAPI UBool U_EXPORT2 237 u_isblank(UChar32 c) { 238 if((uint32_t)c<=0x9f) { 239 return c==9 || c==0x20; /* TAB or SPACE */ 240 } else { 241 /* Zs */ 242 uint32_t props; 243 GET_PROPS(c, props); 244 return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); 245 } 246 } 247 248 U_CAPI UBool U_EXPORT2 249 u_isUWhiteSpace(UChar32 c) { 250 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; 251 } 252 253 /* Checks if the Unicode character is printable.*/ 254 U_CAPI UBool U_EXPORT2 255 u_isprint(UChar32 c) { 256 uint32_t props; 257 GET_PROPS(c, props); 258 /* comparing ==0 returns FALSE for the categories mentioned */ 259 return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); 260 } 261 262 /** 263 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. 264 * Implements UCHAR_POSIX_PRINT. 265 * @internal 266 */ 267 U_CFUNC UBool 268 u_isprintPOSIX(UChar32 c) { 269 uint32_t props; 270 GET_PROPS(c, props); 271 /* 272 * The only cntrl character in graph+blank is TAB (in blank). 273 * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 274 */ 275 return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); 276 } 277 278 U_CAPI UBool U_EXPORT2 279 u_isgraph(UChar32 c) { 280 uint32_t props; 281 GET_PROPS(c, props); 282 /* comparing ==0 returns FALSE for the categories mentioned */ 283 return (UBool)((CAT_MASK(props)& 284 (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 285 ==0); 286 } 287 288 /** 289 * Checks if c is in 290 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] 291 * with space=\p{Whitespace} and Control=Cc. 292 * Implements UCHAR_POSIX_GRAPH. 293 * @internal 294 */ 295 U_CFUNC UBool 296 u_isgraphPOSIX(UChar32 c) { 297 uint32_t props; 298 GET_PROPS(c, props); 299 /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ 300 /* comparing ==0 returns FALSE for the categories mentioned */ 301 return (UBool)((CAT_MASK(props)& 302 (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 303 ==0); 304 } 305 306 U_CAPI UBool U_EXPORT2 307 u_ispunct(UChar32 c) { 308 uint32_t props; 309 GET_PROPS(c, props); 310 return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); 311 } 312 313 /* Checks if the Unicode character can start a Unicode identifier.*/ 314 U_CAPI UBool U_EXPORT2 315 u_isIDStart(UChar32 c) { 316 /* same as u_isalpha() */ 317 uint32_t props; 318 GET_PROPS(c, props); 319 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); 320 } 321 322 /* Checks if the Unicode character can be a Unicode identifier part other than starting the 323 identifier.*/ 324 U_CAPI UBool U_EXPORT2 325 u_isIDPart(UChar32 c) { 326 uint32_t props; 327 GET_PROPS(c, props); 328 return (UBool)( 329 (CAT_MASK(props)& 330 (U_GC_ND_MASK|U_GC_NL_MASK| 331 U_GC_L_MASK| 332 U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) 333 )!=0 || 334 u_isIDIgnorable(c)); 335 } 336 337 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ 338 U_CAPI UBool U_EXPORT2 339 u_isIDIgnorable(UChar32 c) { 340 if(c<=0x9f) { 341 return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); 342 } else { 343 uint32_t props; 344 GET_PROPS(c, props); 345 return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); 346 } 347 } 348 349 /*Checks if the Unicode character can start a Java identifier.*/ 350 U_CAPI UBool U_EXPORT2 351 u_isJavaIDStart(UChar32 c) { 352 uint32_t props; 353 GET_PROPS(c, props); 354 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); 355 } 356 357 /*Checks if the Unicode character can be a Java identifier part other than starting the 358 * identifier. 359 */ 360 U_CAPI UBool U_EXPORT2 361 u_isJavaIDPart(UChar32 c) { 362 uint32_t props; 363 GET_PROPS(c, props); 364 return (UBool)( 365 (CAT_MASK(props)& 366 (U_GC_ND_MASK|U_GC_NL_MASK| 367 U_GC_L_MASK| 368 U_GC_SC_MASK|U_GC_PC_MASK| 369 U_GC_MC_MASK|U_GC_MN_MASK) 370 )!=0 || 371 u_isIDIgnorable(c)); 372 } 373 374 U_CAPI int32_t U_EXPORT2 375 u_charDigitValue(UChar32 c) { 376 uint32_t props; 377 int32_t value; 378 GET_PROPS(c, props); 379 value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; 380 if(value<=9) { 381 return value; 382 } else { 383 return -1; 384 } 385 } 386 387 U_CAPI double U_EXPORT2 388 u_getNumericValue(UChar32 c) { 389 uint32_t props; 390 int32_t ntv; 391 GET_PROPS(c, props); 392 ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); 393 394 if(ntv==UPROPS_NTV_NONE) { 395 return U_NO_NUMERIC_VALUE; 396 } else if(ntv<UPROPS_NTV_DIGIT_START) { 397 /* decimal digit */ 398 return ntv-UPROPS_NTV_DECIMAL_START; 399 } else if(ntv<UPROPS_NTV_NUMERIC_START) { 400 /* other digit */ 401 return ntv-UPROPS_NTV_DIGIT_START; 402 } else if(ntv<UPROPS_NTV_FRACTION_START) { 403 /* small integer */ 404 return ntv-UPROPS_NTV_NUMERIC_START; 405 } else if(ntv<UPROPS_NTV_LARGE_START) { 406 /* fraction */ 407 int32_t numerator=(ntv>>4)-12; 408 int32_t denominator=(ntv&0xf)+1; 409 return (double)numerator/denominator; 410 } else if(ntv<UPROPS_NTV_RESERVED_START) { 411 /* large, single-significant-digit integer */ 412 double numValue; 413 int32_t mant=(ntv>>5)-14; 414 int32_t exp=(ntv&0x1f)+2; 415 numValue=mant; 416 417 /* multiply by 10^exp without math.h */ 418 while(exp>=4) { 419 numValue*=10000.; 420 exp-=4; 421 } 422 switch(exp) { 423 case 3: 424 numValue*=1000.; 425 break; 426 case 2: 427 numValue*=100.; 428 break; 429 case 1: 430 numValue*=10.; 431 break; 432 case 0: 433 default: 434 break; 435 } 436 437 return numValue; 438 } else { 439 /* reserved */ 440 return U_NO_NUMERIC_VALUE; 441 } 442 } 443 444 U_CAPI int32_t U_EXPORT2 445 u_digit(UChar32 ch, int8_t radix) { 446 int8_t value; 447 if((uint8_t)(radix-2)<=(36-2)) { 448 value=(int8_t)u_charDigitValue(ch); 449 if(value<0) { 450 /* ch is not a decimal digit, try latin letters */ 451 if(ch>=0x61 && ch<=0x7A) { 452 value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ 453 } else if(ch>=0x41 && ch<=0x5A) { 454 value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ 455 } else if(ch>=0xFF41 && ch<=0xFF5A) { 456 value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ 457 } else if(ch>=0xFF21 && ch<=0xFF3A) { 458 value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ 459 } 460 } 461 } else { 462 value=-1; /* invalid radix */ 463 } 464 return (int8_t)((value<radix) ? value : -1); 465 } 466 467 U_CAPI UChar32 U_EXPORT2 468 u_forDigit(int32_t digit, int8_t radix) { 469 if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) { 470 return 0; 471 } else if(digit<10) { 472 return (UChar32)(0x30+digit); 473 } else { 474 return (UChar32)((0x61-10)+digit); 475 } 476 } 477 478 /* miscellaneous, and support for uprops.c ---------------------------------- */ 479 480 U_CAPI void U_EXPORT2 481 u_getUnicodeVersion(UVersionInfo versionArray) { 482 if(versionArray!=NULL) { 483 uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); 484 } 485 } 486 487 U_CFUNC uint32_t 488 u_getUnicodeProperties(UChar32 c, int32_t column) { 489 uint16_t vecIndex; 490 491 if(column==-1) { 492 uint32_t props; 493 GET_PROPS(c, props); 494 return props; 495 } else if( 496 column<0 || column>=propsVectorsColumns 497 ) { 498 return 0; 499 } else { 500 vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); 501 return propsVectors[vecIndex+column]; 502 } 503 } 504 505 U_CFUNC int32_t 506 uprv_getMaxValues(int32_t column) { 507 switch(column) { 508 case 0: 509 return indexes[UPROPS_MAX_VALUES_INDEX]; 510 case 2: 511 return indexes[UPROPS_MAX_VALUES_2_INDEX]; 512 default: 513 return 0; 514 } 515 } 516 517 U_CAPI void U_EXPORT2 518 u_charAge(UChar32 c, UVersionInfo versionArray) { 519 if(versionArray!=NULL) { 520 uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; 521 versionArray[0]=(uint8_t)(version>>4); 522 versionArray[1]=(uint8_t)(version&0xf); 523 versionArray[2]=versionArray[3]=0; 524 } 525 } 526 527 U_CAPI UScriptCode U_EXPORT2 528 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { 529 uint32_t scriptX; 530 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 531 return USCRIPT_INVALID_CODE; 532 } 533 if((uint32_t)c>0x10ffff) { 534 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 535 return USCRIPT_INVALID_CODE; 536 } 537 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 538 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 539 return (UScriptCode)scriptX; 540 } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { 541 return USCRIPT_COMMON; 542 } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { 543 return USCRIPT_INHERITED; 544 } else { 545 return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK]; 546 } 547 } 548 549 U_DRAFT UBool U_EXPORT2 550 uscript_hasScript(UChar32 c, UScriptCode sc) { 551 UScriptCode script; 552 const uint16_t *scx; 553 uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 554 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 555 return sc==(UScriptCode)scriptX; 556 } 557 558 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 559 if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { 560 script=USCRIPT_COMMON; 561 } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { 562 script=USCRIPT_INHERITED; 563 } else { 564 script=(UScriptCode)scx[0]; 565 scx=scriptExtensions+scx[1]; 566 } 567 if(sc==script) { 568 return TRUE; 569 } 570 while(sc>*scx) { 571 ++scx; 572 } 573 return sc==(*scx&0x7fff); 574 } 575 576 U_DRAFT int32_t U_EXPORT2 577 uscript_getScriptExtensions(UChar32 c, 578 UScriptCode *scripts, int32_t capacity, 579 UErrorCode *pErrorCode) { 580 uint32_t scriptX; 581 int32_t length; 582 const uint16_t *scx; 583 uint16_t sx; 584 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 585 return 0; 586 } 587 if(capacity<0 || (capacity>0 && scripts==NULL)) { 588 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 589 return 0; 590 } 591 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 592 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 593 return 0; 594 } 595 596 length=0; 597 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 598 if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 599 scx=scriptExtensions+scx[1]; 600 } 601 do { 602 sx=*scx++; 603 if(length<capacity) { 604 scripts[length]=sx&0x7fff; 605 } 606 ++length; 607 } while(sx<0x8000); 608 if(length>capacity) { 609 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 610 } 611 return length; 612 } 613 614 U_CAPI UBlockCode U_EXPORT2 615 ublock_getCode(UChar32 c) { 616 return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); 617 } 618 619 /* property starts for UnicodeSet ------------------------------------------- */ 620 621 static UBool U_CALLCONV 622 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 623 /* add the start code point to the USet */ 624 const USetAdder *sa=(const USetAdder *)context; 625 sa->add(sa->set, start); 626 return TRUE; 627 } 628 629 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) 630 631 U_CFUNC void U_EXPORT2 632 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 633 if(U_FAILURE(*pErrorCode)) { 634 return; 635 } 636 637 /* add the start code point of each same-value range of the main trie */ 638 utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); 639 640 /* add code points with hardcoded properties, plus the ones following them */ 641 642 /* add for u_isblank() */ 643 USET_ADD_CP_AND_NEXT(sa, TAB); 644 645 /* add for IS_THAT_CONTROL_SPACE() */ 646 sa->add(sa->set, CR+1); /* range TAB..CR */ 647 sa->add(sa->set, 0x1c); 648 sa->add(sa->set, 0x1f+1); 649 USET_ADD_CP_AND_NEXT(sa, NL); 650 651 /* add for u_isIDIgnorable() what was not added above */ 652 sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ 653 sa->add(sa->set, HAIRSP); 654 sa->add(sa->set, RLM+1); 655 sa->add(sa->set, INHSWAP); 656 sa->add(sa->set, NOMDIG+1); 657 USET_ADD_CP_AND_NEXT(sa, ZWNBSP); 658 659 /* add no-break spaces for u_isWhitespace() what was not added above */ 660 USET_ADD_CP_AND_NEXT(sa, NBSP); 661 USET_ADD_CP_AND_NEXT(sa, FIGURESP); 662 USET_ADD_CP_AND_NEXT(sa, NNBSP); 663 664 /* add for u_digit() */ 665 sa->add(sa->set, U_a); 666 sa->add(sa->set, U_z+1); 667 sa->add(sa->set, U_A); 668 sa->add(sa->set, U_Z+1); 669 sa->add(sa->set, U_FW_a); 670 sa->add(sa->set, U_FW_z+1); 671 sa->add(sa->set, U_FW_A); 672 sa->add(sa->set, U_FW_Z+1); 673 674 /* add for u_isxdigit() */ 675 sa->add(sa->set, U_f+1); 676 sa->add(sa->set, U_F+1); 677 sa->add(sa->set, U_FW_f+1); 678 sa->add(sa->set, U_FW_F+1); 679 680 /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ 681 sa->add(sa->set, WJ); /* range WJ..NOMDIG */ 682 sa->add(sa->set, 0xfff0); 683 sa->add(sa->set, 0xfffb+1); 684 sa->add(sa->set, 0xe0000); 685 sa->add(sa->set, 0xe0fff+1); 686 687 /* add for UCHAR_GRAPHEME_BASE and others */ 688 USET_ADD_CP_AND_NEXT(sa, CGJ); 689 } 690 691 U_CFUNC void U_EXPORT2 692 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 693 if(U_FAILURE(*pErrorCode)) { 694 return; 695 } 696 697 /* add the start code point of each same-value range of the properties vectors trie */ 698 if(propsVectorsColumns>0) { 699 /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */ 700 utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); 701 } 702 } 703