1 /* 2 ******************************************************************************** 3 * Copyright (C) 1996-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************** 6 * 7 * File UCHAR.C 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 04/02/97 aliu Creation. 13 * 4/15/99 Madhu Updated all the function definitions for C Implementation 14 * 5/20/99 Madhu Added the function u_getVersion() 15 * 8/19/1999 srl Upgraded scripts to Unicode3.0 16 * 11/11/1999 weiv added u_isalnum(), cleaned comments 17 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. 18 * 06/20/2000 helena OS/400 port changes; mostly typecast. 19 ****************************************************************************** 20 */ 21 22 #include "unicode/utypes.h" 23 #include "unicode/uchar.h" 24 #include "unicode/uscript.h" 25 #include "unicode/udata.h" 26 #include "uassert.h" 27 #include "cmemory.h" 28 #include "ucln_cmn.h" 29 #include "utrie2.h" 30 #include "udataswp.h" 31 #include "uprops.h" 32 #include "ustr_imp.h" 33 34 /* uchar_props_data.h is machine-generated by genprops --csource */ 35 #define INCLUDED_FROM_UCHAR_C 36 #include "uchar_props_data.h" 37 38 /* constants and macros for access to the data ------------------------------ */ 39 40 /* getting a uint32_t properties word from the data */ 41 #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); 42 43 U_CFUNC UBool 44 uprv_haveProperties(UErrorCode *pErrorCode) { 45 if(U_FAILURE(*pErrorCode)) { 46 return FALSE; 47 } 48 return TRUE; 49 } 50 51 /* API functions ------------------------------------------------------------ */ 52 53 /* Gets the Unicode character's general category.*/ 54 U_CAPI int8_t U_EXPORT2 55 u_charType(UChar32 c) { 56 uint32_t props; 57 GET_PROPS(c, props); 58 return (int8_t)GET_CATEGORY(props); 59 } 60 61 /* Enumerate all code points with their general categories. */ 62 struct _EnumTypeCallback { 63 UCharEnumTypeRange *enumRange; 64 const void *context; 65 }; 66 67 static uint32_t U_CALLCONV 68 _enumTypeValue(const void *context, uint32_t value) { 69 return GET_CATEGORY(value); 70 } 71 72 static UBool U_CALLCONV 73 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 74 /* just cast the value to UCharCategory */ 75 return ((struct _EnumTypeCallback *)context)-> 76 enumRange(((struct _EnumTypeCallback *)context)->context, 77 start, end+1, (UCharCategory)value); 78 } 79 80 U_CAPI void U_EXPORT2 81 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { 82 struct _EnumTypeCallback callback; 83 84 if(enumRange==NULL) { 85 return; 86 } 87 88 callback.enumRange=enumRange; 89 callback.context=context; 90 utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); 91 } 92 93 /* Checks if ch is a lower case letter.*/ 94 U_CAPI UBool U_EXPORT2 95 u_islower(UChar32 c) { 96 uint32_t props; 97 GET_PROPS(c, props); 98 return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); 99 } 100 101 /* Checks if ch is an upper case letter.*/ 102 U_CAPI UBool U_EXPORT2 103 u_isupper(UChar32 c) { 104 uint32_t props; 105 GET_PROPS(c, props); 106 return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); 107 } 108 109 /* Checks if ch is a title case letter; usually upper case letters.*/ 110 U_CAPI UBool U_EXPORT2 111 u_istitle(UChar32 c) { 112 uint32_t props; 113 GET_PROPS(c, props); 114 return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); 115 } 116 117 /* Checks if ch is a decimal digit. */ 118 U_CAPI UBool U_EXPORT2 119 u_isdigit(UChar32 c) { 120 uint32_t props; 121 GET_PROPS(c, props); 122 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 123 } 124 125 U_CAPI UBool U_EXPORT2 126 u_isxdigit(UChar32 c) { 127 uint32_t props; 128 129 /* check ASCII and Fullwidth ASCII a-fA-F */ 130 if( 131 (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || 132 (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) 133 ) { 134 return TRUE; 135 } 136 137 GET_PROPS(c, props); 138 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 139 } 140 141 /* Checks if the Unicode character is a letter.*/ 142 U_CAPI UBool U_EXPORT2 143 u_isalpha(UChar32 c) { 144 uint32_t props; 145 GET_PROPS(c, props); 146 return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); 147 } 148 149 U_CAPI UBool U_EXPORT2 150 u_isUAlphabetic(UChar32 c) { 151 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; 152 } 153 154 /* Checks if c is a letter or a decimal digit */ 155 U_CAPI UBool U_EXPORT2 156 u_isalnum(UChar32 c) { 157 uint32_t props; 158 GET_PROPS(c, props); 159 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); 160 } 161 162 /** 163 * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 164 * @internal 165 */ 166 U_CFUNC UBool 167 u_isalnumPOSIX(UChar32 c) { 168 return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); 169 } 170 171 /* Checks if ch is a unicode character with assigned character type.*/ 172 U_CAPI UBool U_EXPORT2 173 u_isdefined(UChar32 c) { 174 uint32_t props; 175 GET_PROPS(c, props); 176 return (UBool)(GET_CATEGORY(props)!=0); 177 } 178 179 /* Checks if the Unicode character is a base form character that can take a diacritic.*/ 180 U_CAPI UBool U_EXPORT2 181 u_isbase(UChar32 c) { 182 uint32_t props; 183 GET_PROPS(c, props); 184 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); 185 } 186 187 /* Checks if the Unicode character is a control character.*/ 188 U_CAPI UBool U_EXPORT2 189 u_iscntrl(UChar32 c) { 190 uint32_t props; 191 GET_PROPS(c, props); 192 return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); 193 } 194 195 U_CAPI UBool U_EXPORT2 196 u_isISOControl(UChar32 c) { 197 return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); 198 } 199 200 /* Some control characters that are used as space. */ 201 #define IS_THAT_CONTROL_SPACE(c) \ 202 (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) 203 204 /* Java has decided that U+0085 New Line is not whitespace any more. */ 205 #define IS_THAT_ASCII_CONTROL_SPACE(c) \ 206 (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) 207 208 /* Checks if the Unicode character is a space character.*/ 209 U_CAPI UBool U_EXPORT2 210 u_isspace(UChar32 c) { 211 uint32_t props; 212 GET_PROPS(c, props); 213 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); 214 } 215 216 U_CAPI UBool U_EXPORT2 217 u_isJavaSpaceChar(UChar32 c) { 218 uint32_t props; 219 GET_PROPS(c, props); 220 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); 221 } 222 223 /* Checks if the Unicode character is a whitespace character.*/ 224 U_CAPI UBool U_EXPORT2 225 u_isWhitespace(UChar32 c) { 226 uint32_t props; 227 GET_PROPS(c, props); 228 return (UBool)( 229 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && 230 c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ 231 IS_THAT_ASCII_CONTROL_SPACE(c) 232 ); 233 } 234 235 U_CAPI UBool U_EXPORT2 236 u_isblank(UChar32 c) { 237 if((uint32_t)c<=0x9f) { 238 return c==9 || c==0x20; /* TAB or SPACE */ 239 } else { 240 /* Zs */ 241 uint32_t props; 242 GET_PROPS(c, props); 243 return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); 244 } 245 } 246 247 U_CAPI UBool U_EXPORT2 248 u_isUWhiteSpace(UChar32 c) { 249 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; 250 } 251 252 /* Checks if the Unicode character is printable.*/ 253 U_CAPI UBool U_EXPORT2 254 u_isprint(UChar32 c) { 255 uint32_t props; 256 GET_PROPS(c, props); 257 /* comparing ==0 returns FALSE for the categories mentioned */ 258 return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); 259 } 260 261 /** 262 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. 263 * Implements UCHAR_POSIX_PRINT. 264 * @internal 265 */ 266 U_CFUNC UBool 267 u_isprintPOSIX(UChar32 c) { 268 uint32_t props; 269 GET_PROPS(c, props); 270 /* 271 * The only cntrl character in graph+blank is TAB (in blank). 272 * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 273 */ 274 return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); 275 } 276 277 U_CAPI UBool U_EXPORT2 278 u_isgraph(UChar32 c) { 279 uint32_t props; 280 GET_PROPS(c, props); 281 /* comparing ==0 returns FALSE for the categories mentioned */ 282 return (UBool)((CAT_MASK(props)& 283 (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 284 ==0); 285 } 286 287 /** 288 * Checks if c is in 289 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] 290 * with space=\p{Whitespace} and Control=Cc. 291 * Implements UCHAR_POSIX_GRAPH. 292 * @internal 293 */ 294 U_CFUNC UBool 295 u_isgraphPOSIX(UChar32 c) { 296 uint32_t props; 297 GET_PROPS(c, props); 298 /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ 299 /* comparing ==0 returns FALSE for the categories mentioned */ 300 return (UBool)((CAT_MASK(props)& 301 (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 302 ==0); 303 } 304 305 U_CAPI UBool U_EXPORT2 306 u_ispunct(UChar32 c) { 307 uint32_t props; 308 GET_PROPS(c, props); 309 return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); 310 } 311 312 /* Checks if the Unicode character can start a Unicode identifier.*/ 313 U_CAPI UBool U_EXPORT2 314 u_isIDStart(UChar32 c) { 315 /* same as u_isalpha() */ 316 uint32_t props; 317 GET_PROPS(c, props); 318 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); 319 } 320 321 /* Checks if the Unicode character can be a Unicode identifier part other than starting the 322 identifier.*/ 323 U_CAPI UBool U_EXPORT2 324 u_isIDPart(UChar32 c) { 325 uint32_t props; 326 GET_PROPS(c, props); 327 return (UBool)( 328 (CAT_MASK(props)& 329 (U_GC_ND_MASK|U_GC_NL_MASK| 330 U_GC_L_MASK| 331 U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) 332 )!=0 || 333 u_isIDIgnorable(c)); 334 } 335 336 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ 337 U_CAPI UBool U_EXPORT2 338 u_isIDIgnorable(UChar32 c) { 339 if(c<=0x9f) { 340 return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); 341 } else { 342 uint32_t props; 343 GET_PROPS(c, props); 344 return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); 345 } 346 } 347 348 /*Checks if the Unicode character can start a Java identifier.*/ 349 U_CAPI UBool U_EXPORT2 350 u_isJavaIDStart(UChar32 c) { 351 uint32_t props; 352 GET_PROPS(c, props); 353 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); 354 } 355 356 /*Checks if the Unicode character can be a Java identifier part other than starting the 357 * identifier. 358 */ 359 U_CAPI UBool U_EXPORT2 360 u_isJavaIDPart(UChar32 c) { 361 uint32_t props; 362 GET_PROPS(c, props); 363 return (UBool)( 364 (CAT_MASK(props)& 365 (U_GC_ND_MASK|U_GC_NL_MASK| 366 U_GC_L_MASK| 367 U_GC_SC_MASK|U_GC_PC_MASK| 368 U_GC_MC_MASK|U_GC_MN_MASK) 369 )!=0 || 370 u_isIDIgnorable(c)); 371 } 372 373 U_CAPI int32_t U_EXPORT2 374 u_charDigitValue(UChar32 c) { 375 uint32_t props; 376 int32_t value; 377 GET_PROPS(c, props); 378 value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; 379 if(value<=9) { 380 return value; 381 } else { 382 return -1; 383 } 384 } 385 386 U_CAPI double U_EXPORT2 387 u_getNumericValue(UChar32 c) { 388 uint32_t props; 389 int32_t ntv; 390 GET_PROPS(c, props); 391 ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); 392 393 if(ntv==UPROPS_NTV_NONE) { 394 return U_NO_NUMERIC_VALUE; 395 } else if(ntv<UPROPS_NTV_DIGIT_START) { 396 /* decimal digit */ 397 return ntv-UPROPS_NTV_DECIMAL_START; 398 } else if(ntv<UPROPS_NTV_NUMERIC_START) { 399 /* other digit */ 400 return ntv-UPROPS_NTV_DIGIT_START; 401 } else if(ntv<UPROPS_NTV_FRACTION_START) { 402 /* small integer */ 403 return ntv-UPROPS_NTV_NUMERIC_START; 404 } else if(ntv<UPROPS_NTV_LARGE_START) { 405 /* fraction */ 406 int32_t numerator=(ntv>>4)-12; 407 int32_t denominator=(ntv&0xf)+1; 408 return (double)numerator/denominator; 409 } else if(ntv<UPROPS_NTV_BASE60_START) { 410 /* large, single-significant-digit integer */ 411 double numValue; 412 int32_t mant=(ntv>>5)-14; 413 int32_t exp=(ntv&0x1f)+2; 414 numValue=mant; 415 416 /* multiply by 10^exp without math.h */ 417 while(exp>=4) { 418 numValue*=10000.; 419 exp-=4; 420 } 421 switch(exp) { 422 case 3: 423 numValue*=1000.; 424 break; 425 case 2: 426 numValue*=100.; 427 break; 428 case 1: 429 numValue*=10.; 430 break; 431 case 0: 432 default: 433 break; 434 } 435 436 return numValue; 437 } else if(ntv<UPROPS_NTV_RESERVED_START) { 438 /* sexagesimal (base 60) integer */ 439 int32_t numValue=(ntv>>2)-0xbf; 440 int32_t exp=(ntv&3)+1; 441 442 switch(exp) { 443 case 4: 444 numValue*=60*60*60*60; 445 break; 446 case 3: 447 numValue*=60*60*60; 448 break; 449 case 2: 450 numValue*=60*60; 451 break; 452 case 1: 453 numValue*=60; 454 break; 455 case 0: 456 default: 457 break; 458 } 459 460 return numValue; 461 } else { 462 /* reserved */ 463 return U_NO_NUMERIC_VALUE; 464 } 465 } 466 467 U_CAPI int32_t U_EXPORT2 468 u_digit(UChar32 ch, int8_t radix) { 469 int8_t value; 470 if((uint8_t)(radix-2)<=(36-2)) { 471 value=(int8_t)u_charDigitValue(ch); 472 if(value<0) { 473 /* ch is not a decimal digit, try latin letters */ 474 if(ch>=0x61 && ch<=0x7A) { 475 value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ 476 } else if(ch>=0x41 && ch<=0x5A) { 477 value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ 478 } else if(ch>=0xFF41 && ch<=0xFF5A) { 479 value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ 480 } else if(ch>=0xFF21 && ch<=0xFF3A) { 481 value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ 482 } 483 } 484 } else { 485 value=-1; /* invalid radix */ 486 } 487 return (int8_t)((value<radix) ? value : -1); 488 } 489 490 U_CAPI UChar32 U_EXPORT2 491 u_forDigit(int32_t digit, int8_t radix) { 492 if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) { 493 return 0; 494 } else if(digit<10) { 495 return (UChar32)(0x30+digit); 496 } else { 497 return (UChar32)((0x61-10)+digit); 498 } 499 } 500 501 /* miscellaneous, and support for uprops.cpp -------------------------------- */ 502 503 U_CAPI void U_EXPORT2 504 u_getUnicodeVersion(UVersionInfo versionArray) { 505 if(versionArray!=NULL) { 506 uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); 507 } 508 } 509 510 U_CFUNC uint32_t 511 u_getMainProperties(UChar32 c) { 512 uint32_t props; 513 GET_PROPS(c, props); 514 return props; 515 } 516 517 U_CFUNC uint32_t 518 u_getUnicodeProperties(UChar32 c, int32_t column) { 519 U_ASSERT(column>=0); 520 if(column>=propsVectorsColumns) { 521 return 0; 522 } else { 523 uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); 524 return propsVectors[vecIndex+column]; 525 } 526 } 527 528 U_CFUNC int32_t 529 uprv_getMaxValues(int32_t column) { 530 switch(column) { 531 case 0: 532 return indexes[UPROPS_MAX_VALUES_INDEX]; 533 case 2: 534 return indexes[UPROPS_MAX_VALUES_2_INDEX]; 535 default: 536 return 0; 537 } 538 } 539 540 U_CAPI void U_EXPORT2 541 u_charAge(UChar32 c, UVersionInfo versionArray) { 542 if(versionArray!=NULL) { 543 uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; 544 versionArray[0]=(uint8_t)(version>>4); 545 versionArray[1]=(uint8_t)(version&0xf); 546 versionArray[2]=versionArray[3]=0; 547 } 548 } 549 550 U_CAPI UScriptCode U_EXPORT2 551 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { 552 uint32_t scriptX; 553 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 554 return USCRIPT_INVALID_CODE; 555 } 556 if((uint32_t)c>0x10ffff) { 557 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 558 return USCRIPT_INVALID_CODE; 559 } 560 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 561 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 562 return (UScriptCode)scriptX; 563 } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { 564 return USCRIPT_COMMON; 565 } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { 566 return USCRIPT_INHERITED; 567 } else { 568 return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK]; 569 } 570 } 571 572 U_CAPI UBool U_EXPORT2 573 uscript_hasScript(UChar32 c, UScriptCode sc) { 574 const uint16_t *scx; 575 uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 576 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 577 return sc==(UScriptCode)scriptX; 578 } 579 580 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 581 if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 582 scx=scriptExtensions+scx[1]; 583 } 584 if(sc>=USCRIPT_CODE_LIMIT) { 585 /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ 586 return FALSE; 587 } 588 while(sc>*scx) { 589 ++scx; 590 } 591 return sc==(*scx&0x7fff); 592 } 593 594 U_CAPI int32_t U_EXPORT2 595 uscript_getScriptExtensions(UChar32 c, 596 UScriptCode *scripts, int32_t capacity, 597 UErrorCode *pErrorCode) { 598 uint32_t scriptX; 599 int32_t length; 600 const uint16_t *scx; 601 uint16_t sx; 602 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 603 return 0; 604 } 605 if(capacity<0 || (capacity>0 && scripts==NULL)) { 606 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 607 return 0; 608 } 609 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 610 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 611 if(capacity==0) { 612 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 613 } else { 614 scripts[0]=(UScriptCode)scriptX; 615 } 616 return 1; 617 } 618 619 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 620 if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 621 scx=scriptExtensions+scx[1]; 622 } 623 length=0; 624 do { 625 sx=*scx++; 626 if(length<capacity) { 627 scripts[length]=(UScriptCode)(sx&0x7fff); 628 } 629 ++length; 630 } while(sx<0x8000); 631 if(length>capacity) { 632 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 633 } 634 return length; 635 } 636 637 U_CAPI UBlockCode U_EXPORT2 638 ublock_getCode(UChar32 c) { 639 return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); 640 } 641 642 /* property starts for UnicodeSet ------------------------------------------- */ 643 644 static UBool U_CALLCONV 645 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 646 /* add the start code point to the USet */ 647 const USetAdder *sa=(const USetAdder *)context; 648 sa->add(sa->set, start); 649 return TRUE; 650 } 651 652 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) 653 654 U_CFUNC void U_EXPORT2 655 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 656 if(U_FAILURE(*pErrorCode)) { 657 return; 658 } 659 660 /* add the start code point of each same-value range of the main trie */ 661 utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); 662 663 /* add code points with hardcoded properties, plus the ones following them */ 664 665 /* add for u_isblank() */ 666 USET_ADD_CP_AND_NEXT(sa, TAB); 667 668 /* add for IS_THAT_CONTROL_SPACE() */ 669 sa->add(sa->set, CR+1); /* range TAB..CR */ 670 sa->add(sa->set, 0x1c); 671 sa->add(sa->set, 0x1f+1); 672 USET_ADD_CP_AND_NEXT(sa, NL); 673 674 /* add for u_isIDIgnorable() what was not added above */ 675 sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ 676 sa->add(sa->set, HAIRSP); 677 sa->add(sa->set, RLM+1); 678 sa->add(sa->set, INHSWAP); 679 sa->add(sa->set, NOMDIG+1); 680 USET_ADD_CP_AND_NEXT(sa, ZWNBSP); 681 682 /* add no-break spaces for u_isWhitespace() what was not added above */ 683 USET_ADD_CP_AND_NEXT(sa, NBSP); 684 USET_ADD_CP_AND_NEXT(sa, FIGURESP); 685 USET_ADD_CP_AND_NEXT(sa, NNBSP); 686 687 /* add for u_digit() */ 688 sa->add(sa->set, U_a); 689 sa->add(sa->set, U_z+1); 690 sa->add(sa->set, U_A); 691 sa->add(sa->set, U_Z+1); 692 sa->add(sa->set, U_FW_a); 693 sa->add(sa->set, U_FW_z+1); 694 sa->add(sa->set, U_FW_A); 695 sa->add(sa->set, U_FW_Z+1); 696 697 /* add for u_isxdigit() */ 698 sa->add(sa->set, U_f+1); 699 sa->add(sa->set, U_F+1); 700 sa->add(sa->set, U_FW_f+1); 701 sa->add(sa->set, U_FW_F+1); 702 703 /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ 704 sa->add(sa->set, WJ); /* range WJ..NOMDIG */ 705 sa->add(sa->set, 0xfff0); 706 sa->add(sa->set, 0xfffb+1); 707 sa->add(sa->set, 0xe0000); 708 sa->add(sa->set, 0xe0fff+1); 709 710 /* add for UCHAR_GRAPHEME_BASE and others */ 711 USET_ADD_CP_AND_NEXT(sa, CGJ); 712 } 713 714 U_CFUNC void U_EXPORT2 715 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 716 if(U_FAILURE(*pErrorCode)) { 717 return; 718 } 719 720 /* add the start code point of each same-value range of the properties vectors trie */ 721 if(propsVectorsColumns>0) { 722 /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */ 723 utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); 724 } 725 } 726