1 /* 2 ******************************************************************************** 3 * Copyright (C) 1996-2012, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************** 6 * 7 * File UCHAR.C 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 04/02/97 aliu Creation. 13 * 4/15/99 Madhu Updated all the function definitions for C Implementation 14 * 5/20/99 Madhu Added the function u_getVersion() 15 * 8/19/1999 srl Upgraded scripts to Unicode3.0 16 * 11/11/1999 weiv added u_isalnum(), cleaned comments 17 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. 18 * 06/20/2000 helena OS/400 port changes; mostly typecast. 19 ****************************************************************************** 20 */ 21 22 #include "unicode/utypes.h" 23 #include "unicode/uchar.h" 24 #include "unicode/uscript.h" 25 #include "unicode/udata.h" 26 #include "uassert.h" 27 #include "cmemory.h" 28 #include "ucln_cmn.h" 29 #include "utrie2.h" 30 #include "udataswp.h" 31 #include "uprops.h" 32 #include "ustr_imp.h" 33 34 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 35 36 /* uchar_props_data.h is machine-generated by genprops --csource */ 37 #define INCLUDED_FROM_UCHAR_C 38 #include "uchar_props_data.h" 39 40 /* constants and macros for access to the data ------------------------------ */ 41 42 /* getting a uint32_t properties word from the data */ 43 #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); 44 45 U_CFUNC UBool 46 uprv_haveProperties(UErrorCode *pErrorCode) { 47 if(U_FAILURE(*pErrorCode)) { 48 return FALSE; 49 } 50 return TRUE; 51 } 52 53 /* API functions ------------------------------------------------------------ */ 54 55 /* Gets the Unicode character's general category.*/ 56 U_CAPI int8_t U_EXPORT2 57 u_charType(UChar32 c) { 58 uint32_t props; 59 GET_PROPS(c, props); 60 return (int8_t)GET_CATEGORY(props); 61 } 62 63 /* Enumerate all code points with their general categories. */ 64 struct _EnumTypeCallback { 65 UCharEnumTypeRange *enumRange; 66 const void *context; 67 }; 68 69 static uint32_t U_CALLCONV 70 _enumTypeValue(const void *context, uint32_t value) { 71 return GET_CATEGORY(value); 72 } 73 74 static UBool U_CALLCONV 75 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 76 /* just cast the value to UCharCategory */ 77 return ((struct _EnumTypeCallback *)context)-> 78 enumRange(((struct _EnumTypeCallback *)context)->context, 79 start, end+1, (UCharCategory)value); 80 } 81 82 U_CAPI void U_EXPORT2 83 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { 84 struct _EnumTypeCallback callback; 85 86 if(enumRange==NULL) { 87 return; 88 } 89 90 callback.enumRange=enumRange; 91 callback.context=context; 92 utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); 93 } 94 95 /* Checks if ch is a lower case letter.*/ 96 U_CAPI UBool U_EXPORT2 97 u_islower(UChar32 c) { 98 uint32_t props; 99 GET_PROPS(c, props); 100 return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); 101 } 102 103 /* Checks if ch is an upper case letter.*/ 104 U_CAPI UBool U_EXPORT2 105 u_isupper(UChar32 c) { 106 uint32_t props; 107 GET_PROPS(c, props); 108 return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); 109 } 110 111 /* Checks if ch is a title case letter; usually upper case letters.*/ 112 U_CAPI UBool U_EXPORT2 113 u_istitle(UChar32 c) { 114 uint32_t props; 115 GET_PROPS(c, props); 116 return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); 117 } 118 119 /* Checks if ch is a decimal digit. */ 120 U_CAPI UBool U_EXPORT2 121 u_isdigit(UChar32 c) { 122 uint32_t props; 123 GET_PROPS(c, props); 124 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 125 } 126 127 U_CAPI UBool U_EXPORT2 128 u_isxdigit(UChar32 c) { 129 uint32_t props; 130 131 /* check ASCII and Fullwidth ASCII a-fA-F */ 132 if( 133 (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || 134 (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) 135 ) { 136 return TRUE; 137 } 138 139 GET_PROPS(c, props); 140 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 141 } 142 143 /* Checks if the Unicode character is a letter.*/ 144 U_CAPI UBool U_EXPORT2 145 u_isalpha(UChar32 c) { 146 uint32_t props; 147 GET_PROPS(c, props); 148 return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); 149 } 150 151 U_CAPI UBool U_EXPORT2 152 u_isUAlphabetic(UChar32 c) { 153 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; 154 } 155 156 /* Checks if c is a letter or a decimal digit */ 157 U_CAPI UBool U_EXPORT2 158 u_isalnum(UChar32 c) { 159 uint32_t props; 160 GET_PROPS(c, props); 161 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); 162 } 163 164 /** 165 * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 166 * @internal 167 */ 168 U_CFUNC UBool 169 u_isalnumPOSIX(UChar32 c) { 170 return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); 171 } 172 173 /* Checks if ch is a unicode character with assigned character type.*/ 174 U_CAPI UBool U_EXPORT2 175 u_isdefined(UChar32 c) { 176 uint32_t props; 177 GET_PROPS(c, props); 178 return (UBool)(GET_CATEGORY(props)!=0); 179 } 180 181 /* Checks if the Unicode character is a base form character that can take a diacritic.*/ 182 U_CAPI UBool U_EXPORT2 183 u_isbase(UChar32 c) { 184 uint32_t props; 185 GET_PROPS(c, props); 186 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); 187 } 188 189 /* Checks if the Unicode character is a control character.*/ 190 U_CAPI UBool U_EXPORT2 191 u_iscntrl(UChar32 c) { 192 uint32_t props; 193 GET_PROPS(c, props); 194 return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); 195 } 196 197 U_CAPI UBool U_EXPORT2 198 u_isISOControl(UChar32 c) { 199 return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); 200 } 201 202 /* Some control characters that are used as space. */ 203 #define IS_THAT_CONTROL_SPACE(c) \ 204 (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) 205 206 /* Java has decided that U+0085 New Line is not whitespace any more. */ 207 #define IS_THAT_ASCII_CONTROL_SPACE(c) \ 208 (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) 209 210 /* Checks if the Unicode character is a space character.*/ 211 U_CAPI UBool U_EXPORT2 212 u_isspace(UChar32 c) { 213 uint32_t props; 214 GET_PROPS(c, props); 215 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); 216 } 217 218 U_CAPI UBool U_EXPORT2 219 u_isJavaSpaceChar(UChar32 c) { 220 uint32_t props; 221 GET_PROPS(c, props); 222 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); 223 } 224 225 /* Checks if the Unicode character is a whitespace character.*/ 226 U_CAPI UBool U_EXPORT2 227 u_isWhitespace(UChar32 c) { 228 uint32_t props; 229 GET_PROPS(c, props); 230 return (UBool)( 231 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && 232 c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ 233 IS_THAT_ASCII_CONTROL_SPACE(c) 234 ); 235 } 236 237 U_CAPI UBool U_EXPORT2 238 u_isblank(UChar32 c) { 239 if((uint32_t)c<=0x9f) { 240 return c==9 || c==0x20; /* TAB or SPACE */ 241 } else { 242 /* Zs */ 243 uint32_t props; 244 GET_PROPS(c, props); 245 return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); 246 } 247 } 248 249 U_CAPI UBool U_EXPORT2 250 u_isUWhiteSpace(UChar32 c) { 251 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; 252 } 253 254 /* Checks if the Unicode character is printable.*/ 255 U_CAPI UBool U_EXPORT2 256 u_isprint(UChar32 c) { 257 uint32_t props; 258 GET_PROPS(c, props); 259 /* comparing ==0 returns FALSE for the categories mentioned */ 260 return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); 261 } 262 263 /** 264 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. 265 * Implements UCHAR_POSIX_PRINT. 266 * @internal 267 */ 268 U_CFUNC UBool 269 u_isprintPOSIX(UChar32 c) { 270 uint32_t props; 271 GET_PROPS(c, props); 272 /* 273 * The only cntrl character in graph+blank is TAB (in blank). 274 * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 275 */ 276 return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); 277 } 278 279 U_CAPI UBool U_EXPORT2 280 u_isgraph(UChar32 c) { 281 uint32_t props; 282 GET_PROPS(c, props); 283 /* comparing ==0 returns FALSE for the categories mentioned */ 284 return (UBool)((CAT_MASK(props)& 285 (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 286 ==0); 287 } 288 289 /** 290 * Checks if c is in 291 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] 292 * with space=\p{Whitespace} and Control=Cc. 293 * Implements UCHAR_POSIX_GRAPH. 294 * @internal 295 */ 296 U_CFUNC UBool 297 u_isgraphPOSIX(UChar32 c) { 298 uint32_t props; 299 GET_PROPS(c, props); 300 /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ 301 /* comparing ==0 returns FALSE for the categories mentioned */ 302 return (UBool)((CAT_MASK(props)& 303 (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 304 ==0); 305 } 306 307 U_CAPI UBool U_EXPORT2 308 u_ispunct(UChar32 c) { 309 uint32_t props; 310 GET_PROPS(c, props); 311 return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); 312 } 313 314 /* Checks if the Unicode character can start a Unicode identifier.*/ 315 U_CAPI UBool U_EXPORT2 316 u_isIDStart(UChar32 c) { 317 /* same as u_isalpha() */ 318 uint32_t props; 319 GET_PROPS(c, props); 320 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); 321 } 322 323 /* Checks if the Unicode character can be a Unicode identifier part other than starting the 324 identifier.*/ 325 U_CAPI UBool U_EXPORT2 326 u_isIDPart(UChar32 c) { 327 uint32_t props; 328 GET_PROPS(c, props); 329 return (UBool)( 330 (CAT_MASK(props)& 331 (U_GC_ND_MASK|U_GC_NL_MASK| 332 U_GC_L_MASK| 333 U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) 334 )!=0 || 335 u_isIDIgnorable(c)); 336 } 337 338 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ 339 U_CAPI UBool U_EXPORT2 340 u_isIDIgnorable(UChar32 c) { 341 if(c<=0x9f) { 342 return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); 343 } else { 344 uint32_t props; 345 GET_PROPS(c, props); 346 return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); 347 } 348 } 349 350 /*Checks if the Unicode character can start a Java identifier.*/ 351 U_CAPI UBool U_EXPORT2 352 u_isJavaIDStart(UChar32 c) { 353 uint32_t props; 354 GET_PROPS(c, props); 355 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); 356 } 357 358 /*Checks if the Unicode character can be a Java identifier part other than starting the 359 * identifier. 360 */ 361 U_CAPI UBool U_EXPORT2 362 u_isJavaIDPart(UChar32 c) { 363 uint32_t props; 364 GET_PROPS(c, props); 365 return (UBool)( 366 (CAT_MASK(props)& 367 (U_GC_ND_MASK|U_GC_NL_MASK| 368 U_GC_L_MASK| 369 U_GC_SC_MASK|U_GC_PC_MASK| 370 U_GC_MC_MASK|U_GC_MN_MASK) 371 )!=0 || 372 u_isIDIgnorable(c)); 373 } 374 375 U_CAPI int32_t U_EXPORT2 376 u_charDigitValue(UChar32 c) { 377 uint32_t props; 378 int32_t value; 379 GET_PROPS(c, props); 380 value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; 381 if(value<=9) { 382 return value; 383 } else { 384 return -1; 385 } 386 } 387 388 U_CAPI double U_EXPORT2 389 u_getNumericValue(UChar32 c) { 390 uint32_t props; 391 int32_t ntv; 392 GET_PROPS(c, props); 393 ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); 394 395 if(ntv==UPROPS_NTV_NONE) { 396 return U_NO_NUMERIC_VALUE; 397 } else if(ntv<UPROPS_NTV_DIGIT_START) { 398 /* decimal digit */ 399 return ntv-UPROPS_NTV_DECIMAL_START; 400 } else if(ntv<UPROPS_NTV_NUMERIC_START) { 401 /* other digit */ 402 return ntv-UPROPS_NTV_DIGIT_START; 403 } else if(ntv<UPROPS_NTV_FRACTION_START) { 404 /* small integer */ 405 return ntv-UPROPS_NTV_NUMERIC_START; 406 } else if(ntv<UPROPS_NTV_LARGE_START) { 407 /* fraction */ 408 int32_t numerator=(ntv>>4)-12; 409 int32_t denominator=(ntv&0xf)+1; 410 return (double)numerator/denominator; 411 } else if(ntv<UPROPS_NTV_BASE60_START) { 412 /* large, single-significant-digit integer */ 413 double numValue; 414 int32_t mant=(ntv>>5)-14; 415 int32_t exp=(ntv&0x1f)+2; 416 numValue=mant; 417 418 /* multiply by 10^exp without math.h */ 419 while(exp>=4) { 420 numValue*=10000.; 421 exp-=4; 422 } 423 switch(exp) { 424 case 3: 425 numValue*=1000.; 426 break; 427 case 2: 428 numValue*=100.; 429 break; 430 case 1: 431 numValue*=10.; 432 break; 433 case 0: 434 default: 435 break; 436 } 437 438 return numValue; 439 } else if(ntv<UPROPS_NTV_RESERVED_START) { 440 /* sexagesimal (base 60) integer */ 441 int32_t numValue=(ntv>>2)-0xbf; 442 int32_t exp=(ntv&3)+1; 443 444 switch(exp) { 445 case 4: 446 numValue*=60*60*60*60; 447 break; 448 case 3: 449 numValue*=60*60*60; 450 break; 451 case 2: 452 numValue*=60*60; 453 break; 454 case 1: 455 numValue*=60; 456 break; 457 case 0: 458 default: 459 break; 460 } 461 462 return numValue; 463 } else { 464 /* reserved */ 465 return U_NO_NUMERIC_VALUE; 466 } 467 } 468 469 U_CAPI int32_t U_EXPORT2 470 u_digit(UChar32 ch, int8_t radix) { 471 int8_t value; 472 if((uint8_t)(radix-2)<=(36-2)) { 473 value=(int8_t)u_charDigitValue(ch); 474 if(value<0) { 475 /* ch is not a decimal digit, try latin letters */ 476 if(ch>=0x61 && ch<=0x7A) { 477 value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ 478 } else if(ch>=0x41 && ch<=0x5A) { 479 value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ 480 } else if(ch>=0xFF41 && ch<=0xFF5A) { 481 value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ 482 } else if(ch>=0xFF21 && ch<=0xFF3A) { 483 value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ 484 } 485 } 486 } else { 487 value=-1; /* invalid radix */ 488 } 489 return (int8_t)((value<radix) ? value : -1); 490 } 491 492 U_CAPI UChar32 U_EXPORT2 493 u_forDigit(int32_t digit, int8_t radix) { 494 if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) { 495 return 0; 496 } else if(digit<10) { 497 return (UChar32)(0x30+digit); 498 } else { 499 return (UChar32)((0x61-10)+digit); 500 } 501 } 502 503 /* miscellaneous, and support for uprops.cpp -------------------------------- */ 504 505 U_CAPI void U_EXPORT2 506 u_getUnicodeVersion(UVersionInfo versionArray) { 507 if(versionArray!=NULL) { 508 uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); 509 } 510 } 511 512 U_CFUNC uint32_t 513 u_getMainProperties(UChar32 c) { 514 uint32_t props; 515 GET_PROPS(c, props); 516 return props; 517 } 518 519 U_CFUNC uint32_t 520 u_getUnicodeProperties(UChar32 c, int32_t column) { 521 U_ASSERT(column>=0); 522 if(column>=propsVectorsColumns) { 523 return 0; 524 } else { 525 uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); 526 return propsVectors[vecIndex+column]; 527 } 528 } 529 530 U_CFUNC int32_t 531 uprv_getMaxValues(int32_t column) { 532 switch(column) { 533 case 0: 534 return indexes[UPROPS_MAX_VALUES_INDEX]; 535 case 2: 536 return indexes[UPROPS_MAX_VALUES_2_INDEX]; 537 default: 538 return 0; 539 } 540 } 541 542 U_CAPI void U_EXPORT2 543 u_charAge(UChar32 c, UVersionInfo versionArray) { 544 if(versionArray!=NULL) { 545 uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; 546 versionArray[0]=(uint8_t)(version>>4); 547 versionArray[1]=(uint8_t)(version&0xf); 548 versionArray[2]=versionArray[3]=0; 549 } 550 } 551 552 U_CAPI UScriptCode U_EXPORT2 553 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { 554 uint32_t scriptX; 555 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 556 return USCRIPT_INVALID_CODE; 557 } 558 if((uint32_t)c>0x10ffff) { 559 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 560 return USCRIPT_INVALID_CODE; 561 } 562 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 563 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 564 return (UScriptCode)scriptX; 565 } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { 566 return USCRIPT_COMMON; 567 } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { 568 return USCRIPT_INHERITED; 569 } else { 570 return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK]; 571 } 572 } 573 574 U_CAPI UBool U_EXPORT2 575 uscript_hasScript(UChar32 c, UScriptCode sc) { 576 const uint16_t *scx; 577 uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 578 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 579 return sc==(UScriptCode)scriptX; 580 } 581 582 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 583 if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 584 scx=scriptExtensions+scx[1]; 585 } 586 if(sc>=USCRIPT_CODE_LIMIT) { 587 /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ 588 return FALSE; 589 } 590 while(sc>*scx) { 591 ++scx; 592 } 593 return sc==(*scx&0x7fff); 594 } 595 596 U_CAPI int32_t U_EXPORT2 597 uscript_getScriptExtensions(UChar32 c, 598 UScriptCode *scripts, int32_t capacity, 599 UErrorCode *pErrorCode) { 600 uint32_t scriptX; 601 int32_t length; 602 const uint16_t *scx; 603 uint16_t sx; 604 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 605 return 0; 606 } 607 if(capacity<0 || (capacity>0 && scripts==NULL)) { 608 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 609 return 0; 610 } 611 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 612 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 613 if(capacity==0) { 614 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 615 } else { 616 scripts[0]=(UScriptCode)scriptX; 617 } 618 return 1; 619 } 620 621 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 622 if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 623 scx=scriptExtensions+scx[1]; 624 } 625 length=0; 626 do { 627 sx=*scx++; 628 if(length<capacity) { 629 scripts[length]=(UScriptCode)(sx&0x7fff); 630 } 631 ++length; 632 } while(sx<0x8000); 633 if(length>capacity) { 634 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 635 } 636 return length; 637 } 638 639 U_CAPI UBlockCode U_EXPORT2 640 ublock_getCode(UChar32 c) { 641 return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); 642 } 643 644 /* property starts for UnicodeSet ------------------------------------------- */ 645 646 static UBool U_CALLCONV 647 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 648 /* add the start code point to the USet */ 649 const USetAdder *sa=(const USetAdder *)context; 650 sa->add(sa->set, start); 651 return TRUE; 652 } 653 654 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) 655 656 U_CFUNC void U_EXPORT2 657 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 658 if(U_FAILURE(*pErrorCode)) { 659 return; 660 } 661 662 /* add the start code point of each same-value range of the main trie */ 663 utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); 664 665 /* add code points with hardcoded properties, plus the ones following them */ 666 667 /* add for u_isblank() */ 668 USET_ADD_CP_AND_NEXT(sa, TAB); 669 670 /* add for IS_THAT_CONTROL_SPACE() */ 671 sa->add(sa->set, CR+1); /* range TAB..CR */ 672 sa->add(sa->set, 0x1c); 673 sa->add(sa->set, 0x1f+1); 674 USET_ADD_CP_AND_NEXT(sa, NL); 675 676 /* add for u_isIDIgnorable() what was not added above */ 677 sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ 678 sa->add(sa->set, HAIRSP); 679 sa->add(sa->set, RLM+1); 680 sa->add(sa->set, INHSWAP); 681 sa->add(sa->set, NOMDIG+1); 682 USET_ADD_CP_AND_NEXT(sa, ZWNBSP); 683 684 /* add no-break spaces for u_isWhitespace() what was not added above */ 685 USET_ADD_CP_AND_NEXT(sa, NBSP); 686 USET_ADD_CP_AND_NEXT(sa, FIGURESP); 687 USET_ADD_CP_AND_NEXT(sa, NNBSP); 688 689 /* add for u_digit() */ 690 sa->add(sa->set, U_a); 691 sa->add(sa->set, U_z+1); 692 sa->add(sa->set, U_A); 693 sa->add(sa->set, U_Z+1); 694 sa->add(sa->set, U_FW_a); 695 sa->add(sa->set, U_FW_z+1); 696 sa->add(sa->set, U_FW_A); 697 sa->add(sa->set, U_FW_Z+1); 698 699 /* add for u_isxdigit() */ 700 sa->add(sa->set, U_f+1); 701 sa->add(sa->set, U_F+1); 702 sa->add(sa->set, U_FW_f+1); 703 sa->add(sa->set, U_FW_F+1); 704 705 /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ 706 sa->add(sa->set, WJ); /* range WJ..NOMDIG */ 707 sa->add(sa->set, 0xfff0); 708 sa->add(sa->set, 0xfffb+1); 709 sa->add(sa->set, 0xe0000); 710 sa->add(sa->set, 0xe0fff+1); 711 712 /* add for UCHAR_GRAPHEME_BASE and others */ 713 USET_ADD_CP_AND_NEXT(sa, CGJ); 714 } 715 716 U_CFUNC void U_EXPORT2 717 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 718 if(U_FAILURE(*pErrorCode)) { 719 return; 720 } 721 722 /* add the start code point of each same-value range of the properties vectors trie */ 723 if(propsVectorsColumns>0) { 724 /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */ 725 utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); 726 } 727 } 728