1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************** 5 * Copyright (C) 1996-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************** 8 * 9 * File UCHAR.C 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 04/02/97 aliu Creation. 15 * 4/15/99 Madhu Updated all the function definitions for C Implementation 16 * 5/20/99 Madhu Added the function u_getVersion() 17 * 8/19/1999 srl Upgraded scripts to Unicode3.0 18 * 11/11/1999 weiv added u_isalnum(), cleaned comments 19 * 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. 20 * 06/20/2000 helena OS/400 port changes; mostly typecast. 21 ****************************************************************************** 22 */ 23 24 #include "unicode/utypes.h" 25 #include "unicode/uchar.h" 26 #include "unicode/uscript.h" 27 #include "unicode/udata.h" 28 #include "uassert.h" 29 #include "cmemory.h" 30 #include "ucln_cmn.h" 31 #include "utrie2.h" 32 #include "udataswp.h" 33 #include "uprops.h" 34 #include "ustr_imp.h" 35 36 /* uchar_props_data.h is machine-generated by genprops --csource */ 37 #define INCLUDED_FROM_UCHAR_C 38 #include "uchar_props_data.h" 39 40 /* constants and macros for access to the data ------------------------------ */ 41 42 /* getting a uint32_t properties word from the data */ 43 #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); 44 45 U_CFUNC UBool 46 uprv_haveProperties(UErrorCode *pErrorCode) { 47 if(U_FAILURE(*pErrorCode)) { 48 return FALSE; 49 } 50 return TRUE; 51 } 52 53 /* API functions ------------------------------------------------------------ */ 54 55 /* Gets the Unicode character's general category.*/ 56 U_CAPI int8_t U_EXPORT2 57 u_charType(UChar32 c) { 58 uint32_t props; 59 GET_PROPS(c, props); 60 return (int8_t)GET_CATEGORY(props); 61 } 62 63 /* Enumerate all code points with their general categories. */ 64 struct _EnumTypeCallback { 65 UCharEnumTypeRange *enumRange; 66 const void *context; 67 }; 68 69 static uint32_t U_CALLCONV 70 _enumTypeValue(const void *context, uint32_t value) { 71 (void)context; 72 return GET_CATEGORY(value); 73 } 74 75 static UBool U_CALLCONV 76 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 77 /* just cast the value to UCharCategory */ 78 return ((struct _EnumTypeCallback *)context)-> 79 enumRange(((struct _EnumTypeCallback *)context)->context, 80 start, end+1, (UCharCategory)value); 81 } 82 83 U_CAPI void U_EXPORT2 84 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { 85 struct _EnumTypeCallback callback; 86 87 if(enumRange==NULL) { 88 return; 89 } 90 91 callback.enumRange=enumRange; 92 callback.context=context; 93 utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); 94 } 95 96 /* Checks if ch is a lower case letter.*/ 97 U_CAPI UBool U_EXPORT2 98 u_islower(UChar32 c) { 99 uint32_t props; 100 GET_PROPS(c, props); 101 return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); 102 } 103 104 /* Checks if ch is an upper case letter.*/ 105 U_CAPI UBool U_EXPORT2 106 u_isupper(UChar32 c) { 107 uint32_t props; 108 GET_PROPS(c, props); 109 return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); 110 } 111 112 /* Checks if ch is a title case letter; usually upper case letters.*/ 113 U_CAPI UBool U_EXPORT2 114 u_istitle(UChar32 c) { 115 uint32_t props; 116 GET_PROPS(c, props); 117 return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); 118 } 119 120 /* Checks if ch is a decimal digit. */ 121 U_CAPI UBool U_EXPORT2 122 u_isdigit(UChar32 c) { 123 uint32_t props; 124 GET_PROPS(c, props); 125 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 126 } 127 128 U_CAPI UBool U_EXPORT2 129 u_isxdigit(UChar32 c) { 130 uint32_t props; 131 132 /* check ASCII and Fullwidth ASCII a-fA-F */ 133 if( 134 (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || 135 (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) 136 ) { 137 return TRUE; 138 } 139 140 GET_PROPS(c, props); 141 return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); 142 } 143 144 /* Checks if the Unicode character is a letter.*/ 145 U_CAPI UBool U_EXPORT2 146 u_isalpha(UChar32 c) { 147 uint32_t props; 148 GET_PROPS(c, props); 149 return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); 150 } 151 152 U_CAPI UBool U_EXPORT2 153 u_isUAlphabetic(UChar32 c) { 154 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; 155 } 156 157 /* Checks if c is a letter or a decimal digit */ 158 U_CAPI UBool U_EXPORT2 159 u_isalnum(UChar32 c) { 160 uint32_t props; 161 GET_PROPS(c, props); 162 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); 163 } 164 165 /** 166 * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 167 * @internal 168 */ 169 U_CFUNC UBool 170 u_isalnumPOSIX(UChar32 c) { 171 return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); 172 } 173 174 /* Checks if ch is a unicode character with assigned character type.*/ 175 U_CAPI UBool U_EXPORT2 176 u_isdefined(UChar32 c) { 177 uint32_t props; 178 GET_PROPS(c, props); 179 return (UBool)(GET_CATEGORY(props)!=0); 180 } 181 182 /* Checks if the Unicode character is a base form character that can take a diacritic.*/ 183 U_CAPI UBool U_EXPORT2 184 u_isbase(UChar32 c) { 185 uint32_t props; 186 GET_PROPS(c, props); 187 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); 188 } 189 190 /* Checks if the Unicode character is a control character.*/ 191 U_CAPI UBool U_EXPORT2 192 u_iscntrl(UChar32 c) { 193 uint32_t props; 194 GET_PROPS(c, props); 195 return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); 196 } 197 198 U_CAPI UBool U_EXPORT2 199 u_isISOControl(UChar32 c) { 200 return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); 201 } 202 203 /* Some control characters that are used as space. */ 204 #define IS_THAT_CONTROL_SPACE(c) \ 205 (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) 206 207 /* Java has decided that U+0085 New Line is not whitespace any more. */ 208 #define IS_THAT_ASCII_CONTROL_SPACE(c) \ 209 (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) 210 211 /* Checks if the Unicode character is a space character.*/ 212 U_CAPI UBool U_EXPORT2 213 u_isspace(UChar32 c) { 214 uint32_t props; 215 GET_PROPS(c, props); 216 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); 217 } 218 219 U_CAPI UBool U_EXPORT2 220 u_isJavaSpaceChar(UChar32 c) { 221 uint32_t props; 222 GET_PROPS(c, props); 223 return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); 224 } 225 226 /* Checks if the Unicode character is a whitespace character.*/ 227 U_CAPI UBool U_EXPORT2 228 u_isWhitespace(UChar32 c) { 229 uint32_t props; 230 GET_PROPS(c, props); 231 return (UBool)( 232 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && 233 c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ 234 IS_THAT_ASCII_CONTROL_SPACE(c) 235 ); 236 } 237 238 U_CAPI UBool U_EXPORT2 239 u_isblank(UChar32 c) { 240 if((uint32_t)c<=0x9f) { 241 return c==9 || c==0x20; /* TAB or SPACE */ 242 } else { 243 /* Zs */ 244 uint32_t props; 245 GET_PROPS(c, props); 246 return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); 247 } 248 } 249 250 U_CAPI UBool U_EXPORT2 251 u_isUWhiteSpace(UChar32 c) { 252 return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; 253 } 254 255 /* Checks if the Unicode character is printable.*/ 256 U_CAPI UBool U_EXPORT2 257 u_isprint(UChar32 c) { 258 uint32_t props; 259 GET_PROPS(c, props); 260 /* comparing ==0 returns FALSE for the categories mentioned */ 261 return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); 262 } 263 264 /** 265 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. 266 * Implements UCHAR_POSIX_PRINT. 267 * @internal 268 */ 269 U_CFUNC UBool 270 u_isprintPOSIX(UChar32 c) { 271 uint32_t props; 272 GET_PROPS(c, props); 273 /* 274 * The only cntrl character in graph+blank is TAB (in blank). 275 * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 276 */ 277 return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); 278 } 279 280 U_CAPI UBool U_EXPORT2 281 u_isgraph(UChar32 c) { 282 uint32_t props; 283 GET_PROPS(c, props); 284 /* comparing ==0 returns FALSE for the categories mentioned */ 285 return (UBool)((CAT_MASK(props)& 286 (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 287 ==0); 288 } 289 290 /** 291 * Checks if c is in 292 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] 293 * with space=\p{Whitespace} and Control=Cc. 294 * Implements UCHAR_POSIX_GRAPH. 295 * @internal 296 */ 297 U_CFUNC UBool 298 u_isgraphPOSIX(UChar32 c) { 299 uint32_t props; 300 GET_PROPS(c, props); 301 /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ 302 /* comparing ==0 returns FALSE for the categories mentioned */ 303 return (UBool)((CAT_MASK(props)& 304 (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) 305 ==0); 306 } 307 308 U_CAPI UBool U_EXPORT2 309 u_ispunct(UChar32 c) { 310 uint32_t props; 311 GET_PROPS(c, props); 312 return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); 313 } 314 315 /* Checks if the Unicode character can start a Unicode identifier.*/ 316 U_CAPI UBool U_EXPORT2 317 u_isIDStart(UChar32 c) { 318 /* same as u_isalpha() */ 319 uint32_t props; 320 GET_PROPS(c, props); 321 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); 322 } 323 324 /* Checks if the Unicode character can be a Unicode identifier part other than starting the 325 identifier.*/ 326 U_CAPI UBool U_EXPORT2 327 u_isIDPart(UChar32 c) { 328 uint32_t props; 329 GET_PROPS(c, props); 330 return (UBool)( 331 (CAT_MASK(props)& 332 (U_GC_ND_MASK|U_GC_NL_MASK| 333 U_GC_L_MASK| 334 U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) 335 )!=0 || 336 u_isIDIgnorable(c)); 337 } 338 339 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ 340 U_CAPI UBool U_EXPORT2 341 u_isIDIgnorable(UChar32 c) { 342 if(c<=0x9f) { 343 return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); 344 } else { 345 uint32_t props; 346 GET_PROPS(c, props); 347 return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); 348 } 349 } 350 351 /*Checks if the Unicode character can start a Java identifier.*/ 352 U_CAPI UBool U_EXPORT2 353 u_isJavaIDStart(UChar32 c) { 354 uint32_t props; 355 GET_PROPS(c, props); 356 return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); 357 } 358 359 /*Checks if the Unicode character can be a Java identifier part other than starting the 360 * identifier. 361 */ 362 U_CAPI UBool U_EXPORT2 363 u_isJavaIDPart(UChar32 c) { 364 uint32_t props; 365 GET_PROPS(c, props); 366 return (UBool)( 367 (CAT_MASK(props)& 368 (U_GC_ND_MASK|U_GC_NL_MASK| 369 U_GC_L_MASK| 370 U_GC_SC_MASK|U_GC_PC_MASK| 371 U_GC_MC_MASK|U_GC_MN_MASK) 372 )!=0 || 373 u_isIDIgnorable(c)); 374 } 375 376 U_CAPI int32_t U_EXPORT2 377 u_charDigitValue(UChar32 c) { 378 uint32_t props; 379 int32_t value; 380 GET_PROPS(c, props); 381 value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; 382 if(value<=9) { 383 return value; 384 } else { 385 return -1; 386 } 387 } 388 389 U_CAPI double U_EXPORT2 390 u_getNumericValue(UChar32 c) { 391 uint32_t props; 392 int32_t ntv; 393 GET_PROPS(c, props); 394 ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); 395 396 if(ntv==UPROPS_NTV_NONE) { 397 return U_NO_NUMERIC_VALUE; 398 } else if(ntv<UPROPS_NTV_DIGIT_START) { 399 /* decimal digit */ 400 return ntv-UPROPS_NTV_DECIMAL_START; 401 } else if(ntv<UPROPS_NTV_NUMERIC_START) { 402 /* other digit */ 403 return ntv-UPROPS_NTV_DIGIT_START; 404 } else if(ntv<UPROPS_NTV_FRACTION_START) { 405 /* small integer */ 406 return ntv-UPROPS_NTV_NUMERIC_START; 407 } else if(ntv<UPROPS_NTV_LARGE_START) { 408 /* fraction */ 409 int32_t numerator=(ntv>>4)-12; 410 int32_t denominator=(ntv&0xf)+1; 411 return (double)numerator/denominator; 412 } else if(ntv<UPROPS_NTV_BASE60_START) { 413 /* large, single-significant-digit integer */ 414 double numValue; 415 int32_t mant=(ntv>>5)-14; 416 int32_t exp=(ntv&0x1f)+2; 417 numValue=mant; 418 419 /* multiply by 10^exp without math.h */ 420 while(exp>=4) { 421 numValue*=10000.; 422 exp-=4; 423 } 424 switch(exp) { 425 case 3: 426 numValue*=1000.; 427 break; 428 case 2: 429 numValue*=100.; 430 break; 431 case 1: 432 numValue*=10.; 433 break; 434 case 0: 435 default: 436 break; 437 } 438 439 return numValue; 440 } else if(ntv<UPROPS_NTV_FRACTION20_START) { 441 /* sexagesimal (base 60) integer */ 442 int32_t numValue=(ntv>>2)-0xbf; 443 int32_t exp=(ntv&3)+1; 444 445 switch(exp) { 446 case 4: 447 numValue*=60*60*60*60; 448 break; 449 case 3: 450 numValue*=60*60*60; 451 break; 452 case 2: 453 numValue*=60*60; 454 break; 455 case 1: 456 numValue*=60; 457 break; 458 case 0: 459 default: 460 break; 461 } 462 463 return numValue; 464 } else if(ntv<UPROPS_NTV_RESERVED_START) { 465 // fraction-20 e.g. 3/80 466 int32_t frac20=ntv-UPROPS_NTV_FRACTION20_START; // 0..0x17 467 int32_t numerator=2*(frac20&3)+1; 468 int32_t denominator=20<<(frac20>>2); 469 return (double)numerator/denominator; 470 } else { 471 /* reserved */ 472 return U_NO_NUMERIC_VALUE; 473 } 474 } 475 476 U_CAPI int32_t U_EXPORT2 477 u_digit(UChar32 ch, int8_t radix) { 478 int8_t value; 479 if((uint8_t)(radix-2)<=(36-2)) { 480 value=(int8_t)u_charDigitValue(ch); 481 if(value<0) { 482 /* ch is not a decimal digit, try latin letters */ 483 if(ch>=0x61 && ch<=0x7A) { 484 value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ 485 } else if(ch>=0x41 && ch<=0x5A) { 486 value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ 487 } else if(ch>=0xFF41 && ch<=0xFF5A) { 488 value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ 489 } else if(ch>=0xFF21 && ch<=0xFF3A) { 490 value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ 491 } 492 } 493 } else { 494 value=-1; /* invalid radix */ 495 } 496 return (int8_t)((value<radix) ? value : -1); 497 } 498 499 U_CAPI UChar32 U_EXPORT2 500 u_forDigit(int32_t digit, int8_t radix) { 501 if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) { 502 return 0; 503 } else if(digit<10) { 504 return (UChar32)(0x30+digit); 505 } else { 506 return (UChar32)((0x61-10)+digit); 507 } 508 } 509 510 /* miscellaneous, and support for uprops.cpp -------------------------------- */ 511 512 U_CAPI void U_EXPORT2 513 u_getUnicodeVersion(UVersionInfo versionArray) { 514 if(versionArray!=NULL) { 515 uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); 516 } 517 } 518 519 U_CFUNC uint32_t 520 u_getMainProperties(UChar32 c) { 521 uint32_t props; 522 GET_PROPS(c, props); 523 return props; 524 } 525 526 U_CFUNC uint32_t 527 u_getUnicodeProperties(UChar32 c, int32_t column) { 528 U_ASSERT(column>=0); 529 if(column>=propsVectorsColumns) { 530 return 0; 531 } else { 532 uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); 533 return propsVectors[vecIndex+column]; 534 } 535 } 536 537 U_CFUNC int32_t 538 uprv_getMaxValues(int32_t column) { 539 switch(column) { 540 case 0: 541 return indexes[UPROPS_MAX_VALUES_INDEX]; 542 case 2: 543 return indexes[UPROPS_MAX_VALUES_2_INDEX]; 544 default: 545 return 0; 546 } 547 } 548 549 U_CAPI void U_EXPORT2 550 u_charAge(UChar32 c, UVersionInfo versionArray) { 551 if(versionArray!=NULL) { 552 uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; 553 versionArray[0]=(uint8_t)(version>>4); 554 versionArray[1]=(uint8_t)(version&0xf); 555 versionArray[2]=versionArray[3]=0; 556 } 557 } 558 559 U_CAPI UScriptCode U_EXPORT2 560 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { 561 uint32_t scriptX; 562 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 563 return USCRIPT_INVALID_CODE; 564 } 565 if((uint32_t)c>0x10ffff) { 566 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 567 return USCRIPT_INVALID_CODE; 568 } 569 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 570 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 571 return (UScriptCode)scriptX; 572 } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { 573 return USCRIPT_COMMON; 574 } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { 575 return USCRIPT_INHERITED; 576 } else { 577 return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK]; 578 } 579 } 580 581 U_CAPI UBool U_EXPORT2 582 uscript_hasScript(UChar32 c, UScriptCode sc) { 583 const uint16_t *scx; 584 uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 585 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 586 return sc==(UScriptCode)scriptX; 587 } 588 589 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 590 if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 591 scx=scriptExtensions+scx[1]; 592 } 593 if(sc>=USCRIPT_CODE_LIMIT) { 594 /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ 595 return FALSE; 596 } 597 while(sc>*scx) { 598 ++scx; 599 } 600 return sc==(*scx&0x7fff); 601 } 602 603 U_CAPI int32_t U_EXPORT2 604 uscript_getScriptExtensions(UChar32 c, 605 UScriptCode *scripts, int32_t capacity, 606 UErrorCode *pErrorCode) { 607 uint32_t scriptX; 608 int32_t length; 609 const uint16_t *scx; 610 uint16_t sx; 611 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 612 return 0; 613 } 614 if(capacity<0 || (capacity>0 && scripts==NULL)) { 615 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 616 return 0; 617 } 618 scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; 619 if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { 620 if(capacity==0) { 621 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 622 } else { 623 scripts[0]=(UScriptCode)scriptX; 624 } 625 return 1; 626 } 627 628 scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK); 629 if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { 630 scx=scriptExtensions+scx[1]; 631 } 632 length=0; 633 do { 634 sx=*scx++; 635 if(length<capacity) { 636 scripts[length]=(UScriptCode)(sx&0x7fff); 637 } 638 ++length; 639 } while(sx<0x8000); 640 if(length>capacity) { 641 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 642 } 643 return length; 644 } 645 646 U_CAPI UBlockCode U_EXPORT2 647 ublock_getCode(UChar32 c) { 648 return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); 649 } 650 651 /* property starts for UnicodeSet ------------------------------------------- */ 652 653 static UBool U_CALLCONV 654 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 655 /* add the start code point to the USet */ 656 const USetAdder *sa=(const USetAdder *)context; 657 sa->add(sa->set, start); 658 (void)end; 659 (void)value; 660 return TRUE; 661 } 662 663 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) 664 665 U_CFUNC void U_EXPORT2 666 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 667 if(U_FAILURE(*pErrorCode)) { 668 return; 669 } 670 671 /* add the start code point of each same-value range of the main trie */ 672 utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); 673 674 /* add code points with hardcoded properties, plus the ones following them */ 675 676 /* add for u_isblank() */ 677 USET_ADD_CP_AND_NEXT(sa, TAB); 678 679 /* add for IS_THAT_CONTROL_SPACE() */ 680 sa->add(sa->set, CR+1); /* range TAB..CR */ 681 sa->add(sa->set, 0x1c); 682 sa->add(sa->set, 0x1f+1); 683 USET_ADD_CP_AND_NEXT(sa, NL); 684 685 /* add for u_isIDIgnorable() what was not added above */ 686 sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ 687 sa->add(sa->set, HAIRSP); 688 sa->add(sa->set, RLM+1); 689 sa->add(sa->set, INHSWAP); 690 sa->add(sa->set, NOMDIG+1); 691 USET_ADD_CP_AND_NEXT(sa, ZWNBSP); 692 693 /* add no-break spaces for u_isWhitespace() what was not added above */ 694 USET_ADD_CP_AND_NEXT(sa, NBSP); 695 USET_ADD_CP_AND_NEXT(sa, FIGURESP); 696 USET_ADD_CP_AND_NEXT(sa, NNBSP); 697 698 /* add for u_digit() */ 699 sa->add(sa->set, U_a); 700 sa->add(sa->set, U_z+1); 701 sa->add(sa->set, U_A); 702 sa->add(sa->set, U_Z+1); 703 sa->add(sa->set, U_FW_a); 704 sa->add(sa->set, U_FW_z+1); 705 sa->add(sa->set, U_FW_A); 706 sa->add(sa->set, U_FW_Z+1); 707 708 /* add for u_isxdigit() */ 709 sa->add(sa->set, U_f+1); 710 sa->add(sa->set, U_F+1); 711 sa->add(sa->set, U_FW_f+1); 712 sa->add(sa->set, U_FW_F+1); 713 714 /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ 715 sa->add(sa->set, WJ); /* range WJ..NOMDIG */ 716 sa->add(sa->set, 0xfff0); 717 sa->add(sa->set, 0xfffb+1); 718 sa->add(sa->set, 0xe0000); 719 sa->add(sa->set, 0xe0fff+1); 720 721 /* add for UCHAR_GRAPHEME_BASE and others */ 722 USET_ADD_CP_AND_NEXT(sa, CGJ); 723 } 724 725 U_CFUNC void U_EXPORT2 726 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { 727 if(U_FAILURE(*pErrorCode)) { 728 return; 729 } 730 731 /* add the start code point of each same-value range of the properties vectors trie */ 732 utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); 733 } 734