1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: unames.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 1999oct04 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 #include "unicode/putil.h" 19 #include "unicode/uchar.h" 20 #include "unicode/udata.h" 21 #include "ustr_imp.h" 22 #include "umutex.h" 23 #include "cmemory.h" 24 #include "cstring.h" 25 #include "ucln_cmn.h" 26 #include "udataswp.h" 27 #include "uprops.h" 28 29 /* prototypes ------------------------------------------------------------- */ 30 31 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 32 33 static const char DATA_NAME[] = "unames"; 34 static const char DATA_TYPE[] = "icu"; 35 36 #define GROUP_SHIFT 5 37 #define LINES_PER_GROUP (1UL<<GROUP_SHIFT) 38 #define GROUP_MASK (LINES_PER_GROUP-1) 39 40 /* 41 * This struct was replaced by explicitly accessing equivalent 42 * fields from triples of uint16_t. 43 * The Group struct was padded to 8 bytes on compilers for early ARM CPUs, 44 * which broke the assumption that sizeof(Group)==6 and that the ++ operator 45 * would advance by 6 bytes (3 uint16_t). 46 * 47 * We can't just change the data structure because it's loaded from a data file, 48 * and we don't want to make it less compact, so we changed the access code. 49 * 50 * For details see ICU tickets 6331 and 6008. 51 typedef struct { 52 uint16_t groupMSB, 53 offsetHigh, offsetLow; /* avoid padding * / 54 } Group; 55 */ 56 enum { 57 GROUP_MSB, 58 GROUP_OFFSET_HIGH, 59 GROUP_OFFSET_LOW, 60 GROUP_LENGTH 61 }; 62 63 /* 64 * Get the 32-bit group offset. 65 * @param group (const uint16_t *) pointer to a Group triple of uint16_t 66 * @return group offset (int32_t) 67 */ 68 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW]) 69 70 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH) 71 #define PREV_GROUP(group) ((group)-GROUP_LENGTH) 72 73 typedef struct { 74 uint32_t start, end; 75 uint8_t type, variant; 76 uint16_t size; 77 } AlgorithmicRange; 78 79 typedef struct { 80 uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset; 81 } UCharNames; 82 83 /* 84 * Get the groups table from a UCharNames struct. 85 * The groups table consists of one uint16_t groupCount followed by 86 * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH 87 * and the comment for the old struct Group above. 88 * 89 * @param names (const UCharNames *) pointer to the UCharNames indexes 90 * @return (const uint16_t *) pointer to the groups table 91 */ 92 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset) 93 94 typedef struct { 95 const char *otherName; 96 UChar32 code; 97 } FindName; 98 99 #define DO_FIND_NAME NULL 100 101 static UDataMemory *uCharNamesData=NULL; 102 static UCharNames *uCharNames=NULL; 103 static UErrorCode gLoadErrorCode=U_ZERO_ERROR; 104 105 /* 106 * Maximum length of character names (regular & 1.0). 107 */ 108 static int32_t gMaxNameLength=0; 109 110 /* 111 * Set of chars used in character names (regular & 1.0). 112 * Chars are platform-dependent (can be EBCDIC). 113 */ 114 static uint32_t gNameSet[8]={ 0 }; 115 116 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT 117 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1 118 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2 119 120 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3) 121 122 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = { 123 "unassigned", 124 "uppercase letter", 125 "lowercase letter", 126 "titlecase letter", 127 "modifier letter", 128 "other letter", 129 "non spacing mark", 130 "enclosing mark", 131 "combining spacing mark", 132 "decimal digit number", 133 "letter number", 134 "other number", 135 "space separator", 136 "line separator", 137 "paragraph separator", 138 "control", 139 "format", 140 "private use area", 141 "surrogate", 142 "dash punctuation", 143 "start punctuation", 144 "end punctuation", 145 "connector punctuation", 146 "other punctuation", 147 "math symbol", 148 "currency symbol", 149 "modifier symbol", 150 "other symbol", 151 "initial punctuation", 152 "final punctuation", 153 "noncharacter", 154 "lead surrogate", 155 "trail surrogate" 156 }; 157 158 /* implementation ----------------------------------------------------------- */ 159 160 static UBool U_CALLCONV unames_cleanup(void) 161 { 162 if(uCharNamesData) { 163 udata_close(uCharNamesData); 164 uCharNamesData = NULL; 165 } 166 if(uCharNames) { 167 uCharNames = NULL; 168 } 169 gMaxNameLength=0; 170 return TRUE; 171 } 172 173 static UBool U_CALLCONV 174 isAcceptable(void *context, 175 const char *type, const char *name, 176 const UDataInfo *pInfo) { 177 return (UBool)( 178 pInfo->size>=20 && 179 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 180 pInfo->charsetFamily==U_CHARSET_FAMILY && 181 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ 182 pInfo->dataFormat[1]==0x6e && 183 pInfo->dataFormat[2]==0x61 && 184 pInfo->dataFormat[3]==0x6d && 185 pInfo->formatVersion[0]==1); 186 } 187 188 static UBool 189 isDataLoaded(UErrorCode *pErrorCode) { 190 /* load UCharNames from file if necessary */ 191 UBool isCached; 192 193 /* do this because double-checked locking is broken */ 194 UMTX_CHECK(NULL, (uCharNames!=NULL), isCached); 195 196 if(!isCached) { 197 UCharNames *names; 198 UDataMemory *data; 199 200 /* check error code from previous attempt */ 201 if(U_FAILURE(gLoadErrorCode)) { 202 *pErrorCode=gLoadErrorCode; 203 return FALSE; 204 } 205 206 /* open the data outside the mutex block */ 207 data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode); 208 if(U_FAILURE(*pErrorCode)) { 209 gLoadErrorCode=*pErrorCode; 210 return FALSE; 211 } 212 213 names=(UCharNames *)udata_getMemory(data); 214 215 /* in the mutex block, set the data for this process */ 216 { 217 umtx_lock(NULL); 218 if(uCharNames==NULL) { 219 uCharNamesData=data; 220 uCharNames=names; 221 data=NULL; 222 names=NULL; 223 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup); 224 } 225 umtx_unlock(NULL); 226 } 227 228 /* if a different thread set it first, then close the extra data */ 229 if(data!=NULL) { 230 udata_close(data); /* NULL if it was set correctly */ 231 } 232 } 233 return TRUE; 234 } 235 236 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \ 237 if((bufferLength)>0) { \ 238 *(buffer)++=c; \ 239 --(bufferLength); \ 240 } \ 241 ++(bufferPos); \ 242 } 243 244 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT 245 246 /* 247 * Important: expandName() and compareName() are almost the same - 248 * apply fixes to both. 249 * 250 * UnicodeData.txt uses ';' as a field separator, so no 251 * field can contain ';' as part of its contents. 252 * In unames.dat, it is marked as token[';']==-1 only if the 253 * semicolon is used in the data file - which is iff we 254 * have Unicode 1.0 names or ISO comments. 255 * So, it will be token[';']==-1 if we store U1.0 names/ISO comments 256 * although we know that it will never be part of a name. 257 */ 258 static uint16_t 259 expandName(UCharNames *names, 260 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, 261 char *buffer, uint16_t bufferLength) { 262 uint16_t *tokens=(uint16_t *)names+8; 263 uint16_t token, tokenCount=*tokens++, bufferPos=0; 264 uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; 265 uint8_t c; 266 267 if(nameChoice==U_UNICODE_10_CHAR_NAME || nameChoice==U_ISO_COMMENT) { 268 /* 269 * skip the modern name if it is not requested _and_ 270 * if the semicolon byte value is a character, not a token number 271 */ 272 if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 273 while(nameLength>0) { 274 --nameLength; 275 if(*name++==';') { 276 break; 277 } 278 } 279 if(nameChoice==U_ISO_COMMENT) { 280 /* skip the Unicode 1.0 name as well to get the ISO comment */ 281 while(nameLength>0) { 282 --nameLength; 283 if(*name++==';') { 284 break; 285 } 286 } 287 } 288 } else { 289 /* 290 * the semicolon byte value is a token number, therefore 291 * only modern names are stored in unames.dat and there is no 292 * such requested Unicode 1.0 name here 293 */ 294 nameLength=0; 295 } 296 } 297 298 /* write each letter directly, and write a token word per token */ 299 while(nameLength>0) { 300 --nameLength; 301 c=*name++; 302 303 if(c>=tokenCount) { 304 if(c!=';') { 305 /* implicit letter */ 306 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 307 } else { 308 /* finished */ 309 break; 310 } 311 } else { 312 token=tokens[c]; 313 if(token==(uint16_t)(-2)) { 314 /* this is a lead byte for a double-byte token */ 315 token=tokens[c<<8|*name++]; 316 --nameLength; 317 } 318 if(token==(uint16_t)(-1)) { 319 if(c!=';') { 320 /* explicit letter */ 321 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 322 } else { 323 /* stop, but skip the semicolon if we are seeking 324 extended names and there was no 2.0 name but there 325 is a 1.0 name. */ 326 if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) { 327 if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 328 continue; 329 } 330 } 331 /* finished */ 332 break; 333 } 334 } else { 335 /* write token word */ 336 uint8_t *tokenString=tokenStrings+token; 337 while((c=*tokenString++)!=0) { 338 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 339 } 340 } 341 } 342 } 343 344 /* zero-terminate */ 345 if(bufferLength>0) { 346 *buffer=0; 347 } 348 349 return bufferPos; 350 } 351 352 /* 353 * compareName() is almost the same as expandName() except that it compares 354 * the currently expanded name to an input name. 355 * It returns the match/no match result as soon as possible. 356 */ 357 static UBool 358 compareName(UCharNames *names, 359 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, 360 const char *otherName) { 361 uint16_t *tokens=(uint16_t *)names+8; 362 uint16_t token, tokenCount=*tokens++; 363 uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; 364 uint8_t c; 365 const char *origOtherName = otherName; 366 367 if(nameChoice==U_UNICODE_10_CHAR_NAME) { 368 /* 369 * skip the modern name if it is not requested _and_ 370 * if the semicolon byte value is a character, not a token number 371 */ 372 if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 373 while(nameLength>0) { 374 --nameLength; 375 if(*name++==';') { 376 break; 377 } 378 } 379 } else { 380 /* 381 * the semicolon byte value is a token number, therefore 382 * only modern names are stored in unames.dat and there is no 383 * such requested Unicode 1.0 name here 384 */ 385 nameLength=0; 386 } 387 } 388 389 /* compare each letter directly, and compare a token word per token */ 390 while(nameLength>0) { 391 --nameLength; 392 c=*name++; 393 394 if(c>=tokenCount) { 395 if(c!=';') { 396 /* implicit letter */ 397 if((char)c!=*otherName++) { 398 return FALSE; 399 } 400 } else { 401 /* finished */ 402 break; 403 } 404 } else { 405 token=tokens[c]; 406 if(token==(uint16_t)(-2)) { 407 /* this is a lead byte for a double-byte token */ 408 token=tokens[c<<8|*name++]; 409 --nameLength; 410 } 411 if(token==(uint16_t)(-1)) { 412 if(c!=';') { 413 /* explicit letter */ 414 if((char)c!=*otherName++) { 415 return FALSE; 416 } 417 } else { 418 /* stop, but skip the semicolon if we are seeking 419 extended names and there was no 2.0 name but there 420 is a 1.0 name. */ 421 if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) { 422 if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 423 continue; 424 } 425 } 426 /* finished */ 427 break; 428 } 429 } else { 430 /* write token word */ 431 uint8_t *tokenString=tokenStrings+token; 432 while((c=*tokenString++)!=0) { 433 if((char)c!=*otherName++) { 434 return FALSE; 435 } 436 } 437 } 438 } 439 } 440 441 /* complete match? */ 442 return (UBool)(*otherName==0); 443 } 444 445 static uint8_t getCharCat(UChar32 cp) { 446 uint8_t cat; 447 448 if (UTF_IS_UNICODE_NONCHAR(cp)) { 449 return U_NONCHARACTER_CODE_POINT; 450 } 451 452 if ((cat = u_charType(cp)) == U_SURROGATE) { 453 cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE; 454 } 455 456 return cat; 457 } 458 459 static const char *getCharCatName(UChar32 cp) { 460 uint8_t cat = getCharCat(cp); 461 462 /* Return unknown if the table of names above is not up to 463 date. */ 464 465 if (cat >= LENGTHOF(charCatNames)) { 466 return "unknown"; 467 } else { 468 return charCatNames[cat]; 469 } 470 } 471 472 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { 473 const char *catname = getCharCatName(code); 474 uint16_t length = 0; 475 476 UChar32 cp; 477 int ndigits, i; 478 479 WRITE_CHAR(buffer, bufferLength, length, '<'); 480 while (catname[length - 1]) { 481 WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]); 482 } 483 WRITE_CHAR(buffer, bufferLength, length, '-'); 484 for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4) 485 ; 486 if (ndigits < 4) 487 ndigits = 4; 488 for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) { 489 uint8_t v = (uint8_t)(cp & 0xf); 490 buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10); 491 } 492 buffer += ndigits; 493 length += ndigits; 494 WRITE_CHAR(buffer, bufferLength, length, '>'); 495 496 return length; 497 } 498 499 /* 500 * getGroup() does a binary search for the group that contains the 501 * Unicode code point "code". 502 * The return value is always a valid Group* that may contain "code" 503 * or else is the highest group before "code". 504 * If the lowest group is after "code", then that one is returned. 505 */ 506 static const uint16_t * 507 getGroup(UCharNames *names, uint32_t code) { 508 const uint16_t *groups=GET_GROUPS(names); 509 uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT), 510 start=0, 511 limit=*groups++, 512 number; 513 514 /* binary search for the group of names that contains the one for code */ 515 while(start<limit-1) { 516 number=(uint16_t)((start+limit)/2); 517 if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) { 518 limit=number; 519 } else { 520 start=number; 521 } 522 } 523 524 /* return this regardless of whether it is an exact match */ 525 return groups+start*GROUP_LENGTH; 526 } 527 528 /* 529 * expandGroupLengths() reads a block of compressed lengths of 32 strings and 530 * expands them into offsets and lengths for each string. 531 * Lengths are stored with a variable-width encoding in consecutive nibbles: 532 * If a nibble<0xc, then it is the length itself (0=empty string). 533 * If a nibble>=0xc, then it forms a length value with the following nibble. 534 * Calculation see below. 535 * The offsets and lengths arrays must be at least 33 (one more) long because 536 * there is no check here at the end if the last nibble is still used. 537 */ 538 static const uint8_t * 539 expandGroupLengths(const uint8_t *s, 540 uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) { 541 /* read the lengths of the 32 strings in this group and get each string's offset */ 542 uint16_t i=0, offset=0, length=0; 543 uint8_t lengthByte; 544 545 /* all 32 lengths must be read to get the offset of the first group string */ 546 while(i<LINES_PER_GROUP) { 547 lengthByte=*s++; 548 549 /* read even nibble - MSBs of lengthByte */ 550 if(length>=12) { 551 /* double-nibble length spread across two bytes */ 552 length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12); 553 lengthByte&=0xf; 554 } else if((lengthByte /* &0xf0 */)>=0xc0) { 555 /* double-nibble length spread across this one byte */ 556 length=(uint16_t)((lengthByte&0x3f)+12); 557 } else { 558 /* single-nibble length in MSBs */ 559 length=(uint16_t)(lengthByte>>4); 560 lengthByte&=0xf; 561 } 562 563 *offsets++=offset; 564 *lengths++=length; 565 566 offset+=length; 567 ++i; 568 569 /* read odd nibble - LSBs of lengthByte */ 570 if((lengthByte&0xf0)==0) { 571 /* this nibble was not consumed for a double-nibble length above */ 572 length=lengthByte; 573 if(length<12) { 574 /* single-nibble length in LSBs */ 575 *offsets++=offset; 576 *lengths++=length; 577 578 offset+=length; 579 ++i; 580 } 581 } else { 582 length=0; /* prevent double-nibble detection in the next iteration */ 583 } 584 } 585 586 /* now, s is at the first group string */ 587 return s; 588 } 589 590 static uint16_t 591 expandGroupName(UCharNames *names, const uint16_t *group, 592 uint16_t lineNumber, UCharNameChoice nameChoice, 593 char *buffer, uint16_t bufferLength) { 594 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 595 const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); 596 s=expandGroupLengths(s, offsets, lengths); 597 return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice, 598 buffer, bufferLength); 599 } 600 601 static uint16_t 602 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice, 603 char *buffer, uint16_t bufferLength) { 604 const uint16_t *group=getGroup(names, code); 605 if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) { 606 return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice, 607 buffer, bufferLength); 608 } else { 609 /* group not found */ 610 /* zero-terminate */ 611 if(bufferLength>0) { 612 *buffer=0; 613 } 614 return 0; 615 } 616 } 617 618 /* 619 * enumGroupNames() enumerates all the names in a 32-group 620 * and either calls the enumerator function or finds a given input name. 621 */ 622 static UBool 623 enumGroupNames(UCharNames *names, const uint16_t *group, 624 UChar32 start, UChar32 end, 625 UEnumCharNamesFn *fn, void *context, 626 UCharNameChoice nameChoice) { 627 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 628 const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); 629 630 s=expandGroupLengths(s, offsets, lengths); 631 if(fn!=DO_FIND_NAME) { 632 char buffer[200]; 633 uint16_t length; 634 635 while(start<=end) { 636 length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer)); 637 if (!length && nameChoice == U_EXTENDED_CHAR_NAME) { 638 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; 639 } 640 /* here, we assume that the buffer is large enough */ 641 if(length>0) { 642 if(!fn(context, start, nameChoice, buffer, length)) { 643 return FALSE; 644 } 645 } 646 ++start; 647 } 648 } else { 649 const char *otherName=((FindName *)context)->otherName; 650 while(start<=end) { 651 if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) { 652 ((FindName *)context)->code=start; 653 return FALSE; 654 } 655 ++start; 656 } 657 } 658 return TRUE; 659 } 660 661 /* 662 * enumExtNames enumerate extended names. 663 * It only needs to do it if it is called with a real function and not 664 * with the dummy DO_FIND_NAME, because u_charFromName() does a check 665 * for extended names by itself. 666 */ 667 static UBool 668 enumExtNames(UChar32 start, UChar32 end, 669 UEnumCharNamesFn *fn, void *context) 670 { 671 if(fn!=DO_FIND_NAME) { 672 char buffer[200]; 673 uint16_t length; 674 675 while(start<=end) { 676 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; 677 /* here, we assume that the buffer is large enough */ 678 if(length>0) { 679 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) { 680 return FALSE; 681 } 682 } 683 ++start; 684 } 685 } 686 687 return TRUE; 688 } 689 690 static UBool 691 enumNames(UCharNames *names, 692 UChar32 start, UChar32 limit, 693 UEnumCharNamesFn *fn, void *context, 694 UCharNameChoice nameChoice) { 695 uint16_t startGroupMSB, endGroupMSB, groupCount; 696 const uint16_t *group, *groupLimit; 697 698 startGroupMSB=(uint16_t)(start>>GROUP_SHIFT); 699 endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT); 700 701 /* find the group that contains start, or the highest before it */ 702 group=getGroup(names, start); 703 704 if(startGroupMSB==endGroupMSB) { 705 if(startGroupMSB==group[GROUP_MSB]) { 706 /* if start and limit-1 are in the same group, then enumerate only in that one */ 707 return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice); 708 } 709 } else { 710 const uint16_t *groups=GET_GROUPS(names); 711 groupCount=*groups++; 712 groupLimit=groups+groupCount*GROUP_LENGTH; 713 714 if(startGroupMSB==group[GROUP_MSB]) { 715 /* enumerate characters in the partial start group */ 716 if((start&GROUP_MASK)!=0) { 717 if(!enumGroupNames(names, group, 718 start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1, 719 fn, context, nameChoice)) { 720 return FALSE; 721 } 722 group=NEXT_GROUP(group); /* continue with the next group */ 723 } 724 } else if(startGroupMSB>group[GROUP_MSB]) { 725 /* make sure that we start enumerating with the first group after start */ 726 const uint16_t *nextGroup=NEXT_GROUP(group); 727 if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) { 728 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; 729 if (end > limit) { 730 end = limit; 731 } 732 if (!enumExtNames(start, end - 1, fn, context)) { 733 return FALSE; 734 } 735 } 736 group=nextGroup; 737 } 738 739 /* enumerate entire groups between the start- and end-groups */ 740 while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) { 741 const uint16_t *nextGroup; 742 start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT; 743 if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) { 744 return FALSE; 745 } 746 nextGroup=NEXT_GROUP(group); 747 if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) { 748 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; 749 if (end > limit) { 750 end = limit; 751 } 752 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) { 753 return FALSE; 754 } 755 } 756 group=nextGroup; 757 } 758 759 /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */ 760 if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) { 761 return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice); 762 } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) { 763 UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT; 764 if (next > start) { 765 start = next; 766 } 767 } else { 768 return TRUE; 769 } 770 } 771 772 /* we have not found a group, which means everything is made of 773 extended names. */ 774 if (nameChoice == U_EXTENDED_CHAR_NAME) { 775 if (limit > UCHAR_MAX_VALUE + 1) { 776 limit = UCHAR_MAX_VALUE + 1; 777 } 778 return enumExtNames(start, limit - 1, fn, context); 779 } 780 781 return TRUE; 782 } 783 784 static uint16_t 785 writeFactorSuffix(const uint16_t *factors, uint16_t count, 786 const char *s, /* suffix elements */ 787 uint32_t code, 788 uint16_t indexes[8], /* output fields from here */ 789 const char *elementBases[8], const char *elements[8], 790 char *buffer, uint16_t bufferLength) { 791 uint16_t i, factor, bufferPos=0; 792 char c; 793 794 /* write elements according to the factors */ 795 796 /* 797 * the factorized elements are determined by modulo arithmetic 798 * with the factors of this algorithm 799 * 800 * note that for fewer operations, count is decremented here 801 */ 802 --count; 803 for(i=count; i>0; --i) { 804 factor=factors[i]; 805 indexes[i]=(uint16_t)(code%factor); 806 code/=factor; 807 } 808 /* 809 * we don't need to calculate the last modulus because start<=code<=end 810 * guarantees here that code<=factors[0] 811 */ 812 indexes[0]=(uint16_t)code; 813 814 /* write each element */ 815 for(;;) { 816 if(elementBases!=NULL) { 817 *elementBases++=s; 818 } 819 820 /* skip indexes[i] strings */ 821 factor=indexes[i]; 822 while(factor>0) { 823 while(*s++!=0) {} 824 --factor; 825 } 826 if(elements!=NULL) { 827 *elements++=s; 828 } 829 830 /* write element */ 831 while((c=*s++)!=0) { 832 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 833 } 834 835 /* we do not need to perform the rest of this loop for i==count - break here */ 836 if(i>=count) { 837 break; 838 } 839 840 /* skip the rest of the strings for this factors[i] */ 841 factor=(uint16_t)(factors[i]-indexes[i]-1); 842 while(factor>0) { 843 while(*s++!=0) {} 844 --factor; 845 } 846 847 ++i; 848 } 849 850 /* zero-terminate */ 851 if(bufferLength>0) { 852 *buffer=0; 853 } 854 855 return bufferPos; 856 } 857 858 /* 859 * Important: 860 * Parts of findAlgName() are almost the same as some of getAlgName(). 861 * Fixes must be applied to both. 862 */ 863 static uint16_t 864 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, 865 char *buffer, uint16_t bufferLength) { 866 uint16_t bufferPos=0; 867 868 /* 869 * Do not write algorithmic Unicode 1.0 names because 870 * Unihan names are the same as the modern ones, 871 * extension A was only introduced with Unicode 3.0, and 872 * the Hangul syllable block was moved and changed around Unicode 1.1.5. 873 */ 874 if(nameChoice==U_UNICODE_10_CHAR_NAME) { 875 /* zero-terminate */ 876 if(bufferLength>0) { 877 *buffer=0; 878 } 879 return 0; 880 } 881 882 switch(range->type) { 883 case 0: { 884 /* name = prefix hex-digits */ 885 const char *s=(const char *)(range+1); 886 char c; 887 888 uint16_t i, count; 889 890 /* copy prefix */ 891 while((c=*s++)!=0) { 892 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 893 } 894 895 /* write hexadecimal code point value */ 896 count=range->variant; 897 898 /* zero-terminate */ 899 if(count<bufferLength) { 900 buffer[count]=0; 901 } 902 903 for(i=count; i>0;) { 904 if(--i<bufferLength) { 905 c=(char)(code&0xf); 906 if(c<10) { 907 c+='0'; 908 } else { 909 c+='A'-10; 910 } 911 buffer[i]=c; 912 } 913 code>>=4; 914 } 915 916 bufferPos+=count; 917 break; 918 } 919 case 1: { 920 /* name = prefix factorized-elements */ 921 uint16_t indexes[8]; 922 const uint16_t *factors=(const uint16_t *)(range+1); 923 uint16_t count=range->variant; 924 const char *s=(const char *)(factors+count); 925 char c; 926 927 /* copy prefix */ 928 while((c=*s++)!=0) { 929 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 930 } 931 932 bufferPos+=writeFactorSuffix(factors, count, 933 s, code-range->start, indexes, NULL, NULL, buffer, bufferLength); 934 break; 935 } 936 default: 937 /* undefined type */ 938 /* zero-terminate */ 939 if(bufferLength>0) { 940 *buffer=0; 941 } 942 break; 943 } 944 945 return bufferPos; 946 } 947 948 /* 949 * Important: enumAlgNames() and findAlgName() are almost the same. 950 * Any fix must be applied to both. 951 */ 952 static UBool 953 enumAlgNames(AlgorithmicRange *range, 954 UChar32 start, UChar32 limit, 955 UEnumCharNamesFn *fn, void *context, 956 UCharNameChoice nameChoice) { 957 char buffer[200]; 958 uint16_t length; 959 960 if(nameChoice==U_UNICODE_10_CHAR_NAME) { 961 return TRUE; 962 } 963 964 switch(range->type) { 965 case 0: { 966 char *s, *end; 967 char c; 968 969 /* get the full name of the start character */ 970 length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer)); 971 if(length<=0) { 972 return TRUE; 973 } 974 975 /* call the enumerator function with this first character */ 976 if(!fn(context, start, nameChoice, buffer, length)) { 977 return FALSE; 978 } 979 980 /* go to the end of the name; all these names have the same length */ 981 end=buffer; 982 while(*end!=0) { 983 ++end; 984 } 985 986 /* enumerate the rest of the names */ 987 while(++start<limit) { 988 /* increment the hexadecimal number on a character-basis */ 989 s=end; 990 for (;;) { 991 c=*--s; 992 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) { 993 *s=(char)(c+1); 994 break; 995 } else if(c=='9') { 996 *s='A'; 997 break; 998 } else if(c=='F') { 999 *s='0'; 1000 } 1001 } 1002 1003 if(!fn(context, start, nameChoice, buffer, length)) { 1004 return FALSE; 1005 } 1006 } 1007 break; 1008 } 1009 case 1: { 1010 uint16_t indexes[8]; 1011 const char *elementBases[8], *elements[8]; 1012 const uint16_t *factors=(const uint16_t *)(range+1); 1013 uint16_t count=range->variant; 1014 const char *s=(const char *)(factors+count); 1015 char *suffix, *t; 1016 uint16_t prefixLength, i, idx; 1017 1018 char c; 1019 1020 /* name = prefix factorized-elements */ 1021 1022 /* copy prefix */ 1023 suffix=buffer; 1024 prefixLength=0; 1025 while((c=*s++)!=0) { 1026 *suffix++=c; 1027 ++prefixLength; 1028 } 1029 1030 /* append the suffix of the start character */ 1031 length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count, 1032 s, (uint32_t)start-range->start, 1033 indexes, elementBases, elements, 1034 suffix, (uint16_t)(sizeof(buffer)-prefixLength))); 1035 1036 /* call the enumerator function with this first character */ 1037 if(!fn(context, start, nameChoice, buffer, length)) { 1038 return FALSE; 1039 } 1040 1041 /* enumerate the rest of the names */ 1042 while(++start<limit) { 1043 /* increment the indexes in lexical order bound by the factors */ 1044 i=count; 1045 for (;;) { 1046 idx=(uint16_t)(indexes[--i]+1); 1047 if(idx<factors[i]) { 1048 /* skip one index and its element string */ 1049 indexes[i]=idx; 1050 s=elements[i]; 1051 while(*s++!=0) { 1052 } 1053 elements[i]=s; 1054 break; 1055 } else { 1056 /* reset this index to 0 and its element string to the first one */ 1057 indexes[i]=0; 1058 elements[i]=elementBases[i]; 1059 } 1060 } 1061 1062 /* to make matters a little easier, just append all elements to the suffix */ 1063 t=suffix; 1064 length=prefixLength; 1065 for(i=0; i<count; ++i) { 1066 s=elements[i]; 1067 while((c=*s++)!=0) { 1068 *t++=c; 1069 ++length; 1070 } 1071 } 1072 /* zero-terminate */ 1073 *t=0; 1074 1075 if(!fn(context, start, nameChoice, buffer, length)) { 1076 return FALSE; 1077 } 1078 } 1079 break; 1080 } 1081 default: 1082 /* undefined type */ 1083 break; 1084 } 1085 1086 return TRUE; 1087 } 1088 1089 /* 1090 * findAlgName() is almost the same as enumAlgNames() except that it 1091 * returns the code point for a name if it fits into the range. 1092 * It returns 0xffff otherwise. 1093 */ 1094 static UChar32 1095 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) { 1096 UChar32 code; 1097 1098 if(nameChoice==U_UNICODE_10_CHAR_NAME) { 1099 return 0xffff; 1100 } 1101 1102 switch(range->type) { 1103 case 0: { 1104 /* name = prefix hex-digits */ 1105 const char *s=(const char *)(range+1); 1106 char c; 1107 1108 uint16_t i, count; 1109 1110 /* compare prefix */ 1111 while((c=*s++)!=0) { 1112 if((char)c!=*otherName++) { 1113 return 0xffff; 1114 } 1115 } 1116 1117 /* read hexadecimal code point value */ 1118 count=range->variant; 1119 code=0; 1120 for(i=0; i<count; ++i) { 1121 c=*otherName++; 1122 if('0'<=c && c<='9') { 1123 code=(code<<4)|(c-'0'); 1124 } else if('A'<=c && c<='F') { 1125 code=(code<<4)|(c-'A'+10); 1126 } else { 1127 return 0xffff; 1128 } 1129 } 1130 1131 /* does it fit into the range? */ 1132 if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) { 1133 return code; 1134 } 1135 break; 1136 } 1137 case 1: { 1138 char buffer[64]; 1139 uint16_t indexes[8]; 1140 const char *elementBases[8], *elements[8]; 1141 const uint16_t *factors=(const uint16_t *)(range+1); 1142 uint16_t count=range->variant; 1143 const char *s=(const char *)(factors+count), *t; 1144 UChar32 start, limit; 1145 uint16_t i, idx; 1146 1147 char c; 1148 1149 /* name = prefix factorized-elements */ 1150 1151 /* compare prefix */ 1152 while((c=*s++)!=0) { 1153 if((char)c!=*otherName++) { 1154 return 0xffff; 1155 } 1156 } 1157 1158 start=(UChar32)range->start; 1159 limit=(UChar32)(range->end+1); 1160 1161 /* initialize the suffix elements for enumeration; indexes should all be set to 0 */ 1162 writeFactorSuffix(factors, count, s, 0, 1163 indexes, elementBases, elements, buffer, sizeof(buffer)); 1164 1165 /* compare the first suffix */ 1166 if(0==uprv_strcmp(otherName, buffer)) { 1167 return start; 1168 } 1169 1170 /* enumerate and compare the rest of the suffixes */ 1171 while(++start<limit) { 1172 /* increment the indexes in lexical order bound by the factors */ 1173 i=count; 1174 for (;;) { 1175 idx=(uint16_t)(indexes[--i]+1); 1176 if(idx<factors[i]) { 1177 /* skip one index and its element string */ 1178 indexes[i]=idx; 1179 s=elements[i]; 1180 while(*s++!=0) {} 1181 elements[i]=s; 1182 break; 1183 } else { 1184 /* reset this index to 0 and its element string to the first one */ 1185 indexes[i]=0; 1186 elements[i]=elementBases[i]; 1187 } 1188 } 1189 1190 /* to make matters a little easier, just compare all elements of the suffix */ 1191 t=otherName; 1192 for(i=0; i<count; ++i) { 1193 s=elements[i]; 1194 while((c=*s++)!=0) { 1195 if(c!=*t++) { 1196 s=""; /* does not match */ 1197 i=99; 1198 } 1199 } 1200 } 1201 if(i<99 && *t==0) { 1202 return start; 1203 } 1204 } 1205 break; 1206 } 1207 default: 1208 /* undefined type */ 1209 break; 1210 } 1211 1212 return 0xffff; 1213 } 1214 1215 /* sets of name characters, maximum name lengths ---------------------------- */ 1216 1217 #define SET_ADD(set, c) ((set)[(uint8_t)c>>5]|=((uint32_t)1<<((uint8_t)c&0x1f))) 1218 #define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0) 1219 1220 static int32_t 1221 calcStringSetLength(uint32_t set[8], const char *s) { 1222 int32_t length=0; 1223 char c; 1224 1225 while((c=*s++)!=0) { 1226 SET_ADD(set, c); 1227 ++length; 1228 } 1229 return length; 1230 } 1231 1232 static int32_t 1233 calcAlgNameSetsLengths(int32_t maxNameLength) { 1234 AlgorithmicRange *range; 1235 uint32_t *p; 1236 uint32_t rangeCount; 1237 int32_t length; 1238 1239 /* enumerate algorithmic ranges */ 1240 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 1241 rangeCount=*p; 1242 range=(AlgorithmicRange *)(p+1); 1243 while(rangeCount>0) { 1244 switch(range->type) { 1245 case 0: 1246 /* name = prefix + (range->variant times) hex-digits */ 1247 /* prefix */ 1248 length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant; 1249 if(length>maxNameLength) { 1250 maxNameLength=length; 1251 } 1252 break; 1253 case 1: { 1254 /* name = prefix factorized-elements */ 1255 const uint16_t *factors=(const uint16_t *)(range+1); 1256 const char *s; 1257 int32_t i, count=range->variant, factor, factorLength, maxFactorLength; 1258 1259 /* prefix length */ 1260 s=(const char *)(factors+count); 1261 length=calcStringSetLength(gNameSet, s); 1262 s+=length+1; /* start of factor suffixes */ 1263 1264 /* get the set and maximum factor suffix length for each factor */ 1265 for(i=0; i<count; ++i) { 1266 maxFactorLength=0; 1267 for(factor=factors[i]; factor>0; --factor) { 1268 factorLength=calcStringSetLength(gNameSet, s); 1269 s+=factorLength+1; 1270 if(factorLength>maxFactorLength) { 1271 maxFactorLength=factorLength; 1272 } 1273 } 1274 length+=maxFactorLength; 1275 } 1276 1277 if(length>maxNameLength) { 1278 maxNameLength=length; 1279 } 1280 break; 1281 } 1282 default: 1283 /* unknown type */ 1284 break; 1285 } 1286 1287 range=(AlgorithmicRange *)((uint8_t *)range+range->size); 1288 --rangeCount; 1289 } 1290 return maxNameLength; 1291 } 1292 1293 static int32_t 1294 calcExtNameSetsLengths(int32_t maxNameLength) { 1295 int32_t i, length; 1296 1297 for(i=0; i<LENGTHOF(charCatNames); ++i) { 1298 /* 1299 * for each category, count the length of the category name 1300 * plus 9= 1301 * 2 for <> 1302 * 1 for - 1303 * 6 for most hex digits per code point 1304 */ 1305 length=9+calcStringSetLength(gNameSet, charCatNames[i]); 1306 if(length>maxNameLength) { 1307 maxNameLength=length; 1308 } 1309 } 1310 return maxNameLength; 1311 } 1312 1313 static int32_t 1314 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths, 1315 uint32_t set[8], 1316 const uint8_t **pLine, const uint8_t *lineLimit) { 1317 const uint8_t *line=*pLine; 1318 int32_t length=0, tokenLength; 1319 uint16_t c, token; 1320 1321 while(line!=lineLimit && (c=*line++)!=(uint8_t)';') { 1322 if(c>=tokenCount) { 1323 /* implicit letter */ 1324 SET_ADD(set, c); 1325 ++length; 1326 } else { 1327 token=tokens[c]; 1328 if(token==(uint16_t)(-2)) { 1329 /* this is a lead byte for a double-byte token */ 1330 c=c<<8|*line++; 1331 token=tokens[c]; 1332 } 1333 if(token==(uint16_t)(-1)) { 1334 /* explicit letter */ 1335 SET_ADD(set, c); 1336 ++length; 1337 } else { 1338 /* count token word */ 1339 if(tokenLengths!=NULL) { 1340 /* use cached token length */ 1341 tokenLength=tokenLengths[c]; 1342 if(tokenLength==0) { 1343 tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); 1344 tokenLengths[c]=(int8_t)tokenLength; 1345 } 1346 } else { 1347 tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); 1348 } 1349 length+=tokenLength; 1350 } 1351 } 1352 } 1353 1354 *pLine=line; 1355 return length; 1356 } 1357 1358 static void 1359 calcGroupNameSetsLengths(int32_t maxNameLength) { 1360 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 1361 1362 uint16_t *tokens=(uint16_t *)uCharNames+8; 1363 uint16_t tokenCount=*tokens++; 1364 uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset; 1365 1366 int8_t *tokenLengths; 1367 1368 const uint16_t *group; 1369 const uint8_t *s, *line, *lineLimit; 1370 1371 int32_t groupCount, lineNumber, length; 1372 1373 tokenLengths=(int8_t *)uprv_malloc(tokenCount); 1374 if(tokenLengths!=NULL) { 1375 uprv_memset(tokenLengths, 0, tokenCount); 1376 } 1377 1378 group=GET_GROUPS(uCharNames); 1379 groupCount=*group++; 1380 1381 /* enumerate all groups */ 1382 while(groupCount>0) { 1383 s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group); 1384 s=expandGroupLengths(s, offsets, lengths); 1385 1386 /* enumerate all lines in each group */ 1387 for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) { 1388 line=s+offsets[lineNumber]; 1389 length=lengths[lineNumber]; 1390 if(length==0) { 1391 continue; 1392 } 1393 1394 lineLimit=line+length; 1395 1396 /* read regular name */ 1397 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); 1398 if(length>maxNameLength) { 1399 maxNameLength=length; 1400 } 1401 if(line==lineLimit) { 1402 continue; 1403 } 1404 1405 /* read Unicode 1.0 name */ 1406 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); 1407 if(length>maxNameLength) { 1408 maxNameLength=length; 1409 } 1410 if(line==lineLimit) { 1411 continue; 1412 } 1413 1414 /* read ISO comment */ 1415 /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/ 1416 } 1417 1418 group=NEXT_GROUP(group); 1419 --groupCount; 1420 } 1421 1422 if(tokenLengths!=NULL) { 1423 uprv_free(tokenLengths); 1424 } 1425 1426 /* set gMax... - name length last for threading */ 1427 gMaxNameLength=maxNameLength; 1428 } 1429 1430 static UBool 1431 calcNameSetsLengths(UErrorCode *pErrorCode) { 1432 static const char extChars[]="0123456789ABCDEF<>-"; 1433 int32_t i, maxNameLength; 1434 1435 if(gMaxNameLength!=0) { 1436 return TRUE; 1437 } 1438 1439 if(!isDataLoaded(pErrorCode)) { 1440 return FALSE; 1441 } 1442 1443 /* set hex digits, used in various names, and <>-, used in extended names */ 1444 for(i=0; i<sizeof(extChars)-1; ++i) { 1445 SET_ADD(gNameSet, extChars[i]); 1446 } 1447 1448 /* set sets and lengths from algorithmic names */ 1449 maxNameLength=calcAlgNameSetsLengths(0); 1450 1451 /* set sets and lengths from extended names */ 1452 maxNameLength=calcExtNameSetsLengths(maxNameLength); 1453 1454 /* set sets and lengths from group names, set global maximum values */ 1455 calcGroupNameSetsLengths(maxNameLength); 1456 1457 return TRUE; 1458 } 1459 1460 /* public API --------------------------------------------------------------- */ 1461 1462 U_CAPI int32_t U_EXPORT2 1463 u_charName(UChar32 code, UCharNameChoice nameChoice, 1464 char *buffer, int32_t bufferLength, 1465 UErrorCode *pErrorCode) { 1466 AlgorithmicRange *algRange; 1467 uint32_t *p; 1468 uint32_t i; 1469 int32_t length; 1470 1471 /* check the argument values */ 1472 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1473 return 0; 1474 } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || 1475 bufferLength<0 || (bufferLength>0 && buffer==NULL) 1476 ) { 1477 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1478 return 0; 1479 } 1480 1481 if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { 1482 return u_terminateChars(buffer, bufferLength, 0, pErrorCode); 1483 } 1484 1485 length=0; 1486 1487 /* try algorithmic names first */ 1488 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 1489 i=*p; 1490 algRange=(AlgorithmicRange *)(p+1); 1491 while(i>0) { 1492 if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) { 1493 length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); 1494 break; 1495 } 1496 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 1497 --i; 1498 } 1499 1500 if(i==0) { 1501 if (nameChoice == U_EXTENDED_CHAR_NAME) { 1502 length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength); 1503 if (!length) { 1504 /* extended character name */ 1505 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength); 1506 } 1507 } else { 1508 /* normal character name */ 1509 length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); 1510 } 1511 } 1512 1513 return u_terminateChars(buffer, bufferLength, length, pErrorCode); 1514 } 1515 1516 U_CAPI int32_t U_EXPORT2 1517 u_getISOComment(UChar32 c, 1518 char *dest, int32_t destCapacity, 1519 UErrorCode *pErrorCode) { 1520 int32_t length; 1521 1522 /* check the argument values */ 1523 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1524 return 0; 1525 } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { 1526 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1527 return 0; 1528 } 1529 1530 if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { 1531 return u_terminateChars(dest, destCapacity, 0, pErrorCode); 1532 } 1533 1534 /* the ISO comment is stored like a normal character name */ 1535 length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity); 1536 return u_terminateChars(dest, destCapacity, length, pErrorCode); 1537 } 1538 1539 U_CAPI UChar32 U_EXPORT2 1540 u_charFromName(UCharNameChoice nameChoice, 1541 const char *name, 1542 UErrorCode *pErrorCode) { 1543 char upper[120], lower[120]; 1544 FindName findName; 1545 AlgorithmicRange *algRange; 1546 uint32_t *p; 1547 uint32_t i; 1548 UChar32 cp = 0; 1549 char c0; 1550 UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */ 1551 1552 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1553 return error; 1554 } 1555 1556 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) { 1557 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1558 return error; 1559 } 1560 1561 if(!isDataLoaded(pErrorCode)) { 1562 return error; 1563 } 1564 1565 /* construct the uppercase and lowercase of the name first */ 1566 for(i=0; i<sizeof(upper); ++i) { 1567 if((c0=*name++)!=0) { 1568 upper[i]=uprv_toupper(c0); 1569 lower[i]=uprv_tolower(c0); 1570 } else { 1571 upper[i]=lower[i]=0; 1572 break; 1573 } 1574 } 1575 if(i==sizeof(upper)) { 1576 /* name too long, there is no such character */ 1577 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1578 return error; 1579 } 1580 1581 /* try extended names first */ 1582 if (lower[0] == '<') { 1583 if (nameChoice == U_EXTENDED_CHAR_NAME) { 1584 if (lower[--i] == '>') { 1585 for (--i; lower[i] && lower[i] != '-'; --i) { 1586 } 1587 1588 if (lower[i] == '-') { /* We've got a category. */ 1589 uint32_t cIdx; 1590 1591 lower[i] = 0; 1592 1593 for (++i; lower[i] != '>'; ++i) { 1594 if (lower[i] >= '0' && lower[i] <= '9') { 1595 cp = (cp << 4) + lower[i] - '0'; 1596 } else if (lower[i] >= 'a' && lower[i] <= 'f') { 1597 cp = (cp << 4) + lower[i] - 'a' + 10; 1598 } else { 1599 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1600 return error; 1601 } 1602 } 1603 1604 /* Now validate the category name. 1605 We could use a binary search, or a trie, if 1606 we really wanted to. */ 1607 1608 for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) { 1609 1610 if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { 1611 if (getCharCat(cp) == cIdx) { 1612 return cp; 1613 } 1614 break; 1615 } 1616 } 1617 } 1618 } 1619 } 1620 1621 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1622 return error; 1623 } 1624 1625 /* try algorithmic names now */ 1626 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 1627 i=*p; 1628 algRange=(AlgorithmicRange *)(p+1); 1629 while(i>0) { 1630 if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) { 1631 return cp; 1632 } 1633 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 1634 --i; 1635 } 1636 1637 /* normal character name */ 1638 findName.otherName=upper; 1639 findName.code=error; 1640 enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice); 1641 if (findName.code == error) { 1642 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1643 } 1644 return findName.code; 1645 } 1646 1647 U_CAPI void U_EXPORT2 1648 u_enumCharNames(UChar32 start, UChar32 limit, 1649 UEnumCharNamesFn *fn, 1650 void *context, 1651 UCharNameChoice nameChoice, 1652 UErrorCode *pErrorCode) { 1653 AlgorithmicRange *algRange; 1654 uint32_t *p; 1655 uint32_t i; 1656 1657 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1658 return; 1659 } 1660 1661 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) { 1662 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1663 return; 1664 } 1665 1666 if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { 1667 limit = UCHAR_MAX_VALUE + 1; 1668 } 1669 if((uint32_t)start>=(uint32_t)limit) { 1670 return; 1671 } 1672 1673 if(!isDataLoaded(pErrorCode)) { 1674 return; 1675 } 1676 1677 /* interleave the data-driven ones with the algorithmic ones */ 1678 /* iterate over all algorithmic ranges; assume that they are in ascending order */ 1679 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 1680 i=*p; 1681 algRange=(AlgorithmicRange *)(p+1); 1682 while(i>0) { 1683 /* enumerate the character names before the current algorithmic range */ 1684 /* here: start<limit */ 1685 if((uint32_t)start<algRange->start) { 1686 if((uint32_t)limit<=algRange->start) { 1687 enumNames(uCharNames, start, limit, fn, context, nameChoice); 1688 return; 1689 } 1690 if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) { 1691 return; 1692 } 1693 start=(UChar32)algRange->start; 1694 } 1695 /* enumerate the character names in the current algorithmic range */ 1696 /* here: algRange->start<=start<limit */ 1697 if((uint32_t)start<=algRange->end) { 1698 if((uint32_t)limit<=(algRange->end+1)) { 1699 enumAlgNames(algRange, start, limit, fn, context, nameChoice); 1700 return; 1701 } 1702 if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) { 1703 return; 1704 } 1705 start=(UChar32)algRange->end+1; 1706 } 1707 /* continue to the next algorithmic range (here: start<limit) */ 1708 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 1709 --i; 1710 } 1711 /* enumerate the character names after the last algorithmic range */ 1712 enumNames(uCharNames, start, limit, fn, context, nameChoice); 1713 } 1714 1715 U_CAPI int32_t U_EXPORT2 1716 uprv_getMaxCharNameLength() { 1717 UErrorCode errorCode=U_ZERO_ERROR; 1718 if(calcNameSetsLengths(&errorCode)) { 1719 return gMaxNameLength; 1720 } else { 1721 return 0; 1722 } 1723 } 1724 1725 /** 1726 * Converts the char set cset into a Unicode set uset. 1727 * @param cset Set of 256 bit flags corresponding to a set of chars. 1728 * @param uset USet to receive characters. Existing contents are deleted. 1729 */ 1730 static void 1731 charSetToUSet(uint32_t cset[8], const USetAdder *sa) { 1732 UChar us[256]; 1733 char cs[256]; 1734 1735 int32_t i, length; 1736 UErrorCode errorCode; 1737 1738 errorCode=U_ZERO_ERROR; 1739 1740 if(!calcNameSetsLengths(&errorCode)) { 1741 return; 1742 } 1743 1744 /* build a char string with all chars that are used in character names */ 1745 length=0; 1746 for(i=0; i<256; ++i) { 1747 if(SET_CONTAINS(cset, i)) { 1748 cs[length++]=(char)i; 1749 } 1750 } 1751 1752 /* convert the char string to a UChar string */ 1753 u_charsToUChars(cs, us, length); 1754 1755 /* add each UChar to the USet */ 1756 for(i=0; i<length; ++i) { 1757 if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */ 1758 sa->add(sa->set, us[i]); 1759 } 1760 } 1761 } 1762 1763 /** 1764 * Fills set with characters that are used in Unicode character names. 1765 * @param set USet to receive characters. 1766 */ 1767 U_CAPI void U_EXPORT2 1768 uprv_getCharNameCharacters(const USetAdder *sa) { 1769 charSetToUSet(gNameSet, sa); 1770 } 1771 1772 /* data swapping ------------------------------------------------------------ */ 1773 1774 /* 1775 * The token table contains non-negative entries for token bytes, 1776 * and -1 for bytes that represent themselves in the data file's charset. 1777 * -2 entries are used for lead bytes. 1778 * 1779 * Direct bytes (-1 entries) must be translated from the input charset family 1780 * to the output charset family. 1781 * makeTokenMap() writes a permutation mapping for this. 1782 * Use it once for single-/lead-byte tokens and once more for all trail byte 1783 * tokens. (';' is an unused trail byte marked with -1.) 1784 */ 1785 static void 1786 makeTokenMap(const UDataSwapper *ds, 1787 int16_t tokens[], uint16_t tokenCount, 1788 uint8_t map[256], 1789 UErrorCode *pErrorCode) { 1790 UBool usedOutChar[256]; 1791 uint16_t i, j; 1792 uint8_t c1, c2; 1793 1794 if(U_FAILURE(*pErrorCode)) { 1795 return; 1796 } 1797 1798 if(ds->inCharset==ds->outCharset) { 1799 /* Same charset family: identity permutation */ 1800 for(i=0; i<256; ++i) { 1801 map[i]=(uint8_t)i; 1802 } 1803 } else { 1804 uprv_memset(map, 0, 256); 1805 uprv_memset(usedOutChar, 0, 256); 1806 1807 if(tokenCount>256) { 1808 tokenCount=256; 1809 } 1810 1811 /* set the direct bytes (byte 0 always maps to itself) */ 1812 for(i=1; i<tokenCount; ++i) { 1813 if(tokens[i]==-1) { 1814 /* convert the direct byte character */ 1815 c1=(uint8_t)i; 1816 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode); 1817 if(U_FAILURE(*pErrorCode)) { 1818 udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n", 1819 i, ds->inCharset); 1820 return; 1821 } 1822 1823 /* enter the converted character into the map and mark it used */ 1824 map[c1]=c2; 1825 usedOutChar[c2]=TRUE; 1826 } 1827 } 1828 1829 /* set the mappings for the rest of the permutation */ 1830 for(i=j=1; i<tokenCount; ++i) { 1831 /* set mappings that were not set for direct bytes */ 1832 if(map[i]==0) { 1833 /* set an output byte value that was not used as an output byte above */ 1834 while(usedOutChar[j]) { 1835 ++j; 1836 } 1837 map[i]=(uint8_t)j++; 1838 } 1839 } 1840 1841 /* 1842 * leave mappings at tokenCount and above unset if tokenCount<256 1843 * because they won't be used 1844 */ 1845 } 1846 } 1847 1848 U_CAPI int32_t U_EXPORT2 1849 uchar_swapNames(const UDataSwapper *ds, 1850 const void *inData, int32_t length, void *outData, 1851 UErrorCode *pErrorCode) { 1852 const UDataInfo *pInfo; 1853 int32_t headerSize; 1854 1855 const uint8_t *inBytes; 1856 uint8_t *outBytes; 1857 1858 uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset, 1859 offset, i, count, stringsCount; 1860 1861 const AlgorithmicRange *inRange; 1862 AlgorithmicRange *outRange; 1863 1864 /* udata_swapDataHeader checks the arguments */ 1865 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1866 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1867 return 0; 1868 } 1869 1870 /* check data format and format version */ 1871 pInfo=(const UDataInfo *)((const char *)inData+4); 1872 if(!( 1873 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ 1874 pInfo->dataFormat[1]==0x6e && 1875 pInfo->dataFormat[2]==0x61 && 1876 pInfo->dataFormat[3]==0x6d && 1877 pInfo->formatVersion[0]==1 1878 )) { 1879 udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n", 1880 pInfo->dataFormat[0], pInfo->dataFormat[1], 1881 pInfo->dataFormat[2], pInfo->dataFormat[3], 1882 pInfo->formatVersion[0]); 1883 *pErrorCode=U_UNSUPPORTED_ERROR; 1884 return 0; 1885 } 1886 1887 inBytes=(const uint8_t *)inData+headerSize; 1888 outBytes=(uint8_t *)outData+headerSize; 1889 if(length<0) { 1890 algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]); 1891 } else { 1892 length-=headerSize; 1893 if( length<20 || 1894 (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3])) 1895 ) { 1896 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n", 1897 length); 1898 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1899 return 0; 1900 } 1901 } 1902 1903 if(length<0) { 1904 /* preflighting: iterate through algorithmic ranges */ 1905 offset=algNamesOffset; 1906 count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); 1907 offset+=4; 1908 1909 for(i=0; i<count; ++i) { 1910 inRange=(const AlgorithmicRange *)(inBytes+offset); 1911 offset+=ds->readUInt16(inRange->size); 1912 } 1913 } else { 1914 /* swap data */ 1915 const uint16_t *p; 1916 uint16_t *q, *temp; 1917 1918 int16_t tokens[512]; 1919 uint16_t tokenCount; 1920 1921 uint8_t map[256], trailMap[256]; 1922 1923 /* copy the data for inaccessible bytes */ 1924 if(inBytes!=outBytes) { 1925 uprv_memcpy(outBytes, inBytes, length); 1926 } 1927 1928 /* the initial 4 offsets first */ 1929 tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]); 1930 groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]); 1931 groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]); 1932 ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode); 1933 1934 /* 1935 * now the tokens table 1936 * it needs to be permutated along with the compressed name strings 1937 */ 1938 p=(const uint16_t *)(inBytes+16); 1939 q=(uint16_t *)(outBytes+16); 1940 1941 /* read and swap the tokenCount */ 1942 tokenCount=ds->readUInt16(*p); 1943 ds->swapArray16(ds, p, 2, q, pErrorCode); 1944 ++p; 1945 ++q; 1946 1947 /* read the first 512 tokens and make the token maps */ 1948 if(tokenCount<=512) { 1949 count=tokenCount; 1950 } else { 1951 count=512; 1952 } 1953 for(i=0; i<count; ++i) { 1954 tokens[i]=udata_readInt16(ds, p[i]); 1955 } 1956 for(; i<512; ++i) { 1957 tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */ 1958 } 1959 makeTokenMap(ds, tokens, tokenCount, map, pErrorCode); 1960 makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode); 1961 if(U_FAILURE(*pErrorCode)) { 1962 return 0; 1963 } 1964 1965 /* 1966 * swap and permutate the tokens 1967 * go through a temporary array to support in-place swapping 1968 */ 1969 temp=(uint16_t *)uprv_malloc(tokenCount*2); 1970 if(temp==NULL) { 1971 udata_printError(ds, "out of memory swapping %u unames.icu tokens\n", 1972 tokenCount); 1973 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1974 return 0; 1975 } 1976 1977 /* swap and permutate single-/lead-byte tokens */ 1978 for(i=0; i<tokenCount && i<256; ++i) { 1979 ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode); 1980 } 1981 1982 /* swap and permutate trail-byte tokens */ 1983 for(; i<tokenCount; ++i) { 1984 ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode); 1985 } 1986 1987 /* copy the result into the output and free the temporary array */ 1988 uprv_memcpy(q, temp, tokenCount*2); 1989 uprv_free(temp); 1990 1991 /* 1992 * swap the token strings but not a possible padding byte after 1993 * the terminating NUL of the last string 1994 */ 1995 udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset), 1996 outBytes+tokenStringOffset, pErrorCode); 1997 if(U_FAILURE(*pErrorCode)) { 1998 udata_printError(ds, "uchar_swapNames(token strings) failed\n"); 1999 return 0; 2000 } 2001 2002 /* swap the group table */ 2003 count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset))); 2004 ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2), 2005 outBytes+groupsOffset, pErrorCode); 2006 2007 /* 2008 * swap the group strings 2009 * swap the string bytes but not the nibble-encoded string lengths 2010 */ 2011 if(ds->inCharset!=ds->outCharset) { 2012 uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1]; 2013 2014 const uint8_t *inStrings, *nextInStrings; 2015 uint8_t *outStrings; 2016 2017 uint8_t c; 2018 2019 inStrings=inBytes+groupStringOffset; 2020 outStrings=outBytes+groupStringOffset; 2021 2022 stringsCount=algNamesOffset-groupStringOffset; 2023 2024 /* iterate through string groups until only a few padding bytes are left */ 2025 while(stringsCount>32) { 2026 nextInStrings=expandGroupLengths(inStrings, offsets, lengths); 2027 2028 /* move past the length bytes */ 2029 stringsCount-=(uint32_t)(nextInStrings-inStrings); 2030 outStrings+=nextInStrings-inStrings; 2031 inStrings=nextInStrings; 2032 2033 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */ 2034 stringsCount-=count; 2035 2036 /* swap the string bytes using map[] and trailMap[] */ 2037 while(count>0) { 2038 c=*inStrings++; 2039 *outStrings++=map[c]; 2040 if(tokens[c]!=-2) { 2041 --count; 2042 } else { 2043 /* token lead byte: swap the trail byte, too */ 2044 *outStrings++=trailMap[*inStrings++]; 2045 count-=2; 2046 } 2047 } 2048 } 2049 } 2050 2051 /* swap the algorithmic ranges */ 2052 offset=algNamesOffset; 2053 count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); 2054 ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode); 2055 offset+=4; 2056 2057 for(i=0; i<count; ++i) { 2058 if(offset>(uint32_t)length) { 2059 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n", 2060 length, i); 2061 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 2062 return 0; 2063 } 2064 2065 inRange=(const AlgorithmicRange *)(inBytes+offset); 2066 outRange=(AlgorithmicRange *)(outBytes+offset); 2067 offset+=ds->readUInt16(inRange->size); 2068 2069 ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); 2070 ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode); 2071 switch(inRange->type) { 2072 case 0: 2073 /* swap prefix string */ 2074 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)), 2075 outRange+1, pErrorCode); 2076 if(U_FAILURE(*pErrorCode)) { 2077 udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n", 2078 i); 2079 return 0; 2080 } 2081 break; 2082 case 1: 2083 { 2084 /* swap factors and the prefix and factor strings */ 2085 uint32_t factorsCount; 2086 2087 factorsCount=inRange->variant; 2088 p=(const uint16_t *)(inRange+1); 2089 q=(uint16_t *)(outRange+1); 2090 ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode); 2091 2092 /* swap the strings, up to the last terminating NUL */ 2093 p+=factorsCount; 2094 q+=factorsCount; 2095 stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p); 2096 while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) { 2097 --stringsCount; 2098 } 2099 ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode); 2100 } 2101 break; 2102 default: 2103 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n", 2104 inRange->type, i); 2105 *pErrorCode=U_UNSUPPORTED_ERROR; 2106 return 0; 2107 } 2108 } 2109 } 2110 2111 return headerSize+(int32_t)offset; 2112 } 2113 2114 /* 2115 * Hey, Emacs, please set the following: 2116 * 2117 * Local Variables: 2118 * indent-tabs-mode: nil 2119 * End: 2120 * 2121 */ 2122