1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * file name: unames.c 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 1999oct04 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 #include "unicode/putil.h" 19 #include "unicode/uchar.h" 20 #include "unicode/udata.h" 21 #include "ustr_imp.h" 22 #include "umutex.h" 23 #include "cmemory.h" 24 #include "cstring.h" 25 #include "ucln_cmn.h" 26 #include "udataswp.h" 27 #include "uprops.h" 28 29 /* prototypes ------------------------------------------------------------- */ 30 31 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 32 33 static const char DATA_NAME[] = "unames"; 34 static const char DATA_TYPE[] = "icu"; 35 36 #define GROUP_SHIFT 5 37 #define LINES_PER_GROUP (1UL<<GROUP_SHIFT) 38 #define GROUP_MASK (LINES_PER_GROUP-1) 39 40 /* 41 * This struct was replaced by explicitly accessing equivalent 42 * fields from triples of uint16_t. 43 * The Group struct was padded to 8 bytes on compilers for early ARM CPUs, 44 * which broke the assumption that sizeof(Group)==6 and that the ++ operator 45 * would advance by 6 bytes (3 uint16_t). 46 * 47 * We can't just change the data structure because it's loaded from a data file, 48 * and we don't want to make it less compact, so we changed the access code. 49 * 50 * For details see ICU tickets 6331 and 6008. 51 typedef struct { 52 uint16_t groupMSB, 53 offsetHigh, offsetLow; / * avoid padding * / 54 } Group; 55 */ 56 enum { 57 GROUP_MSB, 58 GROUP_OFFSET_HIGH, 59 GROUP_OFFSET_LOW, 60 GROUP_LENGTH 61 }; 62 63 /* 64 * Get the 32-bit group offset. 65 * @param group (const uint16_t *) pointer to a Group triple of uint16_t 66 * @return group offset (int32_t) 67 */ 68 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW]) 69 70 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH) 71 #define PREV_GROUP(group) ((group)-GROUP_LENGTH) 72 73 typedef struct { 74 uint32_t start, end; 75 uint8_t type, variant; 76 uint16_t size; 77 } AlgorithmicRange; 78 79 typedef struct { 80 uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset; 81 } UCharNames; 82 83 /* 84 * Get the groups table from a UCharNames struct. 85 * The groups table consists of one uint16_t groupCount followed by 86 * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH 87 * and the comment for the old struct Group above. 88 * 89 * @param names (const UCharNames *) pointer to the UCharNames indexes 90 * @return (const uint16_t *) pointer to the groups table 91 */ 92 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset) 93 94 typedef struct { 95 const char *otherName; 96 UChar32 code; 97 } FindName; 98 99 #define DO_FIND_NAME NULL 100 101 static UDataMemory *uCharNamesData=NULL; 102 static UCharNames *uCharNames=NULL; 103 static UErrorCode gLoadErrorCode=U_ZERO_ERROR; 104 105 /* 106 * Maximum length of character names (regular & 1.0). 107 */ 108 static int32_t gMaxNameLength=0; 109 110 /* 111 * Set of chars used in character names (regular & 1.0). 112 * Chars are platform-dependent (can be EBCDIC). 113 */ 114 static uint32_t gNameSet[8]={ 0 }; 115 116 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT 117 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1 118 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2 119 120 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3) 121 122 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = { 123 "unassigned", 124 "uppercase letter", 125 "lowercase letter", 126 "titlecase letter", 127 "modifier letter", 128 "other letter", 129 "non spacing mark", 130 "enclosing mark", 131 "combining spacing mark", 132 "decimal digit number", 133 "letter number", 134 "other number", 135 "space separator", 136 "line separator", 137 "paragraph separator", 138 "control", 139 "format", 140 "private use area", 141 "surrogate", 142 "dash punctuation", 143 "start punctuation", 144 "end punctuation", 145 "connector punctuation", 146 "other punctuation", 147 "math symbol", 148 "currency symbol", 149 "modifier symbol", 150 "other symbol", 151 "initial punctuation", 152 "final punctuation", 153 "noncharacter", 154 "lead surrogate", 155 "trail surrogate" 156 }; 157 158 /* implementation ----------------------------------------------------------- */ 159 160 static UBool U_CALLCONV unames_cleanup(void) 161 { 162 if(uCharNamesData) { 163 udata_close(uCharNamesData); 164 uCharNamesData = NULL; 165 } 166 if(uCharNames) { 167 uCharNames = NULL; 168 } 169 gMaxNameLength=0; 170 return TRUE; 171 } 172 173 static UBool U_CALLCONV 174 isAcceptable(void *context, 175 const char *type, const char *name, 176 const UDataInfo *pInfo) { 177 return (UBool)( 178 pInfo->size>=20 && 179 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 180 pInfo->charsetFamily==U_CHARSET_FAMILY && 181 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ 182 pInfo->dataFormat[1]==0x6e && 183 pInfo->dataFormat[2]==0x61 && 184 pInfo->dataFormat[3]==0x6d && 185 pInfo->formatVersion[0]==1); 186 } 187 188 static UBool 189 isDataLoaded(UErrorCode *pErrorCode) { 190 /* load UCharNames from file if necessary */ 191 UBool isCached; 192 193 /* do this because double-checked locking is broken */ 194 UMTX_CHECK(NULL, (uCharNames!=NULL), isCached); 195 196 if(!isCached) { 197 UCharNames *names; 198 UDataMemory *data; 199 200 /* check error code from previous attempt */ 201 if(U_FAILURE(gLoadErrorCode)) { 202 *pErrorCode=gLoadErrorCode; 203 return FALSE; 204 } 205 206 /* open the data outside the mutex block */ 207 data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode); 208 if(U_FAILURE(*pErrorCode)) { 209 gLoadErrorCode=*pErrorCode; 210 return FALSE; 211 } 212 213 names=(UCharNames *)udata_getMemory(data); 214 215 /* in the mutex block, set the data for this process */ 216 { 217 umtx_lock(NULL); 218 if(uCharNames==NULL) { 219 uCharNamesData=data; 220 uCharNames=names; 221 data=NULL; 222 names=NULL; 223 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup); 224 } 225 umtx_unlock(NULL); 226 } 227 228 /* if a different thread set it first, then close the extra data */ 229 if(data!=NULL) { 230 udata_close(data); /* NULL if it was set correctly */ 231 } 232 } 233 return TRUE; 234 } 235 236 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \ 237 if((bufferLength)>0) { \ 238 *(buffer)++=c; \ 239 --(bufferLength); \ 240 } \ 241 ++(bufferPos); \ 242 } 243 244 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT 245 246 /* 247 * Important: expandName() and compareName() are almost the same - 248 * apply fixes to both. 249 * 250 * UnicodeData.txt uses ';' as a field separator, so no 251 * field can contain ';' as part of its contents. 252 * In unames.dat, it is marked as token[';']==-1 only if the 253 * semicolon is used in the data file - which is iff we 254 * have Unicode 1.0 names or ISO comments or aliases. 255 * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases 256 * although we know that it will never be part of a name. 257 */ 258 static uint16_t 259 expandName(UCharNames *names, 260 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, 261 char *buffer, uint16_t bufferLength) { 262 uint16_t *tokens=(uint16_t *)names+8; 263 uint16_t token, tokenCount=*tokens++, bufferPos=0; 264 uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; 265 uint8_t c; 266 267 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 268 /* 269 * skip the modern name if it is not requested _and_ 270 * if the semicolon byte value is a character, not a token number 271 */ 272 if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 273 int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice; 274 do { 275 while(nameLength>0) { 276 --nameLength; 277 if(*name++==';') { 278 break; 279 } 280 } 281 } while(--fieldIndex>0); 282 } else { 283 /* 284 * the semicolon byte value is a token number, therefore 285 * only modern names are stored in unames.dat and there is no 286 * such requested alternate name here 287 */ 288 nameLength=0; 289 } 290 } 291 292 /* write each letter directly, and write a token word per token */ 293 while(nameLength>0) { 294 --nameLength; 295 c=*name++; 296 297 if(c>=tokenCount) { 298 if(c!=';') { 299 /* implicit letter */ 300 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 301 } else { 302 /* finished */ 303 break; 304 } 305 } else { 306 token=tokens[c]; 307 if(token==(uint16_t)(-2)) { 308 /* this is a lead byte for a double-byte token */ 309 token=tokens[c<<8|*name++]; 310 --nameLength; 311 } 312 if(token==(uint16_t)(-1)) { 313 if(c!=';') { 314 /* explicit letter */ 315 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 316 } else { 317 /* stop, but skip the semicolon if we are seeking 318 extended names and there was no 2.0 name but there 319 is a 1.0 name. */ 320 if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) { 321 if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 322 continue; 323 } 324 } 325 /* finished */ 326 break; 327 } 328 } else { 329 /* write token word */ 330 uint8_t *tokenString=tokenStrings+token; 331 while((c=*tokenString++)!=0) { 332 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 333 } 334 } 335 } 336 } 337 338 /* zero-terminate */ 339 if(bufferLength>0) { 340 *buffer=0; 341 } 342 343 return bufferPos; 344 } 345 346 /* 347 * compareName() is almost the same as expandName() except that it compares 348 * the currently expanded name to an input name. 349 * It returns the match/no match result as soon as possible. 350 */ 351 static UBool 352 compareName(UCharNames *names, 353 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, 354 const char *otherName) { 355 uint16_t *tokens=(uint16_t *)names+8; 356 uint16_t token, tokenCount=*tokens++; 357 uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; 358 uint8_t c; 359 const char *origOtherName = otherName; 360 361 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 362 /* 363 * skip the modern name if it is not requested _and_ 364 * if the semicolon byte value is a character, not a token number 365 */ 366 if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 367 int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice; 368 do { 369 while(nameLength>0) { 370 --nameLength; 371 if(*name++==';') { 372 break; 373 } 374 } 375 } while(--fieldIndex>0); 376 } else { 377 /* 378 * the semicolon byte value is a token number, therefore 379 * only modern names are stored in unames.dat and there is no 380 * such requested alternate name here 381 */ 382 nameLength=0; 383 } 384 } 385 386 /* compare each letter directly, and compare a token word per token */ 387 while(nameLength>0) { 388 --nameLength; 389 c=*name++; 390 391 if(c>=tokenCount) { 392 if(c!=';') { 393 /* implicit letter */ 394 if((char)c!=*otherName++) { 395 return FALSE; 396 } 397 } else { 398 /* finished */ 399 break; 400 } 401 } else { 402 token=tokens[c]; 403 if(token==(uint16_t)(-2)) { 404 /* this is a lead byte for a double-byte token */ 405 token=tokens[c<<8|*name++]; 406 --nameLength; 407 } 408 if(token==(uint16_t)(-1)) { 409 if(c!=';') { 410 /* explicit letter */ 411 if((char)c!=*otherName++) { 412 return FALSE; 413 } 414 } else { 415 /* stop, but skip the semicolon if we are seeking 416 extended names and there was no 2.0 name but there 417 is a 1.0 name. */ 418 if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) { 419 if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 420 continue; 421 } 422 } 423 /* finished */ 424 break; 425 } 426 } else { 427 /* write token word */ 428 uint8_t *tokenString=tokenStrings+token; 429 while((c=*tokenString++)!=0) { 430 if((char)c!=*otherName++) { 431 return FALSE; 432 } 433 } 434 } 435 } 436 } 437 438 /* complete match? */ 439 return (UBool)(*otherName==0); 440 } 441 442 static uint8_t getCharCat(UChar32 cp) { 443 uint8_t cat; 444 445 if (UTF_IS_UNICODE_NONCHAR(cp)) { 446 return U_NONCHARACTER_CODE_POINT; 447 } 448 449 if ((cat = u_charType(cp)) == U_SURROGATE) { 450 cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE; 451 } 452 453 return cat; 454 } 455 456 static const char *getCharCatName(UChar32 cp) { 457 uint8_t cat = getCharCat(cp); 458 459 /* Return unknown if the table of names above is not up to 460 date. */ 461 462 if (cat >= LENGTHOF(charCatNames)) { 463 return "unknown"; 464 } else { 465 return charCatNames[cat]; 466 } 467 } 468 469 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { 470 const char *catname = getCharCatName(code); 471 uint16_t length = 0; 472 473 UChar32 cp; 474 int ndigits, i; 475 476 WRITE_CHAR(buffer, bufferLength, length, '<'); 477 while (catname[length - 1]) { 478 WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]); 479 } 480 WRITE_CHAR(buffer, bufferLength, length, '-'); 481 for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4) 482 ; 483 if (ndigits < 4) 484 ndigits = 4; 485 for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) { 486 uint8_t v = (uint8_t)(cp & 0xf); 487 buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10); 488 } 489 buffer += ndigits; 490 length += ndigits; 491 WRITE_CHAR(buffer, bufferLength, length, '>'); 492 493 return length; 494 } 495 496 /* 497 * getGroup() does a binary search for the group that contains the 498 * Unicode code point "code". 499 * The return value is always a valid Group* that may contain "code" 500 * or else is the highest group before "code". 501 * If the lowest group is after "code", then that one is returned. 502 */ 503 static const uint16_t * 504 getGroup(UCharNames *names, uint32_t code) { 505 const uint16_t *groups=GET_GROUPS(names); 506 uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT), 507 start=0, 508 limit=*groups++, 509 number; 510 511 /* binary search for the group of names that contains the one for code */ 512 while(start<limit-1) { 513 number=(uint16_t)((start+limit)/2); 514 if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) { 515 limit=number; 516 } else { 517 start=number; 518 } 519 } 520 521 /* return this regardless of whether it is an exact match */ 522 return groups+start*GROUP_LENGTH; 523 } 524 525 /* 526 * expandGroupLengths() reads a block of compressed lengths of 32 strings and 527 * expands them into offsets and lengths for each string. 528 * Lengths are stored with a variable-width encoding in consecutive nibbles: 529 * If a nibble<0xc, then it is the length itself (0=empty string). 530 * If a nibble>=0xc, then it forms a length value with the following nibble. 531 * Calculation see below. 532 * The offsets and lengths arrays must be at least 33 (one more) long because 533 * there is no check here at the end if the last nibble is still used. 534 */ 535 static const uint8_t * 536 expandGroupLengths(const uint8_t *s, 537 uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) { 538 /* read the lengths of the 32 strings in this group and get each string's offset */ 539 uint16_t i=0, offset=0, length=0; 540 uint8_t lengthByte; 541 542 /* all 32 lengths must be read to get the offset of the first group string */ 543 while(i<LINES_PER_GROUP) { 544 lengthByte=*s++; 545 546 /* read even nibble - MSBs of lengthByte */ 547 if(length>=12) { 548 /* double-nibble length spread across two bytes */ 549 length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12); 550 lengthByte&=0xf; 551 } else if((lengthByte /* &0xf0 */)>=0xc0) { 552 /* double-nibble length spread across this one byte */ 553 length=(uint16_t)((lengthByte&0x3f)+12); 554 } else { 555 /* single-nibble length in MSBs */ 556 length=(uint16_t)(lengthByte>>4); 557 lengthByte&=0xf; 558 } 559 560 *offsets++=offset; 561 *lengths++=length; 562 563 offset+=length; 564 ++i; 565 566 /* read odd nibble - LSBs of lengthByte */ 567 if((lengthByte&0xf0)==0) { 568 /* this nibble was not consumed for a double-nibble length above */ 569 length=lengthByte; 570 if(length<12) { 571 /* single-nibble length in LSBs */ 572 *offsets++=offset; 573 *lengths++=length; 574 575 offset+=length; 576 ++i; 577 } 578 } else { 579 length=0; /* prevent double-nibble detection in the next iteration */ 580 } 581 } 582 583 /* now, s is at the first group string */ 584 return s; 585 } 586 587 static uint16_t 588 expandGroupName(UCharNames *names, const uint16_t *group, 589 uint16_t lineNumber, UCharNameChoice nameChoice, 590 char *buffer, uint16_t bufferLength) { 591 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 592 const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); 593 s=expandGroupLengths(s, offsets, lengths); 594 return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice, 595 buffer, bufferLength); 596 } 597 598 static uint16_t 599 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice, 600 char *buffer, uint16_t bufferLength) { 601 const uint16_t *group=getGroup(names, code); 602 if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) { 603 return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice, 604 buffer, bufferLength); 605 } else { 606 /* group not found */ 607 /* zero-terminate */ 608 if(bufferLength>0) { 609 *buffer=0; 610 } 611 return 0; 612 } 613 } 614 615 /* 616 * enumGroupNames() enumerates all the names in a 32-group 617 * and either calls the enumerator function or finds a given input name. 618 */ 619 static UBool 620 enumGroupNames(UCharNames *names, const uint16_t *group, 621 UChar32 start, UChar32 end, 622 UEnumCharNamesFn *fn, void *context, 623 UCharNameChoice nameChoice) { 624 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 625 const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); 626 627 s=expandGroupLengths(s, offsets, lengths); 628 if(fn!=DO_FIND_NAME) { 629 char buffer[200]; 630 uint16_t length; 631 632 while(start<=end) { 633 length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer)); 634 if (!length && nameChoice == U_EXTENDED_CHAR_NAME) { 635 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; 636 } 637 /* here, we assume that the buffer is large enough */ 638 if(length>0) { 639 if(!fn(context, start, nameChoice, buffer, length)) { 640 return FALSE; 641 } 642 } 643 ++start; 644 } 645 } else { 646 const char *otherName=((FindName *)context)->otherName; 647 while(start<=end) { 648 if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) { 649 ((FindName *)context)->code=start; 650 return FALSE; 651 } 652 ++start; 653 } 654 } 655 return TRUE; 656 } 657 658 /* 659 * enumExtNames enumerate extended names. 660 * It only needs to do it if it is called with a real function and not 661 * with the dummy DO_FIND_NAME, because u_charFromName() does a check 662 * for extended names by itself. 663 */ 664 static UBool 665 enumExtNames(UChar32 start, UChar32 end, 666 UEnumCharNamesFn *fn, void *context) 667 { 668 if(fn!=DO_FIND_NAME) { 669 char buffer[200]; 670 uint16_t length; 671 672 while(start<=end) { 673 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; 674 /* here, we assume that the buffer is large enough */ 675 if(length>0) { 676 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) { 677 return FALSE; 678 } 679 } 680 ++start; 681 } 682 } 683 684 return TRUE; 685 } 686 687 static UBool 688 enumNames(UCharNames *names, 689 UChar32 start, UChar32 limit, 690 UEnumCharNamesFn *fn, void *context, 691 UCharNameChoice nameChoice) { 692 uint16_t startGroupMSB, endGroupMSB, groupCount; 693 const uint16_t *group, *groupLimit; 694 695 startGroupMSB=(uint16_t)(start>>GROUP_SHIFT); 696 endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT); 697 698 /* find the group that contains start, or the highest before it */ 699 group=getGroup(names, start); 700 701 if(startGroupMSB==endGroupMSB) { 702 if(startGroupMSB==group[GROUP_MSB]) { 703 /* if start and limit-1 are in the same group, then enumerate only in that one */ 704 return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice); 705 } 706 } else { 707 const uint16_t *groups=GET_GROUPS(names); 708 groupCount=*groups++; 709 groupLimit=groups+groupCount*GROUP_LENGTH; 710 711 if(startGroupMSB==group[GROUP_MSB]) { 712 /* enumerate characters in the partial start group */ 713 if((start&GROUP_MASK)!=0) { 714 if(!enumGroupNames(names, group, 715 start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1, 716 fn, context, nameChoice)) { 717 return FALSE; 718 } 719 group=NEXT_GROUP(group); /* continue with the next group */ 720 } 721 } else if(startGroupMSB>group[GROUP_MSB]) { 722 /* make sure that we start enumerating with the first group after start */ 723 const uint16_t *nextGroup=NEXT_GROUP(group); 724 if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) { 725 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; 726 if (end > limit) { 727 end = limit; 728 } 729 if (!enumExtNames(start, end - 1, fn, context)) { 730 return FALSE; 731 } 732 } 733 group=nextGroup; 734 } 735 736 /* enumerate entire groups between the start- and end-groups */ 737 while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) { 738 const uint16_t *nextGroup; 739 start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT; 740 if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) { 741 return FALSE; 742 } 743 nextGroup=NEXT_GROUP(group); 744 if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) { 745 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; 746 if (end > limit) { 747 end = limit; 748 } 749 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) { 750 return FALSE; 751 } 752 } 753 group=nextGroup; 754 } 755 756 /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */ 757 if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) { 758 return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice); 759 } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) { 760 UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT; 761 if (next > start) { 762 start = next; 763 } 764 } else { 765 return TRUE; 766 } 767 } 768 769 /* we have not found a group, which means everything is made of 770 extended names. */ 771 if (nameChoice == U_EXTENDED_CHAR_NAME) { 772 if (limit > UCHAR_MAX_VALUE + 1) { 773 limit = UCHAR_MAX_VALUE + 1; 774 } 775 return enumExtNames(start, limit - 1, fn, context); 776 } 777 778 return TRUE; 779 } 780 781 static uint16_t 782 writeFactorSuffix(const uint16_t *factors, uint16_t count, 783 const char *s, /* suffix elements */ 784 uint32_t code, 785 uint16_t indexes[8], /* output fields from here */ 786 const char *elementBases[8], const char *elements[8], 787 char *buffer, uint16_t bufferLength) { 788 uint16_t i, factor, bufferPos=0; 789 char c; 790 791 /* write elements according to the factors */ 792 793 /* 794 * the factorized elements are determined by modulo arithmetic 795 * with the factors of this algorithm 796 * 797 * note that for fewer operations, count is decremented here 798 */ 799 --count; 800 for(i=count; i>0; --i) { 801 factor=factors[i]; 802 indexes[i]=(uint16_t)(code%factor); 803 code/=factor; 804 } 805 /* 806 * we don't need to calculate the last modulus because start<=code<=end 807 * guarantees here that code<=factors[0] 808 */ 809 indexes[0]=(uint16_t)code; 810 811 /* write each element */ 812 for(;;) { 813 if(elementBases!=NULL) { 814 *elementBases++=s; 815 } 816 817 /* skip indexes[i] strings */ 818 factor=indexes[i]; 819 while(factor>0) { 820 while(*s++!=0) {} 821 --factor; 822 } 823 if(elements!=NULL) { 824 *elements++=s; 825 } 826 827 /* write element */ 828 while((c=*s++)!=0) { 829 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 830 } 831 832 /* we do not need to perform the rest of this loop for i==count - break here */ 833 if(i>=count) { 834 break; 835 } 836 837 /* skip the rest of the strings for this factors[i] */ 838 factor=(uint16_t)(factors[i]-indexes[i]-1); 839 while(factor>0) { 840 while(*s++!=0) {} 841 --factor; 842 } 843 844 ++i; 845 } 846 847 /* zero-terminate */ 848 if(bufferLength>0) { 849 *buffer=0; 850 } 851 852 return bufferPos; 853 } 854 855 /* 856 * Important: 857 * Parts of findAlgName() are almost the same as some of getAlgName(). 858 * Fixes must be applied to both. 859 */ 860 static uint16_t 861 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, 862 char *buffer, uint16_t bufferLength) { 863 uint16_t bufferPos=0; 864 865 /* Only the normative character name can be algorithmic. */ 866 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 867 /* zero-terminate */ 868 if(bufferLength>0) { 869 *buffer=0; 870 } 871 return 0; 872 } 873 874 switch(range->type) { 875 case 0: { 876 /* name = prefix hex-digits */ 877 const char *s=(const char *)(range+1); 878 char c; 879 880 uint16_t i, count; 881 882 /* copy prefix */ 883 while((c=*s++)!=0) { 884 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 885 } 886 887 /* write hexadecimal code point value */ 888 count=range->variant; 889 890 /* zero-terminate */ 891 if(count<bufferLength) { 892 buffer[count]=0; 893 } 894 895 for(i=count; i>0;) { 896 if(--i<bufferLength) { 897 c=(char)(code&0xf); 898 if(c<10) { 899 c+='0'; 900 } else { 901 c+='A'-10; 902 } 903 buffer[i]=c; 904 } 905 code>>=4; 906 } 907 908 bufferPos+=count; 909 break; 910 } 911 case 1: { 912 /* name = prefix factorized-elements */ 913 uint16_t indexes[8]; 914 const uint16_t *factors=(const uint16_t *)(range+1); 915 uint16_t count=range->variant; 916 const char *s=(const char *)(factors+count); 917 char c; 918 919 /* copy prefix */ 920 while((c=*s++)!=0) { 921 WRITE_CHAR(buffer, bufferLength, bufferPos, c); 922 } 923 924 bufferPos+=writeFactorSuffix(factors, count, 925 s, code-range->start, indexes, NULL, NULL, buffer, bufferLength); 926 break; 927 } 928 default: 929 /* undefined type */ 930 /* zero-terminate */ 931 if(bufferLength>0) { 932 *buffer=0; 933 } 934 break; 935 } 936 937 return bufferPos; 938 } 939 940 /* 941 * Important: enumAlgNames() and findAlgName() are almost the same. 942 * Any fix must be applied to both. 943 */ 944 static UBool 945 enumAlgNames(AlgorithmicRange *range, 946 UChar32 start, UChar32 limit, 947 UEnumCharNamesFn *fn, void *context, 948 UCharNameChoice nameChoice) { 949 char buffer[200]; 950 uint16_t length; 951 952 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 953 return TRUE; 954 } 955 956 switch(range->type) { 957 case 0: { 958 char *s, *end; 959 char c; 960 961 /* get the full name of the start character */ 962 length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer)); 963 if(length<=0) { 964 return TRUE; 965 } 966 967 /* call the enumerator function with this first character */ 968 if(!fn(context, start, nameChoice, buffer, length)) { 969 return FALSE; 970 } 971 972 /* go to the end of the name; all these names have the same length */ 973 end=buffer; 974 while(*end!=0) { 975 ++end; 976 } 977 978 /* enumerate the rest of the names */ 979 while(++start<limit) { 980 /* increment the hexadecimal number on a character-basis */ 981 s=end; 982 for (;;) { 983 c=*--s; 984 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) { 985 *s=(char)(c+1); 986 break; 987 } else if(c=='9') { 988 *s='A'; 989 break; 990 } else if(c=='F') { 991 *s='0'; 992 } 993 } 994 995 if(!fn(context, start, nameChoice, buffer, length)) { 996 return FALSE; 997 } 998 } 999 break; 1000 } 1001 case 1: { 1002 uint16_t indexes[8]; 1003 const char *elementBases[8], *elements[8]; 1004 const uint16_t *factors=(const uint16_t *)(range+1); 1005 uint16_t count=range->variant; 1006 const char *s=(const char *)(factors+count); 1007 char *suffix, *t; 1008 uint16_t prefixLength, i, idx; 1009 1010 char c; 1011 1012 /* name = prefix factorized-elements */ 1013 1014 /* copy prefix */ 1015 suffix=buffer; 1016 prefixLength=0; 1017 while((c=*s++)!=0) { 1018 *suffix++=c; 1019 ++prefixLength; 1020 } 1021 1022 /* append the suffix of the start character */ 1023 length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count, 1024 s, (uint32_t)start-range->start, 1025 indexes, elementBases, elements, 1026 suffix, (uint16_t)(sizeof(buffer)-prefixLength))); 1027 1028 /* call the enumerator function with this first character */ 1029 if(!fn(context, start, nameChoice, buffer, length)) { 1030 return FALSE; 1031 } 1032 1033 /* enumerate the rest of the names */ 1034 while(++start<limit) { 1035 /* increment the indexes in lexical order bound by the factors */ 1036 i=count; 1037 for (;;) { 1038 idx=(uint16_t)(indexes[--i]+1); 1039 if(idx<factors[i]) { 1040 /* skip one index and its element string */ 1041 indexes[i]=idx; 1042 s=elements[i]; 1043 while(*s++!=0) { 1044 } 1045 elements[i]=s; 1046 break; 1047 } else { 1048 /* reset this index to 0 and its element string to the first one */ 1049 indexes[i]=0; 1050 elements[i]=elementBases[i]; 1051 } 1052 } 1053 1054 /* to make matters a little easier, just append all elements to the suffix */ 1055 t=suffix; 1056 length=prefixLength; 1057 for(i=0; i<count; ++i) { 1058 s=elements[i]; 1059 while((c=*s++)!=0) { 1060 *t++=c; 1061 ++length; 1062 } 1063 } 1064 /* zero-terminate */ 1065 *t=0; 1066 1067 if(!fn(context, start, nameChoice, buffer, length)) { 1068 return FALSE; 1069 } 1070 } 1071 break; 1072 } 1073 default: 1074 /* undefined type */ 1075 break; 1076 } 1077 1078 return TRUE; 1079 } 1080 1081 /* 1082 * findAlgName() is almost the same as enumAlgNames() except that it 1083 * returns the code point for a name if it fits into the range. 1084 * It returns 0xffff otherwise. 1085 */ 1086 static UChar32 1087 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) { 1088 UChar32 code; 1089 1090 if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 1091 return 0xffff; 1092 } 1093 1094 switch(range->type) { 1095 case 0: { 1096 /* name = prefix hex-digits */ 1097 const char *s=(const char *)(range+1); 1098 char c; 1099 1100 uint16_t i, count; 1101 1102 /* compare prefix */ 1103 while((c=*s++)!=0) { 1104 if((char)c!=*otherName++) { 1105 return 0xffff; 1106 } 1107 } 1108 1109 /* read hexadecimal code point value */ 1110 count=range->variant; 1111 code=0; 1112 for(i=0; i<count; ++i) { 1113 c=*otherName++; 1114 if('0'<=c && c<='9') { 1115 code=(code<<4)|(c-'0'); 1116 } else if('A'<=c && c<='F') { 1117 code=(code<<4)|(c-'A'+10); 1118 } else { 1119 return 0xffff; 1120 } 1121 } 1122 1123 /* does it fit into the range? */ 1124 if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) { 1125 return code; 1126 } 1127 break; 1128 } 1129 case 1: { 1130 char buffer[64]; 1131 uint16_t indexes[8]; 1132 const char *elementBases[8], *elements[8]; 1133 const uint16_t *factors=(const uint16_t *)(range+1); 1134 uint16_t count=range->variant; 1135 const char *s=(const char *)(factors+count), *t; 1136 UChar32 start, limit; 1137 uint16_t i, idx; 1138 1139 char c; 1140 1141 /* name = prefix factorized-elements */ 1142 1143 /* compare prefix */ 1144 while((c=*s++)!=0) { 1145 if((char)c!=*otherName++) { 1146 return 0xffff; 1147 } 1148 } 1149 1150 start=(UChar32)range->start; 1151 limit=(UChar32)(range->end+1); 1152 1153 /* initialize the suffix elements for enumeration; indexes should all be set to 0 */ 1154 writeFactorSuffix(factors, count, s, 0, 1155 indexes, elementBases, elements, buffer, sizeof(buffer)); 1156 1157 /* compare the first suffix */ 1158 if(0==uprv_strcmp(otherName, buffer)) { 1159 return start; 1160 } 1161 1162 /* enumerate and compare the rest of the suffixes */ 1163 while(++start<limit) { 1164 /* increment the indexes in lexical order bound by the factors */ 1165 i=count; 1166 for (;;) { 1167 idx=(uint16_t)(indexes[--i]+1); 1168 if(idx<factors[i]) { 1169 /* skip one index and its element string */ 1170 indexes[i]=idx; 1171 s=elements[i]; 1172 while(*s++!=0) {} 1173 elements[i]=s; 1174 break; 1175 } else { 1176 /* reset this index to 0 and its element string to the first one */ 1177 indexes[i]=0; 1178 elements[i]=elementBases[i]; 1179 } 1180 } 1181 1182 /* to make matters a little easier, just compare all elements of the suffix */ 1183 t=otherName; 1184 for(i=0; i<count; ++i) { 1185 s=elements[i]; 1186 while((c=*s++)!=0) { 1187 if(c!=*t++) { 1188 s=""; /* does not match */ 1189 i=99; 1190 } 1191 } 1192 } 1193 if(i<99 && *t==0) { 1194 return start; 1195 } 1196 } 1197 break; 1198 } 1199 default: 1200 /* undefined type */ 1201 break; 1202 } 1203 1204 return 0xffff; 1205 } 1206 1207 /* sets of name characters, maximum name lengths ---------------------------- */ 1208 1209 #define SET_ADD(set, c) ((set)[(uint8_t)c>>5]|=((uint32_t)1<<((uint8_t)c&0x1f))) 1210 #define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0) 1211 1212 static int32_t 1213 calcStringSetLength(uint32_t set[8], const char *s) { 1214 int32_t length=0; 1215 char c; 1216 1217 while((c=*s++)!=0) { 1218 SET_ADD(set, c); 1219 ++length; 1220 } 1221 return length; 1222 } 1223 1224 static int32_t 1225 calcAlgNameSetsLengths(int32_t maxNameLength) { 1226 AlgorithmicRange *range; 1227 uint32_t *p; 1228 uint32_t rangeCount; 1229 int32_t length; 1230 1231 /* enumerate algorithmic ranges */ 1232 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 1233 rangeCount=*p; 1234 range=(AlgorithmicRange *)(p+1); 1235 while(rangeCount>0) { 1236 switch(range->type) { 1237 case 0: 1238 /* name = prefix + (range->variant times) hex-digits */ 1239 /* prefix */ 1240 length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant; 1241 if(length>maxNameLength) { 1242 maxNameLength=length; 1243 } 1244 break; 1245 case 1: { 1246 /* name = prefix factorized-elements */ 1247 const uint16_t *factors=(const uint16_t *)(range+1); 1248 const char *s; 1249 int32_t i, count=range->variant, factor, factorLength, maxFactorLength; 1250 1251 /* prefix length */ 1252 s=(const char *)(factors+count); 1253 length=calcStringSetLength(gNameSet, s); 1254 s+=length+1; /* start of factor suffixes */ 1255 1256 /* get the set and maximum factor suffix length for each factor */ 1257 for(i=0; i<count; ++i) { 1258 maxFactorLength=0; 1259 for(factor=factors[i]; factor>0; --factor) { 1260 factorLength=calcStringSetLength(gNameSet, s); 1261 s+=factorLength+1; 1262 if(factorLength>maxFactorLength) { 1263 maxFactorLength=factorLength; 1264 } 1265 } 1266 length+=maxFactorLength; 1267 } 1268 1269 if(length>maxNameLength) { 1270 maxNameLength=length; 1271 } 1272 break; 1273 } 1274 default: 1275 /* unknown type */ 1276 break; 1277 } 1278 1279 range=(AlgorithmicRange *)((uint8_t *)range+range->size); 1280 --rangeCount; 1281 } 1282 return maxNameLength; 1283 } 1284 1285 static int32_t 1286 calcExtNameSetsLengths(int32_t maxNameLength) { 1287 int32_t i, length; 1288 1289 for(i=0; i<LENGTHOF(charCatNames); ++i) { 1290 /* 1291 * for each category, count the length of the category name 1292 * plus 9= 1293 * 2 for <> 1294 * 1 for - 1295 * 6 for most hex digits per code point 1296 */ 1297 length=9+calcStringSetLength(gNameSet, charCatNames[i]); 1298 if(length>maxNameLength) { 1299 maxNameLength=length; 1300 } 1301 } 1302 return maxNameLength; 1303 } 1304 1305 static int32_t 1306 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths, 1307 uint32_t set[8], 1308 const uint8_t **pLine, const uint8_t *lineLimit) { 1309 const uint8_t *line=*pLine; 1310 int32_t length=0, tokenLength; 1311 uint16_t c, token; 1312 1313 while(line!=lineLimit && (c=*line++)!=(uint8_t)';') { 1314 if(c>=tokenCount) { 1315 /* implicit letter */ 1316 SET_ADD(set, c); 1317 ++length; 1318 } else { 1319 token=tokens[c]; 1320 if(token==(uint16_t)(-2)) { 1321 /* this is a lead byte for a double-byte token */ 1322 c=c<<8|*line++; 1323 token=tokens[c]; 1324 } 1325 if(token==(uint16_t)(-1)) { 1326 /* explicit letter */ 1327 SET_ADD(set, c); 1328 ++length; 1329 } else { 1330 /* count token word */ 1331 if(tokenLengths!=NULL) { 1332 /* use cached token length */ 1333 tokenLength=tokenLengths[c]; 1334 if(tokenLength==0) { 1335 tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); 1336 tokenLengths[c]=(int8_t)tokenLength; 1337 } 1338 } else { 1339 tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); 1340 } 1341 length+=tokenLength; 1342 } 1343 } 1344 } 1345 1346 *pLine=line; 1347 return length; 1348 } 1349 1350 static void 1351 calcGroupNameSetsLengths(int32_t maxNameLength) { 1352 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 1353 1354 uint16_t *tokens=(uint16_t *)uCharNames+8; 1355 uint16_t tokenCount=*tokens++; 1356 uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset; 1357 1358 int8_t *tokenLengths; 1359 1360 const uint16_t *group; 1361 const uint8_t *s, *line, *lineLimit; 1362 1363 int32_t groupCount, lineNumber, length; 1364 1365 tokenLengths=(int8_t *)uprv_malloc(tokenCount); 1366 if(tokenLengths!=NULL) { 1367 uprv_memset(tokenLengths, 0, tokenCount); 1368 } 1369 1370 group=GET_GROUPS(uCharNames); 1371 groupCount=*group++; 1372 1373 /* enumerate all groups */ 1374 while(groupCount>0) { 1375 s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group); 1376 s=expandGroupLengths(s, offsets, lengths); 1377 1378 /* enumerate all lines in each group */ 1379 for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) { 1380 line=s+offsets[lineNumber]; 1381 length=lengths[lineNumber]; 1382 if(length==0) { 1383 continue; 1384 } 1385 1386 lineLimit=line+length; 1387 1388 /* read regular name */ 1389 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); 1390 if(length>maxNameLength) { 1391 maxNameLength=length; 1392 } 1393 if(line==lineLimit) { 1394 continue; 1395 } 1396 1397 /* read Unicode 1.0 name */ 1398 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); 1399 if(length>maxNameLength) { 1400 maxNameLength=length; 1401 } 1402 if(line==lineLimit) { 1403 continue; 1404 } 1405 1406 /* read ISO comment */ 1407 /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/ 1408 } 1409 1410 group=NEXT_GROUP(group); 1411 --groupCount; 1412 } 1413 1414 if(tokenLengths!=NULL) { 1415 uprv_free(tokenLengths); 1416 } 1417 1418 /* set gMax... - name length last for threading */ 1419 gMaxNameLength=maxNameLength; 1420 } 1421 1422 static UBool 1423 calcNameSetsLengths(UErrorCode *pErrorCode) { 1424 static const char extChars[]="0123456789ABCDEF<>-"; 1425 int32_t i, maxNameLength; 1426 1427 if(gMaxNameLength!=0) { 1428 return TRUE; 1429 } 1430 1431 if(!isDataLoaded(pErrorCode)) { 1432 return FALSE; 1433 } 1434 1435 /* set hex digits, used in various names, and <>-, used in extended names */ 1436 for(i=0; i<sizeof(extChars)-1; ++i) { 1437 SET_ADD(gNameSet, extChars[i]); 1438 } 1439 1440 /* set sets and lengths from algorithmic names */ 1441 maxNameLength=calcAlgNameSetsLengths(0); 1442 1443 /* set sets and lengths from extended names */ 1444 maxNameLength=calcExtNameSetsLengths(maxNameLength); 1445 1446 /* set sets and lengths from group names, set global maximum values */ 1447 calcGroupNameSetsLengths(maxNameLength); 1448 1449 return TRUE; 1450 } 1451 1452 /* public API --------------------------------------------------------------- */ 1453 1454 U_CAPI int32_t U_EXPORT2 1455 u_charName(UChar32 code, UCharNameChoice nameChoice, 1456 char *buffer, int32_t bufferLength, 1457 UErrorCode *pErrorCode) { 1458 AlgorithmicRange *algRange; 1459 uint32_t *p; 1460 uint32_t i; 1461 int32_t length; 1462 1463 /* check the argument values */ 1464 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1465 return 0; 1466 } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || 1467 bufferLength<0 || (bufferLength>0 && buffer==NULL) 1468 ) { 1469 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1470 return 0; 1471 } 1472 1473 if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { 1474 return u_terminateChars(buffer, bufferLength, 0, pErrorCode); 1475 } 1476 1477 length=0; 1478 1479 /* try algorithmic names first */ 1480 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 1481 i=*p; 1482 algRange=(AlgorithmicRange *)(p+1); 1483 while(i>0) { 1484 if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) { 1485 length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); 1486 break; 1487 } 1488 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 1489 --i; 1490 } 1491 1492 if(i==0) { 1493 if (nameChoice == U_EXTENDED_CHAR_NAME) { 1494 length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength); 1495 if (!length) { 1496 /* extended character name */ 1497 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength); 1498 } 1499 } else { 1500 /* normal character name */ 1501 length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); 1502 } 1503 } 1504 1505 return u_terminateChars(buffer, bufferLength, length, pErrorCode); 1506 } 1507 1508 U_CAPI int32_t U_EXPORT2 1509 u_getISOComment(UChar32 c, 1510 char *dest, int32_t destCapacity, 1511 UErrorCode *pErrorCode) { 1512 int32_t length; 1513 1514 /* check the argument values */ 1515 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1516 return 0; 1517 } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { 1518 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1519 return 0; 1520 } 1521 1522 if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { 1523 return u_terminateChars(dest, destCapacity, 0, pErrorCode); 1524 } 1525 1526 /* the ISO comment is stored like a normal character name */ 1527 length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity); 1528 return u_terminateChars(dest, destCapacity, length, pErrorCode); 1529 } 1530 1531 U_CAPI UChar32 U_EXPORT2 1532 u_charFromName(UCharNameChoice nameChoice, 1533 const char *name, 1534 UErrorCode *pErrorCode) { 1535 char upper[120], lower[120]; 1536 FindName findName; 1537 AlgorithmicRange *algRange; 1538 uint32_t *p; 1539 uint32_t i; 1540 UChar32 cp = 0; 1541 char c0; 1542 UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */ 1543 1544 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1545 return error; 1546 } 1547 1548 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) { 1549 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1550 return error; 1551 } 1552 1553 if(!isDataLoaded(pErrorCode)) { 1554 return error; 1555 } 1556 1557 /* construct the uppercase and lowercase of the name first */ 1558 for(i=0; i<sizeof(upper); ++i) { 1559 if((c0=*name++)!=0) { 1560 upper[i]=uprv_toupper(c0); 1561 lower[i]=uprv_tolower(c0); 1562 } else { 1563 upper[i]=lower[i]=0; 1564 break; 1565 } 1566 } 1567 if(i==sizeof(upper)) { 1568 /* name too long, there is no such character */ 1569 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1570 return error; 1571 } 1572 1573 /* try extended names first */ 1574 if (lower[0] == '<') { 1575 if (nameChoice == U_EXTENDED_CHAR_NAME) { 1576 if (lower[--i] == '>') { 1577 for (--i; lower[i] && lower[i] != '-'; --i) { 1578 } 1579 1580 if (lower[i] == '-') { /* We've got a category. */ 1581 uint32_t cIdx; 1582 1583 lower[i] = 0; 1584 1585 for (++i; lower[i] != '>'; ++i) { 1586 if (lower[i] >= '0' && lower[i] <= '9') { 1587 cp = (cp << 4) + lower[i] - '0'; 1588 } else if (lower[i] >= 'a' && lower[i] <= 'f') { 1589 cp = (cp << 4) + lower[i] - 'a' + 10; 1590 } else { 1591 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1592 return error; 1593 } 1594 } 1595 1596 /* Now validate the category name. 1597 We could use a binary search, or a trie, if 1598 we really wanted to. */ 1599 1600 for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) { 1601 1602 if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { 1603 if (getCharCat(cp) == cIdx) { 1604 return cp; 1605 } 1606 break; 1607 } 1608 } 1609 } 1610 } 1611 } 1612 1613 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1614 return error; 1615 } 1616 1617 /* try algorithmic names now */ 1618 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 1619 i=*p; 1620 algRange=(AlgorithmicRange *)(p+1); 1621 while(i>0) { 1622 if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) { 1623 return cp; 1624 } 1625 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 1626 --i; 1627 } 1628 1629 /* normal character name */ 1630 findName.otherName=upper; 1631 findName.code=error; 1632 enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice); 1633 if (findName.code == error) { 1634 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1635 } 1636 return findName.code; 1637 } 1638 1639 U_CAPI void U_EXPORT2 1640 u_enumCharNames(UChar32 start, UChar32 limit, 1641 UEnumCharNamesFn *fn, 1642 void *context, 1643 UCharNameChoice nameChoice, 1644 UErrorCode *pErrorCode) { 1645 AlgorithmicRange *algRange; 1646 uint32_t *p; 1647 uint32_t i; 1648 1649 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1650 return; 1651 } 1652 1653 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) { 1654 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1655 return; 1656 } 1657 1658 if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { 1659 limit = UCHAR_MAX_VALUE + 1; 1660 } 1661 if((uint32_t)start>=(uint32_t)limit) { 1662 return; 1663 } 1664 1665 if(!isDataLoaded(pErrorCode)) { 1666 return; 1667 } 1668 1669 /* interleave the data-driven ones with the algorithmic ones */ 1670 /* iterate over all algorithmic ranges; assume that they are in ascending order */ 1671 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 1672 i=*p; 1673 algRange=(AlgorithmicRange *)(p+1); 1674 while(i>0) { 1675 /* enumerate the character names before the current algorithmic range */ 1676 /* here: start<limit */ 1677 if((uint32_t)start<algRange->start) { 1678 if((uint32_t)limit<=algRange->start) { 1679 enumNames(uCharNames, start, limit, fn, context, nameChoice); 1680 return; 1681 } 1682 if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) { 1683 return; 1684 } 1685 start=(UChar32)algRange->start; 1686 } 1687 /* enumerate the character names in the current algorithmic range */ 1688 /* here: algRange->start<=start<limit */ 1689 if((uint32_t)start<=algRange->end) { 1690 if((uint32_t)limit<=(algRange->end+1)) { 1691 enumAlgNames(algRange, start, limit, fn, context, nameChoice); 1692 return; 1693 } 1694 if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) { 1695 return; 1696 } 1697 start=(UChar32)algRange->end+1; 1698 } 1699 /* continue to the next algorithmic range (here: start<limit) */ 1700 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 1701 --i; 1702 } 1703 /* enumerate the character names after the last algorithmic range */ 1704 enumNames(uCharNames, start, limit, fn, context, nameChoice); 1705 } 1706 1707 U_CAPI int32_t U_EXPORT2 1708 uprv_getMaxCharNameLength() { 1709 UErrorCode errorCode=U_ZERO_ERROR; 1710 if(calcNameSetsLengths(&errorCode)) { 1711 return gMaxNameLength; 1712 } else { 1713 return 0; 1714 } 1715 } 1716 1717 /** 1718 * Converts the char set cset into a Unicode set uset. 1719 * @param cset Set of 256 bit flags corresponding to a set of chars. 1720 * @param uset USet to receive characters. Existing contents are deleted. 1721 */ 1722 static void 1723 charSetToUSet(uint32_t cset[8], const USetAdder *sa) { 1724 UChar us[256]; 1725 char cs[256]; 1726 1727 int32_t i, length; 1728 UErrorCode errorCode; 1729 1730 errorCode=U_ZERO_ERROR; 1731 1732 if(!calcNameSetsLengths(&errorCode)) { 1733 return; 1734 } 1735 1736 /* build a char string with all chars that are used in character names */ 1737 length=0; 1738 for(i=0; i<256; ++i) { 1739 if(SET_CONTAINS(cset, i)) { 1740 cs[length++]=(char)i; 1741 } 1742 } 1743 1744 /* convert the char string to a UChar string */ 1745 u_charsToUChars(cs, us, length); 1746 1747 /* add each UChar to the USet */ 1748 for(i=0; i<length; ++i) { 1749 if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */ 1750 sa->add(sa->set, us[i]); 1751 } 1752 } 1753 } 1754 1755 /** 1756 * Fills set with characters that are used in Unicode character names. 1757 * @param set USet to receive characters. 1758 */ 1759 U_CAPI void U_EXPORT2 1760 uprv_getCharNameCharacters(const USetAdder *sa) { 1761 charSetToUSet(gNameSet, sa); 1762 } 1763 1764 /* data swapping ------------------------------------------------------------ */ 1765 1766 /* 1767 * The token table contains non-negative entries for token bytes, 1768 * and -1 for bytes that represent themselves in the data file's charset. 1769 * -2 entries are used for lead bytes. 1770 * 1771 * Direct bytes (-1 entries) must be translated from the input charset family 1772 * to the output charset family. 1773 * makeTokenMap() writes a permutation mapping for this. 1774 * Use it once for single-/lead-byte tokens and once more for all trail byte 1775 * tokens. (';' is an unused trail byte marked with -1.) 1776 */ 1777 static void 1778 makeTokenMap(const UDataSwapper *ds, 1779 int16_t tokens[], uint16_t tokenCount, 1780 uint8_t map[256], 1781 UErrorCode *pErrorCode) { 1782 UBool usedOutChar[256]; 1783 uint16_t i, j; 1784 uint8_t c1, c2; 1785 1786 if(U_FAILURE(*pErrorCode)) { 1787 return; 1788 } 1789 1790 if(ds->inCharset==ds->outCharset) { 1791 /* Same charset family: identity permutation */ 1792 for(i=0; i<256; ++i) { 1793 map[i]=(uint8_t)i; 1794 } 1795 } else { 1796 uprv_memset(map, 0, 256); 1797 uprv_memset(usedOutChar, 0, 256); 1798 1799 if(tokenCount>256) { 1800 tokenCount=256; 1801 } 1802 1803 /* set the direct bytes (byte 0 always maps to itself) */ 1804 for(i=1; i<tokenCount; ++i) { 1805 if(tokens[i]==-1) { 1806 /* convert the direct byte character */ 1807 c1=(uint8_t)i; 1808 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode); 1809 if(U_FAILURE(*pErrorCode)) { 1810 udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n", 1811 i, ds->inCharset); 1812 return; 1813 } 1814 1815 /* enter the converted character into the map and mark it used */ 1816 map[c1]=c2; 1817 usedOutChar[c2]=TRUE; 1818 } 1819 } 1820 1821 /* set the mappings for the rest of the permutation */ 1822 for(i=j=1; i<tokenCount; ++i) { 1823 /* set mappings that were not set for direct bytes */ 1824 if(map[i]==0) { 1825 /* set an output byte value that was not used as an output byte above */ 1826 while(usedOutChar[j]) { 1827 ++j; 1828 } 1829 map[i]=(uint8_t)j++; 1830 } 1831 } 1832 1833 /* 1834 * leave mappings at tokenCount and above unset if tokenCount<256 1835 * because they won't be used 1836 */ 1837 } 1838 } 1839 1840 U_CAPI int32_t U_EXPORT2 1841 uchar_swapNames(const UDataSwapper *ds, 1842 const void *inData, int32_t length, void *outData, 1843 UErrorCode *pErrorCode) { 1844 const UDataInfo *pInfo; 1845 int32_t headerSize; 1846 1847 const uint8_t *inBytes; 1848 uint8_t *outBytes; 1849 1850 uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset, 1851 offset, i, count, stringsCount; 1852 1853 const AlgorithmicRange *inRange; 1854 AlgorithmicRange *outRange; 1855 1856 /* udata_swapDataHeader checks the arguments */ 1857 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1858 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1859 return 0; 1860 } 1861 1862 /* check data format and format version */ 1863 pInfo=(const UDataInfo *)((const char *)inData+4); 1864 if(!( 1865 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ 1866 pInfo->dataFormat[1]==0x6e && 1867 pInfo->dataFormat[2]==0x61 && 1868 pInfo->dataFormat[3]==0x6d && 1869 pInfo->formatVersion[0]==1 1870 )) { 1871 udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n", 1872 pInfo->dataFormat[0], pInfo->dataFormat[1], 1873 pInfo->dataFormat[2], pInfo->dataFormat[3], 1874 pInfo->formatVersion[0]); 1875 *pErrorCode=U_UNSUPPORTED_ERROR; 1876 return 0; 1877 } 1878 1879 inBytes=(const uint8_t *)inData+headerSize; 1880 outBytes=(uint8_t *)outData+headerSize; 1881 if(length<0) { 1882 algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]); 1883 } else { 1884 length-=headerSize; 1885 if( length<20 || 1886 (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3])) 1887 ) { 1888 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n", 1889 length); 1890 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1891 return 0; 1892 } 1893 } 1894 1895 if(length<0) { 1896 /* preflighting: iterate through algorithmic ranges */ 1897 offset=algNamesOffset; 1898 count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); 1899 offset+=4; 1900 1901 for(i=0; i<count; ++i) { 1902 inRange=(const AlgorithmicRange *)(inBytes+offset); 1903 offset+=ds->readUInt16(inRange->size); 1904 } 1905 } else { 1906 /* swap data */ 1907 const uint16_t *p; 1908 uint16_t *q, *temp; 1909 1910 int16_t tokens[512]; 1911 uint16_t tokenCount; 1912 1913 uint8_t map[256], trailMap[256]; 1914 1915 /* copy the data for inaccessible bytes */ 1916 if(inBytes!=outBytes) { 1917 uprv_memcpy(outBytes, inBytes, length); 1918 } 1919 1920 /* the initial 4 offsets first */ 1921 tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]); 1922 groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]); 1923 groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]); 1924 ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode); 1925 1926 /* 1927 * now the tokens table 1928 * it needs to be permutated along with the compressed name strings 1929 */ 1930 p=(const uint16_t *)(inBytes+16); 1931 q=(uint16_t *)(outBytes+16); 1932 1933 /* read and swap the tokenCount */ 1934 tokenCount=ds->readUInt16(*p); 1935 ds->swapArray16(ds, p, 2, q, pErrorCode); 1936 ++p; 1937 ++q; 1938 1939 /* read the first 512 tokens and make the token maps */ 1940 if(tokenCount<=512) { 1941 count=tokenCount; 1942 } else { 1943 count=512; 1944 } 1945 for(i=0; i<count; ++i) { 1946 tokens[i]=udata_readInt16(ds, p[i]); 1947 } 1948 for(; i<512; ++i) { 1949 tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */ 1950 } 1951 makeTokenMap(ds, tokens, tokenCount, map, pErrorCode); 1952 makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode); 1953 if(U_FAILURE(*pErrorCode)) { 1954 return 0; 1955 } 1956 1957 /* 1958 * swap and permutate the tokens 1959 * go through a temporary array to support in-place swapping 1960 */ 1961 temp=(uint16_t *)uprv_malloc(tokenCount*2); 1962 if(temp==NULL) { 1963 udata_printError(ds, "out of memory swapping %u unames.icu tokens\n", 1964 tokenCount); 1965 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1966 return 0; 1967 } 1968 1969 /* swap and permutate single-/lead-byte tokens */ 1970 for(i=0; i<tokenCount && i<256; ++i) { 1971 ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode); 1972 } 1973 1974 /* swap and permutate trail-byte tokens */ 1975 for(; i<tokenCount; ++i) { 1976 ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode); 1977 } 1978 1979 /* copy the result into the output and free the temporary array */ 1980 uprv_memcpy(q, temp, tokenCount*2); 1981 uprv_free(temp); 1982 1983 /* 1984 * swap the token strings but not a possible padding byte after 1985 * the terminating NUL of the last string 1986 */ 1987 udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset), 1988 outBytes+tokenStringOffset, pErrorCode); 1989 if(U_FAILURE(*pErrorCode)) { 1990 udata_printError(ds, "uchar_swapNames(token strings) failed\n"); 1991 return 0; 1992 } 1993 1994 /* swap the group table */ 1995 count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset))); 1996 ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2), 1997 outBytes+groupsOffset, pErrorCode); 1998 1999 /* 2000 * swap the group strings 2001 * swap the string bytes but not the nibble-encoded string lengths 2002 */ 2003 if(ds->inCharset!=ds->outCharset) { 2004 uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1]; 2005 2006 const uint8_t *inStrings, *nextInStrings; 2007 uint8_t *outStrings; 2008 2009 uint8_t c; 2010 2011 inStrings=inBytes+groupStringOffset; 2012 outStrings=outBytes+groupStringOffset; 2013 2014 stringsCount=algNamesOffset-groupStringOffset; 2015 2016 /* iterate through string groups until only a few padding bytes are left */ 2017 while(stringsCount>32) { 2018 nextInStrings=expandGroupLengths(inStrings, offsets, lengths); 2019 2020 /* move past the length bytes */ 2021 stringsCount-=(uint32_t)(nextInStrings-inStrings); 2022 outStrings+=nextInStrings-inStrings; 2023 inStrings=nextInStrings; 2024 2025 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */ 2026 stringsCount-=count; 2027 2028 /* swap the string bytes using map[] and trailMap[] */ 2029 while(count>0) { 2030 c=*inStrings++; 2031 *outStrings++=map[c]; 2032 if(tokens[c]!=-2) { 2033 --count; 2034 } else { 2035 /* token lead byte: swap the trail byte, too */ 2036 *outStrings++=trailMap[*inStrings++]; 2037 count-=2; 2038 } 2039 } 2040 } 2041 } 2042 2043 /* swap the algorithmic ranges */ 2044 offset=algNamesOffset; 2045 count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); 2046 ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode); 2047 offset+=4; 2048 2049 for(i=0; i<count; ++i) { 2050 if(offset>(uint32_t)length) { 2051 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n", 2052 length, i); 2053 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 2054 return 0; 2055 } 2056 2057 inRange=(const AlgorithmicRange *)(inBytes+offset); 2058 outRange=(AlgorithmicRange *)(outBytes+offset); 2059 offset+=ds->readUInt16(inRange->size); 2060 2061 ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); 2062 ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode); 2063 switch(inRange->type) { 2064 case 0: 2065 /* swap prefix string */ 2066 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)), 2067 outRange+1, pErrorCode); 2068 if(U_FAILURE(*pErrorCode)) { 2069 udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n", 2070 i); 2071 return 0; 2072 } 2073 break; 2074 case 1: 2075 { 2076 /* swap factors and the prefix and factor strings */ 2077 uint32_t factorsCount; 2078 2079 factorsCount=inRange->variant; 2080 p=(const uint16_t *)(inRange+1); 2081 q=(uint16_t *)(outRange+1); 2082 ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode); 2083 2084 /* swap the strings, up to the last terminating NUL */ 2085 p+=factorsCount; 2086 q+=factorsCount; 2087 stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p); 2088 while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) { 2089 --stringsCount; 2090 } 2091 ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode); 2092 } 2093 break; 2094 default: 2095 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n", 2096 inRange->type, i); 2097 *pErrorCode=U_UNSUPPORTED_ERROR; 2098 return 0; 2099 } 2100 } 2101 } 2102 2103 return headerSize+(int32_t)offset; 2104 } 2105 2106 /* 2107 * Hey, Emacs, please set the following: 2108 * 2109 * Local Variables: 2110 * indent-tabs-mode: nil 2111 * End: 2112 * 2113 */ 2114