1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2007, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: idnaref.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003feb1 14 * created by: Ram Viswanadha 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION 20 #include "idnaref.h" 21 #include "punyref.h" 22 #include "ustr_imp.h" 23 #include "cmemory.h" 24 #include "sprpimpl.h" 25 #include "nptrans.h" 26 #include "testidna.h" 27 #include "punycode.h" 28 #include "unicode/ustring.h" 29 30 /* it is official IDNA ACE Prefix is "xn--" */ 31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; 32 #define ACE_PREFIX_LENGTH 4 33 34 #define MAX_LABEL_LENGTH 63 35 #define HYPHEN 0x002D 36 /* The Max length of the labels should not be more than 64 */ 37 #define MAX_LABEL_BUFFER_SIZE 100 38 #define MAX_IDN_BUFFER_SIZE 300 39 40 #define CAPITAL_A 0x0041 41 #define CAPITAL_Z 0x005A 42 #define LOWER_CASE_DELTA 0x0020 43 #define FULL_STOP 0x002E 44 45 46 inline static UBool 47 startsWithPrefix(const UChar* src , int32_t srcLength){ 48 UBool startsWithPrefix = TRUE; 49 50 if(srcLength < ACE_PREFIX_LENGTH){ 51 return FALSE; 52 } 53 54 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ 55 if(u_tolower(src[i]) != ACE_PREFIX[i]){ 56 startsWithPrefix = FALSE; 57 } 58 } 59 return startsWithPrefix; 60 } 61 62 inline static UChar 63 toASCIILower(UChar ch){ 64 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 65 return ch + LOWER_CASE_DELTA; 66 } 67 return ch; 68 } 69 70 inline static int32_t 71 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, 72 const UChar* s2, int32_t s2Len){ 73 if(s1Len != s2Len){ 74 return (s1Len > s2Len) ? s1Len : s2Len; 75 } 76 UChar c1,c2; 77 int32_t rc; 78 79 for(int32_t i =0;/* no condition */;i++) { 80 /* If we reach the ends of both strings then they match */ 81 if(i == s1Len) { 82 return 0; 83 } 84 85 c1 = s1[i]; 86 c2 = s2[i]; 87 88 /* Case-insensitive comparison */ 89 if(c1!=c2) { 90 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); 91 if(rc!=0) { 92 return rc; 93 } 94 } 95 } 96 97 } 98 99 static UErrorCode getError(enum punycode_status status){ 100 switch(status){ 101 case punycode_success: 102 return U_ZERO_ERROR; 103 case punycode_bad_input: /* Input is invalid. */ 104 return U_INVALID_CHAR_FOUND; 105 case punycode_big_output: /* Output would exceed the space provided. */ 106 return U_BUFFER_OVERFLOW_ERROR; 107 case punycode_overflow : /* Input requires wider integers to process. */ 108 return U_INDEX_OUTOFBOUNDS_ERROR; 109 default: 110 return U_INTERNAL_PROGRAM_ERROR; 111 } 112 } 113 114 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){ 115 int i; 116 for(i=0;i<length;i++){ 117 dest[i] = src[i]; 118 } 119 return i; 120 } 121 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){ 122 int i; 123 for(i=0;i<length;i++){ 124 dest[i] = (char)src[i]; 125 } 126 return i; 127 } 128 // wrapper around the reference Punycode implementation 129 static int32_t convertToPuny(const UChar* src, int32_t srcLength, 130 UChar* dest, int32_t destCapacity, 131 UErrorCode& status){ 132 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; 133 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; 134 uint32_t* b1 = b1Stack; 135 char b2Stack[MAX_LABEL_BUFFER_SIZE]; 136 char* b2 = b2Stack; 137 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; 138 punycode_status error; 139 unsigned char* caseFlags = NULL; 140 141 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); 142 if(status == U_BUFFER_OVERFLOW_ERROR){ 143 // redo processing of string 144 /* we do not have enough room so grow the buffer*/ 145 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); 146 if(b1==NULL){ 147 status = U_MEMORY_ALLOCATION_ERROR; 148 goto CLEANUP; 149 } 150 151 status = U_ZERO_ERROR; // reset error 152 153 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); 154 } 155 if(U_FAILURE(status)){ 156 goto CLEANUP; 157 } 158 159 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); 160 161 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); 162 status = getError(error); 163 164 if(status == U_BUFFER_OVERFLOW_ERROR){ 165 /* we do not have enough room so grow the buffer*/ 166 b2 = (char*) uprv_malloc( b2Len * sizeof(char)); 167 if(b2==NULL){ 168 status = U_MEMORY_ALLOCATION_ERROR; 169 goto CLEANUP; 170 } 171 172 status = U_ZERO_ERROR; // reset error 173 174 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); 175 status = getError(error); 176 } 177 if(U_FAILURE(status)){ 178 goto CLEANUP; 179 } 180 181 if(b2Len < destCapacity){ 182 convertASCIIToUChars(b2,dest,b2Len); 183 }else{ 184 status =U_BUFFER_OVERFLOW_ERROR; 185 } 186 187 CLEANUP: 188 if(b1Stack != b1){ 189 uprv_free(b1); 190 } 191 if(b2Stack != b2){ 192 uprv_free(b2); 193 } 194 uprv_free(caseFlags); 195 196 return b2Len; 197 } 198 199 static int32_t convertFromPuny( const UChar* src, int32_t srcLength, 200 UChar* dest, int32_t destCapacity, 201 UErrorCode& status){ 202 char b1Stack[MAX_LABEL_BUFFER_SIZE]; 203 char* b1 = b1Stack; 204 int32_t destLen =0; 205 206 convertUCharsToASCII(src, b1,srcLength); 207 208 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; 209 uint32_t* b2 = b2Stack; 210 int32_t b2Len =MAX_LABEL_BUFFER_SIZE; 211 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*)); 212 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); 213 status = getError(error); 214 if(status == U_BUFFER_OVERFLOW_ERROR){ 215 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); 216 if(b2 == NULL){ 217 status = U_MEMORY_ALLOCATION_ERROR; 218 goto CLEANUP; 219 } 220 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); 221 status = getError(error); 222 } 223 224 if(U_FAILURE(status)){ 225 goto CLEANUP; 226 } 227 228 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); 229 230 CLEANUP: 231 if(b1Stack != b1){ 232 uprv_free(b1); 233 } 234 if(b2Stack != b2){ 235 uprv_free(b2); 236 } 237 uprv_free(caseFlags); 238 239 return destLen; 240 } 241 242 243 U_CFUNC int32_t U_EXPORT2 244 idnaref_toASCII(const UChar* src, int32_t srcLength, 245 UChar* dest, int32_t destCapacity, 246 int32_t options, 247 UParseError* parseError, 248 UErrorCode* status){ 249 250 if(status == NULL || U_FAILURE(*status)){ 251 return 0; 252 } 253 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 254 *status = U_ILLEGAL_ARGUMENT_ERROR; 255 return 0; 256 } 257 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; 258 //initialize pointers to stack buffers 259 UChar *b1 = b1Stack, *b2 = b2Stack; 260 int32_t b1Len=0, b2Len=0, 261 b1Capacity = MAX_LABEL_BUFFER_SIZE, 262 b2Capacity = MAX_LABEL_BUFFER_SIZE , 263 reqLength=0; 264 265 //get the options 266 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 267 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 268 269 UBool* caseFlags = NULL; 270 271 // assume the source contains all ascii codepoints 272 UBool srcIsASCII = TRUE; 273 // assume the source contains all LDH codepoints 274 UBool srcIsLDH = TRUE; 275 int32_t j=0; 276 277 if(srcLength == -1){ 278 srcLength = u_strlen(src); 279 } 280 281 // step 1 282 for( j=0;j<srcLength;j++){ 283 if(src[j] > 0x7F){ 284 srcIsASCII = FALSE; 285 } 286 b1[b1Len++] = src[j]; 287 } 288 // step 2 289 NamePrepTransform* prep = TestIDNA::getInstance(*status); 290 291 if(U_FAILURE(*status)){ 292 goto CLEANUP; 293 } 294 295 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status); 296 297 if(*status == U_BUFFER_OVERFLOW_ERROR){ 298 // redo processing of string 299 /* we do not have enough room so grow the buffer*/ 300 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 301 if(b1==NULL){ 302 *status = U_MEMORY_ALLOCATION_ERROR; 303 goto CLEANUP; 304 } 305 306 *status = U_ZERO_ERROR; // reset error 307 308 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); 309 } 310 // error bail out 311 if(U_FAILURE(*status)){ 312 goto CLEANUP; 313 } 314 315 if(b1Len == 0){ 316 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 317 goto CLEANUP; 318 } 319 320 srcIsASCII = TRUE; 321 // step 3 & 4 322 for( j=0;j<b1Len;j++){ 323 if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII 324 srcIsASCII = FALSE; 325 }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character{ 326 srcIsLDH = FALSE; 327 } 328 } 329 330 if(useSTD3ASCIIRules == TRUE){ 331 // verify 3a and 3b 332 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ 333 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ 334 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 335 goto CLEANUP; 336 } 337 } 338 if(srcIsASCII){ 339 if(b1Len <= destCapacity){ 340 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); 341 reqLength = b1Len; 342 }else{ 343 reqLength = b1Len; 344 goto CLEANUP; 345 } 346 }else{ 347 // step 5 : verify the sequence does not begin with ACE prefix 348 if(!startsWithPrefix(b1,b1Len)){ 349 350 //step 6: encode the sequence with punycode 351 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); 352 353 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status); 354 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status); 355 if(*status == U_BUFFER_OVERFLOW_ERROR){ 356 // redo processing of string 357 /* we do not have enough room so grow the buffer*/ 358 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 359 if(b2 == NULL){ 360 *status = U_MEMORY_ALLOCATION_ERROR; 361 goto CLEANUP; 362 } 363 364 *status = U_ZERO_ERROR; // reset error 365 366 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status); 367 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status); 368 369 } 370 //error bail out 371 if(U_FAILURE(*status)){ 372 goto CLEANUP; 373 } 374 reqLength = b2Len+ACE_PREFIX_LENGTH; 375 376 if(reqLength > destCapacity){ 377 *status = U_BUFFER_OVERFLOW_ERROR; 378 goto CLEANUP; 379 } 380 //Step 7: prepend the ACE prefix 381 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); 382 //Step 6: copy the contents in b2 into dest 383 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); 384 385 }else{ 386 *status = U_IDNA_ACE_PREFIX_ERROR; 387 goto CLEANUP; 388 } 389 } 390 391 if(reqLength > MAX_LABEL_LENGTH){ 392 *status = U_IDNA_LABEL_TOO_LONG_ERROR; 393 } 394 395 CLEANUP: 396 if(b1 != b1Stack){ 397 uprv_free(b1); 398 } 399 if(b2 != b2Stack){ 400 uprv_free(b2); 401 } 402 uprv_free(caseFlags); 403 404 // delete prep; 405 406 return u_terminateUChars(dest, destCapacity, reqLength, status); 407 } 408 409 410 U_CFUNC int32_t U_EXPORT2 411 idnaref_toUnicode(const UChar* src, int32_t srcLength, 412 UChar* dest, int32_t destCapacity, 413 int32_t options, 414 UParseError* parseError, 415 UErrorCode* status){ 416 417 if(status == NULL || U_FAILURE(*status)){ 418 return 0; 419 } 420 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 421 *status = U_ILLEGAL_ARGUMENT_ERROR; 422 return 0; 423 } 424 425 426 427 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; 428 429 //initialize pointers to stack buffers 430 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; 431 int32_t b1Len, b2Len, b1PrimeLen, b3Len, 432 b1Capacity = MAX_LABEL_BUFFER_SIZE, 433 b2Capacity = MAX_LABEL_BUFFER_SIZE, 434 b3Capacity = MAX_LABEL_BUFFER_SIZE, 435 reqLength=0; 436 // UParseError parseError; 437 438 NamePrepTransform* prep = TestIDNA::getInstance(*status); 439 b1Len = 0; 440 UBool* caseFlags = NULL; 441 442 //get the options 443 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 444 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 445 446 UBool srcIsASCII = TRUE; 447 UBool srcIsLDH = TRUE; 448 int32_t failPos =0; 449 450 if(U_FAILURE(*status)){ 451 goto CLEANUP; 452 } 453 // step 1: find out if all the codepoints in src are ASCII 454 if(srcLength==-1){ 455 srcLength = 0; 456 for(;src[srcLength]!=0;){ 457 if(src[srcLength]> 0x7f){ 458 srcIsASCII = FALSE; 459 }if(prep->isLDHChar(src[srcLength])==FALSE){ 460 // here we do not assemble surrogates 461 // since we know that LDH code points 462 // are in the ASCII range only 463 srcIsLDH = FALSE; 464 failPos = srcLength; 465 } 466 srcLength++; 467 } 468 }else{ 469 for(int32_t j=0; j<srcLength; j++){ 470 if(src[j]> 0x7f){ 471 srcIsASCII = FALSE; 472 }else if(prep->isLDHChar(src[j])==FALSE){ 473 // here we do not assemble surrogates 474 // since we know that LDH code points 475 // are in the ASCII range only 476 srcIsLDH = FALSE; 477 failPos = j; 478 } 479 } 480 } 481 482 if(srcIsASCII == FALSE){ 483 // step 2: process the string 484 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status); 485 if(*status == U_BUFFER_OVERFLOW_ERROR){ 486 // redo processing of string 487 /* we do not have enough room so grow the buffer*/ 488 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 489 if(b1==NULL){ 490 *status = U_MEMORY_ALLOCATION_ERROR; 491 goto CLEANUP; 492 } 493 494 *status = U_ZERO_ERROR; // reset error 495 496 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); 497 } 498 //bail out on error 499 if(U_FAILURE(*status)){ 500 goto CLEANUP; 501 } 502 }else{ 503 504 // copy everything to b1 505 if(srcLength < b1Capacity){ 506 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); 507 }else{ 508 /* we do not have enough room so grow the buffer*/ 509 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); 510 if(b1==NULL){ 511 *status = U_MEMORY_ALLOCATION_ERROR; 512 goto CLEANUP; 513 } 514 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); 515 } 516 b1Len = srcLength; 517 } 518 //step 3: verify ACE Prefix 519 if(startsWithPrefix(src,srcLength)){ 520 521 //step 4: Remove the ACE Prefix 522 b1Prime = b1 + ACE_PREFIX_LENGTH; 523 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; 524 525 //step 5: Decode using punycode 526 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status); 527 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status); 528 529 if(*status == U_BUFFER_OVERFLOW_ERROR){ 530 // redo processing of string 531 /* we do not have enough room so grow the buffer*/ 532 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 533 if(b2==NULL){ 534 *status = U_MEMORY_ALLOCATION_ERROR; 535 goto CLEANUP; 536 } 537 538 *status = U_ZERO_ERROR; // reset error 539 540 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status); 541 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status); 542 } 543 544 545 //step 6:Apply toASCII 546 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status); 547 548 if(*status == U_BUFFER_OVERFLOW_ERROR){ 549 // redo processing of string 550 /* we do not have enough room so grow the buffer*/ 551 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); 552 if(b3==NULL){ 553 *status = U_MEMORY_ALLOCATION_ERROR; 554 goto CLEANUP; 555 } 556 557 *status = U_ZERO_ERROR; // reset error 558 559 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status); 560 561 } 562 //bail out on error 563 if(U_FAILURE(*status)){ 564 goto CLEANUP; 565 } 566 567 //step 7: verify 568 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ 569 *status = U_IDNA_VERIFICATION_ERROR; 570 goto CLEANUP; 571 } 572 573 //step 8: return output of step 5 574 reqLength = b2Len; 575 if(b2Len <= destCapacity) { 576 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); 577 } 578 }else{ 579 // verify that STD3 ASCII rules are satisfied 580 if(useSTD3ASCIIRules == TRUE){ 581 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ 582 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ 583 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 584 585 /* populate the parseError struct */ 586 if(srcIsLDH==FALSE){ 587 // failPos is always set the index of failure 588 uprv_syntaxError(src,failPos, srcLength,parseError); 589 }else if(src[0] == HYPHEN){ 590 // fail position is 0 591 uprv_syntaxError(src,0,srcLength,parseError); 592 }else{ 593 // the last index in the source is always length-1 594 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); 595 } 596 597 goto CLEANUP; 598 } 599 } 600 //copy the source to destination 601 if(srcLength <= destCapacity){ 602 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); 603 } 604 reqLength = srcLength; 605 } 606 607 CLEANUP: 608 609 if(b1 != b1Stack){ 610 uprv_free(b1); 611 } 612 if(b2 != b2Stack){ 613 uprv_free(b2); 614 } 615 uprv_free(caseFlags); 616 617 // The RFC states that 618 // <quote> 619 // ToUnicode never fails. If any step fails, then the original input 620 // is returned immediately in that step. 621 // </quote> 622 // So if any step fails lets copy source to destination 623 if(U_FAILURE(*status)){ 624 //copy the source to destination 625 if(dest && srcLength <= destCapacity){ 626 if(srcLength == -1) { 627 uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR); 628 } else { 629 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); 630 } 631 } 632 reqLength = srcLength; 633 *status = U_ZERO_ERROR; 634 } 635 return u_terminateUChars(dest, destCapacity, reqLength, status); 636 } 637 638 639 static int32_t 640 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep, 641 UChar **limit, 642 UBool *done, 643 UErrorCode *status){ 644 if(srcLength == -1){ 645 int32_t i; 646 for(i=0 ; ;i++){ 647 if(src[i] == 0){ 648 *limit = src + i; // point to null 649 *done = TRUE; 650 return i; 651 } 652 if(prep->isLabelSeparator(src[i],*status)){ 653 *limit = src + (i+1); // go past the delimiter 654 return i; 655 656 } 657 } 658 }else{ 659 int32_t i; 660 for(i=0;i<srcLength;i++){ 661 if(prep->isLabelSeparator(src[i],*status)){ 662 *limit = src + (i+1); // go past the delimiter 663 return i; 664 } 665 } 666 // we have not found the delimiter 667 if(i==srcLength){ 668 *limit = src+srcLength; 669 *done = TRUE; 670 } 671 return i; 672 } 673 } 674 675 U_CFUNC int32_t U_EXPORT2 676 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, 677 UChar* dest, int32_t destCapacity, 678 int32_t options, 679 UParseError* parseError, 680 UErrorCode* status){ 681 682 if(status == NULL || U_FAILURE(*status)){ 683 return 0; 684 } 685 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 686 *status = U_ILLEGAL_ARGUMENT_ERROR; 687 return 0; 688 } 689 690 int32_t reqLength = 0; 691 // UParseError parseError; 692 693 NamePrepTransform* prep = TestIDNA::getInstance(*status); 694 695 //initialize pointers to stack buffers 696 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; 697 UChar *b1 = b1Stack; 698 int32_t b1Len, labelLen; 699 UChar* delimiter = (UChar*)src; 700 UChar* labelStart = (UChar*)src; 701 int32_t remainingLen = srcLength; 702 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; 703 704 //get the options 705 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 706 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 707 UBool done = FALSE; 708 709 if(U_FAILURE(*status)){ 710 goto CLEANUP; 711 } 712 713 714 if(srcLength == -1){ 715 for(;;){ 716 717 if(*delimiter == 0){ 718 break; 719 } 720 721 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); 722 b1Len = 0; 723 if(!(labelLen==0 && done)){// make sure this is not a root label separator. 724 725 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, 726 options, parseError, status); 727 728 if(*status == U_BUFFER_OVERFLOW_ERROR){ 729 // redo processing of string 730 /* we do not have enough room so grow the buffer*/ 731 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 732 if(b1==NULL){ 733 *status = U_MEMORY_ALLOCATION_ERROR; 734 goto CLEANUP; 735 } 736 737 *status = U_ZERO_ERROR; // reset error 738 739 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, 740 options, parseError, status); 741 742 } 743 } 744 745 if(U_FAILURE(*status)){ 746 goto CLEANUP; 747 } 748 int32_t tempLen = (reqLength + b1Len ); 749 // copy to dest 750 if( tempLen< destCapacity){ 751 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); 752 } 753 754 reqLength = tempLen; 755 756 // add the label separator 757 if(done == FALSE){ 758 if(reqLength < destCapacity){ 759 dest[reqLength] = FULL_STOP; 760 } 761 reqLength++; 762 } 763 764 labelStart = delimiter; 765 } 766 }else{ 767 for(;;){ 768 769 if(delimiter == src+srcLength){ 770 break; 771 } 772 773 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); 774 775 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, 776 options,parseError, status); 777 778 if(*status == U_BUFFER_OVERFLOW_ERROR){ 779 // redo processing of string 780 /* we do not have enough room so grow the buffer*/ 781 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 782 if(b1==NULL){ 783 *status = U_MEMORY_ALLOCATION_ERROR; 784 goto CLEANUP; 785 } 786 787 *status = U_ZERO_ERROR; // reset error 788 789 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, 790 options, parseError, status); 791 792 } 793 794 if(U_FAILURE(*status)){ 795 goto CLEANUP; 796 } 797 int32_t tempLen = (reqLength + b1Len ); 798 // copy to dest 799 if( tempLen< destCapacity){ 800 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); 801 } 802 803 reqLength = tempLen; 804 805 // add the label separator 806 if(done == FALSE){ 807 if(reqLength < destCapacity){ 808 dest[reqLength] = FULL_STOP; 809 } 810 reqLength++; 811 } 812 813 labelStart = delimiter; 814 remainingLen = srcLength - (delimiter - src); 815 } 816 } 817 818 819 CLEANUP: 820 821 if(b1 != b1Stack){ 822 uprv_free(b1); 823 } 824 825 // delete prep; 826 827 return u_terminateUChars(dest, destCapacity, reqLength, status); 828 } 829 830 U_CFUNC int32_t U_EXPORT2 831 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, 832 UChar* dest, int32_t destCapacity, 833 int32_t options, 834 UParseError* parseError, 835 UErrorCode* status){ 836 837 if(status == NULL || U_FAILURE(*status)){ 838 return 0; 839 } 840 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 841 *status = U_ILLEGAL_ARGUMENT_ERROR; 842 return 0; 843 } 844 845 int32_t reqLength = 0; 846 847 UBool done = FALSE; 848 849 NamePrepTransform* prep = TestIDNA::getInstance(*status); 850 851 //initialize pointers to stack buffers 852 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; 853 UChar *b1 = b1Stack; 854 int32_t b1Len, labelLen; 855 UChar* delimiter = (UChar*)src; 856 UChar* labelStart = (UChar*)src; 857 int32_t remainingLen = srcLength; 858 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; 859 860 //get the options 861 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 862 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 863 864 if(U_FAILURE(*status)){ 865 goto CLEANUP; 866 } 867 868 if(srcLength == -1){ 869 for(;;){ 870 871 if(*delimiter == 0){ 872 break; 873 } 874 875 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); 876 877 if(labelLen==0 && done==FALSE){ 878 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 879 } 880 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity, 881 options, parseError, status); 882 883 if(*status == U_BUFFER_OVERFLOW_ERROR){ 884 // redo processing of string 885 /* we do not have enough room so grow the buffer*/ 886 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 887 if(b1==NULL){ 888 *status = U_MEMORY_ALLOCATION_ERROR; 889 goto CLEANUP; 890 } 891 892 *status = U_ZERO_ERROR; // reset error 893 894 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, 895 options, parseError, status); 896 897 } 898 899 if(U_FAILURE(*status)){ 900 goto CLEANUP; 901 } 902 int32_t tempLen = (reqLength + b1Len ); 903 // copy to dest 904 if( tempLen< destCapacity){ 905 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); 906 } 907 908 reqLength = tempLen; 909 // add the label separator 910 if(done == FALSE){ 911 if(reqLength < destCapacity){ 912 dest[reqLength] = FULL_STOP; 913 } 914 reqLength++; 915 } 916 917 labelStart = delimiter; 918 } 919 }else{ 920 for(;;){ 921 922 if(delimiter == src+srcLength){ 923 break; 924 } 925 926 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); 927 928 if(labelLen==0 && done==FALSE){ 929 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 930 } 931 932 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity, 933 options, parseError, status); 934 935 if(*status == U_BUFFER_OVERFLOW_ERROR){ 936 // redo processing of string 937 /* we do not have enough room so grow the buffer*/ 938 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 939 if(b1==NULL){ 940 *status = U_MEMORY_ALLOCATION_ERROR; 941 goto CLEANUP; 942 } 943 944 *status = U_ZERO_ERROR; // reset error 945 946 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, 947 options, parseError, status); 948 949 } 950 951 if(U_FAILURE(*status)){ 952 goto CLEANUP; 953 } 954 int32_t tempLen = (reqLength + b1Len ); 955 // copy to dest 956 if( tempLen< destCapacity){ 957 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); 958 } 959 960 reqLength = tempLen; 961 962 // add the label separator 963 if(done == FALSE){ 964 if(reqLength < destCapacity){ 965 dest[reqLength] = FULL_STOP; 966 } 967 reqLength++; 968 } 969 970 labelStart = delimiter; 971 remainingLen = srcLength - (delimiter - src); 972 } 973 } 974 975 CLEANUP: 976 977 if(b1 != b1Stack){ 978 uprv_free(b1); 979 } 980 981 // delete prep; 982 983 return u_terminateUChars(dest, destCapacity, reqLength, status); 984 } 985 986 U_CFUNC int32_t U_EXPORT2 987 idnaref_compare( const UChar *s1, int32_t length1, 988 const UChar *s2, int32_t length2, 989 int32_t options, 990 UErrorCode* status){ 991 992 if(status == NULL || U_FAILURE(*status)){ 993 return -1; 994 } 995 996 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; 997 UChar *b1 = b1Stack, *b2 = b2Stack; 998 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; 999 int32_t result = -1; 1000 1001 UParseError parseError; 1002 1003 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); 1004 if(*status == U_BUFFER_OVERFLOW_ERROR){ 1005 // redo processing of string 1006 /* we do not have enough room so grow the buffer*/ 1007 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 1008 if(b1==NULL){ 1009 *status = U_MEMORY_ALLOCATION_ERROR; 1010 goto CLEANUP; 1011 } 1012 1013 *status = U_ZERO_ERROR; // reset error 1014 1015 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); 1016 1017 } 1018 1019 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status); 1020 if(*status == U_BUFFER_OVERFLOW_ERROR){ 1021 // redo processing of string 1022 /* we do not have enough room so grow the buffer*/ 1023 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 1024 if(b2==NULL){ 1025 *status = U_MEMORY_ALLOCATION_ERROR; 1026 goto CLEANUP; 1027 } 1028 1029 *status = U_ZERO_ERROR; // reset error 1030 1031 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status); 1032 1033 } 1034 // when toASCII is applied all label separators are replaced with FULL_STOP 1035 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); 1036 1037 CLEANUP: 1038 if(b1 != b1Stack){ 1039 uprv_free(b1); 1040 } 1041 1042 if(b2 != b2Stack){ 1043 uprv_free(b2); 1044 } 1045 1046 return result; 1047 } 1048 #endif /* #if !UCONFIG_NO_IDNA */ 1049