1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: uidna.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003feb1 14 * created by: Ram Viswanadha 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_IDNA 20 21 #include "unicode/uidna.h" 22 #include "unicode/ustring.h" 23 #include "unicode/usprep.h" 24 #include "punycode.h" 25 #include "ustr_imp.h" 26 #include "cmemory.h" 27 #include "uassert.h" 28 #include "sprpimpl.h" 29 30 /* it is official IDNA ACE Prefix is "xn--" */ 31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; 32 #define ACE_PREFIX_LENGTH 4 33 34 #define MAX_LABEL_LENGTH 63 35 /* The Max length of the labels should not be more than MAX_LABEL_LENGTH */ 36 #define MAX_LABEL_BUFFER_SIZE 100 37 38 #define MAX_DOMAIN_NAME_LENGTH 255 39 /* The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH */ 40 #define MAX_IDN_BUFFER_SIZE MAX_DOMAIN_NAME_LENGTH+1 41 42 #define LOWER_CASE_DELTA 0x0020 43 #define HYPHEN 0x002D 44 #define FULL_STOP 0x002E 45 #define CAPITAL_A 0x0041 46 #define CAPITAL_Z 0x005A 47 48 inline static UChar 49 toASCIILower(UChar ch){ 50 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 51 return ch + LOWER_CASE_DELTA; 52 } 53 return ch; 54 } 55 56 inline static UBool 57 startsWithPrefix(const UChar* src , int32_t srcLength){ 58 UBool startsWithPrefix = TRUE; 59 60 if(srcLength < ACE_PREFIX_LENGTH){ 61 return FALSE; 62 } 63 64 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ 65 if(toASCIILower(src[i]) != ACE_PREFIX[i]){ 66 startsWithPrefix = FALSE; 67 } 68 } 69 return startsWithPrefix; 70 } 71 72 73 inline static int32_t 74 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, 75 const UChar* s2, int32_t s2Len){ 76 77 int32_t minLength; 78 int32_t lengthResult; 79 80 // are we comparing different lengths? 81 if(s1Len != s2Len) { 82 if(s1Len < s2Len) { 83 minLength = s1Len; 84 lengthResult = -1; 85 } else { 86 minLength = s2Len; 87 lengthResult = 1; 88 } 89 } else { 90 // ok the lengths are equal 91 minLength = s1Len; 92 lengthResult = 0; 93 } 94 95 UChar c1,c2; 96 int32_t rc; 97 98 for(int32_t i =0;/* no condition */;i++) { 99 100 /* If we reach the ends of both strings then they match */ 101 if(i == minLength) { 102 return lengthResult; 103 } 104 105 c1 = s1[i]; 106 c2 = s2[i]; 107 108 /* Case-insensitive comparison */ 109 if(c1!=c2) { 110 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); 111 if(rc!=0) { 112 lengthResult=rc; 113 break; 114 } 115 } 116 } 117 return lengthResult; 118 } 119 120 121 /** 122 * Ascertain if the given code point is a label separator as 123 * defined by the IDNA RFC 124 * 125 * @param ch The code point to be ascertained 126 * @return true if the char is a label separator 127 * @stable ICU 2.8 128 */ 129 static inline UBool isLabelSeparator(UChar ch){ 130 switch(ch){ 131 case 0x002e: 132 case 0x3002: 133 case 0xFF0E: 134 case 0xFF61: 135 return TRUE; 136 default: 137 return FALSE; 138 } 139 } 140 141 // returns the length of the label excluding the separator 142 // if *limit == separator then the length returned does not include 143 // the separtor. 144 static inline int32_t 145 getNextSeparator(UChar *src, int32_t srcLength, 146 UChar **limit, UBool *done){ 147 if(srcLength == -1){ 148 int32_t i; 149 for(i=0 ; ;i++){ 150 if(src[i] == 0){ 151 *limit = src + i; // point to null 152 *done = TRUE; 153 return i; 154 } 155 if(isLabelSeparator(src[i])){ 156 *limit = src + (i+1); // go past the delimiter 157 return i; 158 159 } 160 } 161 }else{ 162 int32_t i; 163 for(i=0;i<srcLength;i++){ 164 if(isLabelSeparator(src[i])){ 165 *limit = src + (i+1); // go past the delimiter 166 return i; 167 } 168 } 169 // we have not found the delimiter 170 // if(i==srcLength) 171 *limit = src+srcLength; 172 *done = TRUE; 173 174 return i; 175 } 176 } 177 static inline UBool isLDHChar(UChar ch){ 178 // high runner case 179 if(ch>0x007A){ 180 return FALSE; 181 } 182 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] 183 if( (ch==0x002D) || 184 (0x0030 <= ch && ch <= 0x0039) || 185 (0x0041 <= ch && ch <= 0x005A) || 186 (0x0061 <= ch && ch <= 0x007A) 187 ){ 188 return TRUE; 189 } 190 return FALSE; 191 } 192 193 static int32_t 194 _internal_toASCII(const UChar* src, int32_t srcLength, 195 UChar* dest, int32_t destCapacity, 196 int32_t options, 197 UStringPrepProfile* nameprep, 198 UParseError* parseError, 199 UErrorCode* status) 200 { 201 202 // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. 203 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; 204 //initialize pointers to stack buffers 205 UChar *b1 = b1Stack, *b2 = b2Stack; 206 int32_t b1Len=0, b2Len, 207 b1Capacity = MAX_LABEL_BUFFER_SIZE, 208 b2Capacity = MAX_LABEL_BUFFER_SIZE , 209 reqLength=0; 210 211 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; 212 UBool* caseFlags = NULL; 213 214 // the source contains all ascii codepoints 215 UBool srcIsASCII = TRUE; 216 // assume the source contains all LDH codepoints 217 UBool srcIsLDH = TRUE; 218 219 int32_t j=0; 220 221 //get the options 222 UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); 223 224 int32_t failPos = -1; 225 226 if(srcLength == -1){ 227 srcLength = u_strlen(src); 228 } 229 230 if(srcLength > b1Capacity){ 231 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); 232 if(b1==NULL){ 233 *status = U_MEMORY_ALLOCATION_ERROR; 234 goto CLEANUP; 235 } 236 b1Capacity = srcLength; 237 } 238 239 // step 1 240 for( j=0;j<srcLength;j++){ 241 if(src[j] > 0x7F){ 242 srcIsASCII = FALSE; 243 } 244 b1[b1Len++] = src[j]; 245 } 246 247 // step 2 is performed only if the source contains non ASCII 248 if(srcIsASCII == FALSE){ 249 250 // step 2 251 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); 252 253 if(*status == U_BUFFER_OVERFLOW_ERROR){ 254 // redo processing of string 255 // we do not have enough room so grow the buffer 256 if(b1 != b1Stack){ 257 uprv_free(b1); 258 } 259 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 260 if(b1==NULL){ 261 *status = U_MEMORY_ALLOCATION_ERROR; 262 goto CLEANUP; 263 } 264 265 *status = U_ZERO_ERROR; // reset error 266 267 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); 268 } 269 } 270 // error bail out 271 if(U_FAILURE(*status)){ 272 goto CLEANUP; 273 } 274 if(b1Len == 0){ 275 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 276 goto CLEANUP; 277 } 278 279 // for step 3 & 4 280 srcIsASCII = TRUE; 281 for( j=0;j<b1Len;j++){ 282 // check if output of usprep_prepare is all ASCII 283 if(b1[j] > 0x7F){ 284 srcIsASCII = FALSE; 285 }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character 286 srcIsLDH = FALSE; 287 failPos = j; 288 } 289 } 290 if(useSTD3ASCIIRules == TRUE){ 291 // verify 3a and 3b 292 // 3(a) Verify the absence of non-LDH ASCII code points; that is, the 293 // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. 294 // 3(b) Verify the absence of leading and trailing hyphen-minus; that 295 // is, the absence of U+002D at the beginning and end of the 296 // sequence. 297 if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ 298 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ 299 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 300 301 /* populate the parseError struct */ 302 if(srcIsLDH==FALSE){ 303 // failPos is always set the index of failure 304 uprv_syntaxError(b1,failPos, b1Len,parseError); 305 }else if(b1[0] == HYPHEN){ 306 // fail position is 0 307 uprv_syntaxError(b1,0,b1Len,parseError); 308 }else{ 309 // the last index in the source is always length-1 310 uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); 311 } 312 313 goto CLEANUP; 314 } 315 } 316 // Step 4: if the source is ASCII then proceed to step 8 317 if(srcIsASCII){ 318 if(b1Len <= destCapacity){ 319 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); 320 reqLength = b1Len; 321 }else{ 322 reqLength = b1Len; 323 goto CLEANUP; 324 } 325 }else{ 326 // step 5 : verify the sequence does not begin with ACE prefix 327 if(!startsWithPrefix(b1,b1Len)){ 328 329 //step 6: encode the sequence with punycode 330 331 // do not preserve the case flags for now! 332 // TODO: Preserve the case while implementing the RFE 333 // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); 334 // uprv_memset(caseFlags,TRUE,b1Len); 335 336 b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); 337 338 if(*status == U_BUFFER_OVERFLOW_ERROR){ 339 // redo processing of string 340 /* we do not have enough room so grow the buffer*/ 341 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 342 if(b2 == NULL){ 343 *status = U_MEMORY_ALLOCATION_ERROR; 344 goto CLEANUP; 345 } 346 347 *status = U_ZERO_ERROR; // reset error 348 349 b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); 350 } 351 //error bail out 352 if(U_FAILURE(*status)){ 353 goto CLEANUP; 354 } 355 // TODO : Reconsider while implementing the case preserve RFE 356 // convert all codepoints to lower case ASCII 357 // toASCIILower(b2,b2Len); 358 reqLength = b2Len+ACE_PREFIX_LENGTH; 359 360 if(reqLength > destCapacity){ 361 *status = U_BUFFER_OVERFLOW_ERROR; 362 goto CLEANUP; 363 } 364 //Step 7: prepend the ACE prefix 365 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); 366 //Step 6: copy the contents in b2 into dest 367 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); 368 369 }else{ 370 *status = U_IDNA_ACE_PREFIX_ERROR; 371 //position of failure is 0 372 uprv_syntaxError(b1,0,b1Len,parseError); 373 goto CLEANUP; 374 } 375 } 376 // step 8: verify the length of label 377 if(reqLength > MAX_LABEL_LENGTH){ 378 *status = U_IDNA_LABEL_TOO_LONG_ERROR; 379 } 380 381 CLEANUP: 382 if(b1 != b1Stack){ 383 uprv_free(b1); 384 } 385 if(b2 != b2Stack){ 386 uprv_free(b2); 387 } 388 uprv_free(caseFlags); 389 390 return u_terminateUChars(dest, destCapacity, reqLength, status); 391 } 392 393 static int32_t 394 _internal_toUnicode(const UChar* src, int32_t srcLength, 395 UChar* dest, int32_t destCapacity, 396 int32_t options, 397 UStringPrepProfile* nameprep, 398 UParseError* parseError, 399 UErrorCode* status) 400 { 401 402 //get the options 403 //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); 404 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; 405 406 // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. 407 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; 408 409 //initialize pointers to stack buffers 410 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; 411 int32_t b1Len, b2Len, b1PrimeLen, b3Len, 412 b1Capacity = MAX_LABEL_BUFFER_SIZE, 413 b2Capacity = MAX_LABEL_BUFFER_SIZE, 414 b3Capacity = MAX_LABEL_BUFFER_SIZE, 415 reqLength=0; 416 417 b1Len = 0; 418 UBool* caseFlags = NULL; 419 420 UBool srcIsASCII = TRUE; 421 /*UBool srcIsLDH = TRUE; 422 int32_t failPos =0;*/ 423 424 // step 1: find out if all the codepoints in src are ASCII 425 if(srcLength==-1){ 426 srcLength = 0; 427 for(;src[srcLength]!=0;){ 428 if(src[srcLength]> 0x7f){ 429 srcIsASCII = FALSE; 430 }/*else if(isLDHChar(src[srcLength])==FALSE){ 431 // here we do not assemble surrogates 432 // since we know that LDH code points 433 // are in the ASCII range only 434 srcIsLDH = FALSE; 435 failPos = srcLength; 436 }*/ 437 srcLength++; 438 } 439 }else if(srcLength > 0){ 440 for(int32_t j=0; j<srcLength; j++){ 441 if(src[j]> 0x7f){ 442 srcIsASCII = FALSE; 443 }/*else if(isLDHChar(src[j])==FALSE){ 444 // here we do not assemble surrogates 445 // since we know that LDH code points 446 // are in the ASCII range only 447 srcIsLDH = FALSE; 448 failPos = j; 449 }*/ 450 } 451 }else{ 452 return 0; 453 } 454 455 if(srcIsASCII == FALSE){ 456 // step 2: process the string 457 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); 458 if(*status == U_BUFFER_OVERFLOW_ERROR){ 459 // redo processing of string 460 /* we do not have enough room so grow the buffer*/ 461 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 462 if(b1==NULL){ 463 *status = U_MEMORY_ALLOCATION_ERROR; 464 goto CLEANUP; 465 } 466 467 *status = U_ZERO_ERROR; // reset error 468 469 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); 470 } 471 //bail out on error 472 if(U_FAILURE(*status)){ 473 goto CLEANUP; 474 } 475 }else{ 476 477 //just point src to b1 478 b1 = (UChar*) src; 479 b1Len = srcLength; 480 } 481 482 // The RFC states that 483 // <quote> 484 // ToUnicode never fails. If any step fails, then the original input 485 // is returned immediately in that step. 486 // </quote> 487 488 //step 3: verify ACE Prefix 489 if(startsWithPrefix(b1,b1Len)){ 490 491 //step 4: Remove the ACE Prefix 492 b1Prime = b1 + ACE_PREFIX_LENGTH; 493 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; 494 495 //step 5: Decode using punycode 496 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); 497 498 if(*status == U_BUFFER_OVERFLOW_ERROR){ 499 // redo processing of string 500 /* we do not have enough room so grow the buffer*/ 501 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 502 if(b2==NULL){ 503 *status = U_MEMORY_ALLOCATION_ERROR; 504 goto CLEANUP; 505 } 506 507 *status = U_ZERO_ERROR; // reset error 508 509 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); 510 } 511 512 513 //step 6:Apply toASCII 514 b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); 515 516 if(*status == U_BUFFER_OVERFLOW_ERROR){ 517 // redo processing of string 518 /* we do not have enough room so grow the buffer*/ 519 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); 520 if(b3==NULL){ 521 *status = U_MEMORY_ALLOCATION_ERROR; 522 goto CLEANUP; 523 } 524 525 *status = U_ZERO_ERROR; // reset error 526 527 b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); 528 529 } 530 //bail out on error 531 if(U_FAILURE(*status)){ 532 goto CLEANUP; 533 } 534 535 //step 7: verify 536 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ 537 // Cause the original to be returned. 538 *status = U_IDNA_VERIFICATION_ERROR; 539 goto CLEANUP; 540 } 541 542 //step 8: return output of step 5 543 reqLength = b2Len; 544 if(b2Len <= destCapacity) { 545 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); 546 } 547 } 548 else{ 549 // See the start of this if statement for why this is commented out. 550 // verify that STD3 ASCII rules are satisfied 551 /*if(useSTD3ASCIIRules == TRUE){ 552 if( srcIsLDH == FALSE // source contains some non-LDH characters 553 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ 554 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 555 556 // populate the parseError struct 557 if(srcIsLDH==FALSE){ 558 // failPos is always set the index of failure 559 uprv_syntaxError(src,failPos, srcLength,parseError); 560 }else if(src[0] == HYPHEN){ 561 // fail position is 0 562 uprv_syntaxError(src,0,srcLength,parseError); 563 }else{ 564 // the last index in the source is always length-1 565 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); 566 } 567 568 goto CLEANUP; 569 } 570 }*/ 571 // just return the source 572 //copy the source to destination 573 if(srcLength <= destCapacity){ 574 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); 575 } 576 reqLength = srcLength; 577 } 578 579 580 CLEANUP: 581 582 if(b1 != b1Stack && b1!=src){ 583 uprv_free(b1); 584 } 585 if(b2 != b2Stack){ 586 uprv_free(b2); 587 } 588 uprv_free(caseFlags); 589 590 // The RFC states that 591 // <quote> 592 // ToUnicode never fails. If any step fails, then the original input 593 // is returned immediately in that step. 594 // </quote> 595 // So if any step fails lets copy source to destination 596 if(U_FAILURE(*status)){ 597 //copy the source to destination 598 if(dest && srcLength <= destCapacity){ 599 // srcLength should have already been set earlier. 600 U_ASSERT(srcLength >= 0); 601 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); 602 } 603 reqLength = srcLength; 604 *status = U_ZERO_ERROR; 605 } 606 607 return u_terminateUChars(dest, destCapacity, reqLength, status); 608 } 609 610 U_CAPI int32_t U_EXPORT2 611 uidna_toASCII(const UChar* src, int32_t srcLength, 612 UChar* dest, int32_t destCapacity, 613 int32_t options, 614 UParseError* parseError, 615 UErrorCode* status){ 616 617 if(status == NULL || U_FAILURE(*status)){ 618 return 0; 619 } 620 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 621 *status = U_ILLEGAL_ARGUMENT_ERROR; 622 return 0; 623 } 624 625 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 626 627 if(U_FAILURE(*status)){ 628 return -1; 629 } 630 631 int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); 632 633 /* close the profile*/ 634 usprep_close(nameprep); 635 636 return retLen; 637 } 638 639 U_CAPI int32_t U_EXPORT2 640 uidna_toUnicode(const UChar* src, int32_t srcLength, 641 UChar* dest, int32_t destCapacity, 642 int32_t options, 643 UParseError* parseError, 644 UErrorCode* status){ 645 646 if(status == NULL || U_FAILURE(*status)){ 647 return 0; 648 } 649 if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 650 *status = U_ILLEGAL_ARGUMENT_ERROR; 651 return 0; 652 } 653 654 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 655 656 if(U_FAILURE(*status)){ 657 return -1; 658 } 659 660 int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); 661 662 usprep_close(nameprep); 663 664 return retLen; 665 } 666 667 668 U_CAPI int32_t U_EXPORT2 669 uidna_IDNToASCII( const UChar *src, int32_t srcLength, 670 UChar* dest, int32_t destCapacity, 671 int32_t options, 672 UParseError *parseError, 673 UErrorCode *status){ 674 675 if(status == NULL || U_FAILURE(*status)){ 676 return 0; 677 } 678 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 679 *status = U_ILLEGAL_ARGUMENT_ERROR; 680 return 0; 681 } 682 683 int32_t reqLength = 0; 684 685 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 686 687 if(U_FAILURE(*status)){ 688 return 0; 689 } 690 691 //initialize pointers 692 UChar *delimiter = (UChar*)src; 693 UChar *labelStart = (UChar*)src; 694 UChar *currentDest = (UChar*) dest; 695 int32_t remainingLen = srcLength; 696 int32_t remainingDestCapacity = destCapacity; 697 int32_t labelLen = 0, labelReqLength = 0; 698 UBool done = FALSE; 699 700 701 for(;;){ 702 703 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); 704 labelReqLength = 0; 705 if(!(labelLen==0 && done)){// make sure this is not a root label separator. 706 707 labelReqLength = _internal_toASCII( labelStart, labelLen, 708 currentDest, remainingDestCapacity, 709 options, nameprep, 710 parseError, status); 711 712 if(*status == U_BUFFER_OVERFLOW_ERROR){ 713 714 *status = U_ZERO_ERROR; // reset error 715 remainingDestCapacity = 0; 716 } 717 } 718 719 720 if(U_FAILURE(*status)){ 721 break; 722 } 723 724 reqLength +=labelReqLength; 725 // adjust the destination pointer 726 if(labelReqLength < remainingDestCapacity){ 727 currentDest = currentDest + labelReqLength; 728 remainingDestCapacity -= labelReqLength; 729 }else{ 730 // should never occur 731 remainingDestCapacity = 0; 732 } 733 734 if(done == TRUE){ 735 break; 736 } 737 738 // add the label separator 739 if(remainingDestCapacity > 0){ 740 *currentDest++ = FULL_STOP; 741 remainingDestCapacity--; 742 } 743 reqLength++; 744 745 labelStart = delimiter; 746 if(remainingLen >0 ){ 747 remainingLen = (int32_t)(srcLength - (delimiter - src)); 748 } 749 750 } 751 752 if(reqLength > MAX_DOMAIN_NAME_LENGTH){ 753 *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; 754 } 755 756 usprep_close(nameprep); 757 758 return u_terminateUChars(dest, destCapacity, reqLength, status); 759 } 760 761 U_CAPI int32_t U_EXPORT2 762 uidna_IDNToUnicode( const UChar* src, int32_t srcLength, 763 UChar* dest, int32_t destCapacity, 764 int32_t options, 765 UParseError* parseError, 766 UErrorCode* status){ 767 768 if(status == NULL || U_FAILURE(*status)){ 769 return 0; 770 } 771 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 772 *status = U_ILLEGAL_ARGUMENT_ERROR; 773 return 0; 774 } 775 776 int32_t reqLength = 0; 777 778 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 779 780 if(U_FAILURE(*status)){ 781 return 0; 782 } 783 784 //initialize pointers 785 UChar *delimiter = (UChar*)src; 786 UChar *labelStart = (UChar*)src; 787 UChar *currentDest = (UChar*) dest; 788 int32_t remainingLen = srcLength; 789 int32_t remainingDestCapacity = destCapacity; 790 int32_t labelLen = 0, labelReqLength = 0; 791 UBool done = FALSE; 792 793 for(;;){ 794 795 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); 796 797 // The RFC states that 798 // <quote> 799 // ToUnicode never fails. If any step fails, then the original input 800 // is returned immediately in that step. 801 // </quote> 802 // _internal_toUnicode will copy the label. 803 /*if(labelLen==0 && done==FALSE){ 804 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 805 break; 806 }*/ 807 808 labelReqLength = _internal_toUnicode(labelStart, labelLen, 809 currentDest, remainingDestCapacity, 810 options, nameprep, 811 parseError, status); 812 813 if(*status == U_BUFFER_OVERFLOW_ERROR){ 814 *status = U_ZERO_ERROR; // reset error 815 remainingDestCapacity = 0; 816 } 817 818 if(U_FAILURE(*status)){ 819 break; 820 } 821 822 reqLength +=labelReqLength; 823 // adjust the destination pointer 824 if(labelReqLength < remainingDestCapacity){ 825 currentDest = currentDest + labelReqLength; 826 remainingDestCapacity -= labelReqLength; 827 }else{ 828 // should never occur 829 remainingDestCapacity = 0; 830 } 831 832 if(done == TRUE){ 833 break; 834 } 835 836 // add the label separator 837 // Unlike the ToASCII operation we don't normalize the label separators 838 if(remainingDestCapacity > 0){ 839 *currentDest++ = *(labelStart + labelLen); 840 remainingDestCapacity--; 841 } 842 reqLength++; 843 844 labelStart = delimiter; 845 if(remainingLen >0 ){ 846 remainingLen = (int32_t)(srcLength - (delimiter - src)); 847 } 848 849 } 850 851 if(reqLength > MAX_DOMAIN_NAME_LENGTH){ 852 *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; 853 } 854 855 usprep_close(nameprep); 856 857 return u_terminateUChars(dest, destCapacity, reqLength, status); 858 } 859 860 U_CAPI int32_t U_EXPORT2 861 uidna_compare( const UChar *s1, int32_t length1, 862 const UChar *s2, int32_t length2, 863 int32_t options, 864 UErrorCode* status){ 865 866 if(status == NULL || U_FAILURE(*status)){ 867 return -1; 868 } 869 870 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; 871 UChar *b1 = b1Stack, *b2 = b2Stack; 872 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; 873 int32_t result=-1; 874 875 UParseError parseError; 876 877 b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); 878 if(*status == U_BUFFER_OVERFLOW_ERROR){ 879 // redo processing of string 880 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 881 if(b1==NULL){ 882 *status = U_MEMORY_ALLOCATION_ERROR; 883 goto CLEANUP; 884 } 885 886 *status = U_ZERO_ERROR; // reset error 887 888 b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); 889 890 } 891 892 b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); 893 if(*status == U_BUFFER_OVERFLOW_ERROR){ 894 // redo processing of string 895 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 896 if(b2==NULL){ 897 *status = U_MEMORY_ALLOCATION_ERROR; 898 goto CLEANUP; 899 } 900 901 *status = U_ZERO_ERROR; // reset error 902 903 b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); 904 905 } 906 // when toASCII is applied all label separators are replaced with FULL_STOP 907 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); 908 909 CLEANUP: 910 if(b1 != b1Stack){ 911 uprv_free(b1); 912 } 913 914 if(b2 != b2Stack){ 915 uprv_free(b2); 916 } 917 918 return result; 919 } 920 921 #endif /* #if !UCONFIG_NO_IDNA */ 922