1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2011, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: idnaref.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003feb1 16 * created by: Ram Viswanadha 17 */ 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION 22 #include "idnaref.h" 23 #include "punyref.h" 24 #include "ustr_imp.h" 25 #include "cmemory.h" 26 #include "sprpimpl.h" 27 #include "nptrans.h" 28 #include "testidna.h" 29 #include "punycode.h" 30 #include "unicode/ustring.h" 31 32 /* it is official IDNA ACE Prefix is "xn--" */ 33 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; 34 #define ACE_PREFIX_LENGTH 4 35 36 #define MAX_LABEL_LENGTH 63 37 #define HYPHEN 0x002D 38 /* The Max length of the labels should not be more than 64 */ 39 #define MAX_LABEL_BUFFER_SIZE 100 40 #define MAX_IDN_BUFFER_SIZE 300 41 42 #define CAPITAL_A 0x0041 43 #define CAPITAL_Z 0x005A 44 #define LOWER_CASE_DELTA 0x0020 45 #define FULL_STOP 0x002E 46 47 48 inline static UBool 49 startsWithPrefix(const UChar* src , int32_t srcLength){ 50 UBool startsWithPrefix = TRUE; 51 52 if(srcLength < ACE_PREFIX_LENGTH){ 53 return FALSE; 54 } 55 56 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ 57 if(u_tolower(src[i]) != ACE_PREFIX[i]){ 58 startsWithPrefix = FALSE; 59 } 60 } 61 return startsWithPrefix; 62 } 63 64 inline static UChar 65 toASCIILower(UChar ch){ 66 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 67 return ch + LOWER_CASE_DELTA; 68 } 69 return ch; 70 } 71 72 inline static int32_t 73 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, 74 const UChar* s2, int32_t s2Len){ 75 if(s1Len != s2Len){ 76 return (s1Len > s2Len) ? s1Len : s2Len; 77 } 78 UChar c1,c2; 79 int32_t rc; 80 81 for(int32_t i =0;/* no condition */;i++) { 82 /* If we reach the ends of both strings then they match */ 83 if(i == s1Len) { 84 return 0; 85 } 86 87 c1 = s1[i]; 88 c2 = s2[i]; 89 90 /* Case-insensitive comparison */ 91 if(c1!=c2) { 92 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); 93 if(rc!=0) { 94 return rc; 95 } 96 } 97 } 98 99 } 100 101 static UErrorCode getError(enum punycode_status status){ 102 switch(status){ 103 case punycode_success: 104 return U_ZERO_ERROR; 105 case punycode_bad_input: /* Input is invalid. */ 106 return U_INVALID_CHAR_FOUND; 107 case punycode_big_output: /* Output would exceed the space provided. */ 108 return U_BUFFER_OVERFLOW_ERROR; 109 case punycode_overflow : /* Input requires wider integers to process. */ 110 return U_INDEX_OUTOFBOUNDS_ERROR; 111 default: 112 return U_INTERNAL_PROGRAM_ERROR; 113 } 114 } 115 116 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){ 117 int i; 118 for(i=0;i<length;i++){ 119 dest[i] = src[i]; 120 } 121 return i; 122 } 123 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){ 124 int i; 125 for(i=0;i<length;i++){ 126 dest[i] = (char)src[i]; 127 } 128 return i; 129 } 130 // wrapper around the reference Punycode implementation 131 static int32_t convertToPuny(const UChar* src, int32_t srcLength, 132 UChar* dest, int32_t destCapacity, 133 UErrorCode& status){ 134 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; 135 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; 136 uint32_t* b1 = b1Stack; 137 char b2Stack[MAX_LABEL_BUFFER_SIZE]; 138 char* b2 = b2Stack; 139 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; 140 punycode_status error; 141 unsigned char* caseFlags = NULL; 142 143 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); 144 if(status == U_BUFFER_OVERFLOW_ERROR){ 145 // redo processing of string 146 /* we do not have enough room so grow the buffer*/ 147 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); 148 if(b1==NULL){ 149 status = U_MEMORY_ALLOCATION_ERROR; 150 goto CLEANUP; 151 } 152 153 status = U_ZERO_ERROR; // reset error 154 155 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); 156 } 157 if(U_FAILURE(status)){ 158 goto CLEANUP; 159 } 160 161 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); 162 163 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); 164 status = getError(error); 165 166 if(status == U_BUFFER_OVERFLOW_ERROR){ 167 /* we do not have enough room so grow the buffer*/ 168 b2 = (char*) uprv_malloc( b2Len * sizeof(char)); 169 if(b2==NULL){ 170 status = U_MEMORY_ALLOCATION_ERROR; 171 goto CLEANUP; 172 } 173 174 status = U_ZERO_ERROR; // reset error 175 176 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); 177 status = getError(error); 178 } 179 if(U_FAILURE(status)){ 180 goto CLEANUP; 181 } 182 183 if(b2Len < destCapacity){ 184 convertASCIIToUChars(b2,dest,b2Len); 185 }else{ 186 status =U_BUFFER_OVERFLOW_ERROR; 187 } 188 189 CLEANUP: 190 if(b1Stack != b1){ 191 uprv_free(b1); 192 } 193 if(b2Stack != b2){ 194 uprv_free(b2); 195 } 196 uprv_free(caseFlags); 197 198 return b2Len; 199 } 200 201 static int32_t convertFromPuny( const UChar* src, int32_t srcLength, 202 UChar* dest, int32_t destCapacity, 203 UErrorCode& status){ 204 char b1Stack[MAX_LABEL_BUFFER_SIZE]; 205 char* b1 = b1Stack; 206 int32_t destLen =0; 207 208 convertUCharsToASCII(src, b1,srcLength); 209 210 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; 211 uint32_t* b2 = b2Stack; 212 int32_t b2Len =MAX_LABEL_BUFFER_SIZE; 213 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*)); 214 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); 215 status = getError(error); 216 if(status == U_BUFFER_OVERFLOW_ERROR){ 217 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); 218 if(b2 == NULL){ 219 status = U_MEMORY_ALLOCATION_ERROR; 220 goto CLEANUP; 221 } 222 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); 223 status = getError(error); 224 } 225 226 if(U_FAILURE(status)){ 227 goto CLEANUP; 228 } 229 230 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); 231 232 CLEANUP: 233 if(b1Stack != b1){ 234 uprv_free(b1); 235 } 236 if(b2Stack != b2){ 237 uprv_free(b2); 238 } 239 uprv_free(caseFlags); 240 241 return destLen; 242 } 243 244 245 U_CFUNC int32_t U_EXPORT2 246 idnaref_toASCII(const UChar* src, int32_t srcLength, 247 UChar* dest, int32_t destCapacity, 248 int32_t options, 249 UParseError* parseError, 250 UErrorCode* status){ 251 252 if(status == NULL || U_FAILURE(*status)){ 253 return 0; 254 } 255 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 256 *status = U_ILLEGAL_ARGUMENT_ERROR; 257 return 0; 258 } 259 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; 260 //initialize pointers to stack buffers 261 UChar *b1 = b1Stack, *b2 = b2Stack; 262 int32_t b1Len=0, b2Len=0, 263 b1Capacity = MAX_LABEL_BUFFER_SIZE, 264 b2Capacity = MAX_LABEL_BUFFER_SIZE , 265 reqLength=0; 266 267 //get the options 268 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 269 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 270 271 UBool* caseFlags = NULL; 272 273 // assume the source contains all ascii codepoints 274 UBool srcIsASCII = TRUE; 275 // assume the source contains all LDH codepoints 276 UBool srcIsLDH = TRUE; 277 int32_t j=0; 278 279 if(srcLength == -1){ 280 srcLength = u_strlen(src); 281 } 282 283 // step 1 284 for( j=0;j<srcLength;j++){ 285 if(src[j] > 0x7F){ 286 srcIsASCII = FALSE; 287 } 288 b1[b1Len++] = src[j]; 289 } 290 291 NamePrepTransform* prep = TestIDNA::getInstance(*status); 292 if(U_FAILURE(*status)){ 293 goto CLEANUP; 294 } 295 296 // step 2 is performed only if the source contains non ASCII 297 if (!srcIsASCII) { 298 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status); 299 300 if(*status == U_BUFFER_OVERFLOW_ERROR){ 301 // redo processing of string 302 /* we do not have enough room so grow the buffer*/ 303 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 304 if(b1==NULL){ 305 *status = U_MEMORY_ALLOCATION_ERROR; 306 goto CLEANUP; 307 } 308 309 *status = U_ZERO_ERROR; // reset error 310 311 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); 312 } 313 // error bail out 314 if(U_FAILURE(*status)){ 315 goto CLEANUP; 316 } 317 } 318 319 if(b1Len == 0){ 320 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 321 goto CLEANUP; 322 } 323 324 srcIsASCII = TRUE; 325 // step 3 & 4 326 for( j=0;j<b1Len;j++){ 327 if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII 328 srcIsASCII = FALSE; 329 }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character{ 330 srcIsLDH = FALSE; 331 } 332 } 333 334 if(useSTD3ASCIIRules == TRUE){ 335 // verify 3a and 3b 336 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ 337 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ 338 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 339 goto CLEANUP; 340 } 341 } 342 if(srcIsASCII){ 343 if(b1Len <= destCapacity){ 344 u_memmove(dest, b1, b1Len); 345 reqLength = b1Len; 346 }else{ 347 reqLength = b1Len; 348 goto CLEANUP; 349 } 350 }else{ 351 // step 5 : verify the sequence does not begin with ACE prefix 352 if(!startsWithPrefix(b1,b1Len)){ 353 354 //step 6: encode the sequence with punycode 355 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); 356 357 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status); 358 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status); 359 if(*status == U_BUFFER_OVERFLOW_ERROR){ 360 // redo processing of string 361 /* we do not have enough room so grow the buffer*/ 362 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 363 if(b2 == NULL){ 364 *status = U_MEMORY_ALLOCATION_ERROR; 365 goto CLEANUP; 366 } 367 368 *status = U_ZERO_ERROR; // reset error 369 370 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status); 371 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status); 372 373 } 374 //error bail out 375 if(U_FAILURE(*status)){ 376 goto CLEANUP; 377 } 378 reqLength = b2Len+ACE_PREFIX_LENGTH; 379 380 if(reqLength > destCapacity){ 381 *status = U_BUFFER_OVERFLOW_ERROR; 382 goto CLEANUP; 383 } 384 //Step 7: prepend the ACE prefix 385 u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); 386 //Step 6: copy the contents in b2 into dest 387 u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); 388 389 }else{ 390 *status = U_IDNA_ACE_PREFIX_ERROR; 391 goto CLEANUP; 392 } 393 } 394 395 if(reqLength > MAX_LABEL_LENGTH){ 396 *status = U_IDNA_LABEL_TOO_LONG_ERROR; 397 } 398 399 CLEANUP: 400 if(b1 != b1Stack){ 401 uprv_free(b1); 402 } 403 if(b2 != b2Stack){ 404 uprv_free(b2); 405 } 406 uprv_free(caseFlags); 407 408 // delete prep; 409 410 return u_terminateUChars(dest, destCapacity, reqLength, status); 411 } 412 413 414 U_CFUNC int32_t U_EXPORT2 415 idnaref_toUnicode(const UChar* src, int32_t srcLength, 416 UChar* dest, int32_t destCapacity, 417 int32_t options, 418 UParseError* parseError, 419 UErrorCode* status){ 420 421 if(status == NULL || U_FAILURE(*status)){ 422 return 0; 423 } 424 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 425 *status = U_ILLEGAL_ARGUMENT_ERROR; 426 return 0; 427 } 428 429 430 431 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; 432 433 //initialize pointers to stack buffers 434 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; 435 int32_t b1Len, b2Len, b1PrimeLen, b3Len, 436 b1Capacity = MAX_LABEL_BUFFER_SIZE, 437 b2Capacity = MAX_LABEL_BUFFER_SIZE, 438 b3Capacity = MAX_LABEL_BUFFER_SIZE, 439 reqLength=0; 440 // UParseError parseError; 441 442 NamePrepTransform* prep = TestIDNA::getInstance(*status); 443 b1Len = 0; 444 UBool* caseFlags = NULL; 445 446 //get the options 447 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 448 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 449 450 UBool srcIsASCII = TRUE; 451 UBool srcIsLDH = TRUE; 452 int32_t failPos =0; 453 454 if(U_FAILURE(*status)){ 455 goto CLEANUP; 456 } 457 // step 1: find out if all the codepoints in src are ASCII 458 if(srcLength==-1){ 459 srcLength = 0; 460 for(;src[srcLength]!=0;){ 461 if(src[srcLength]> 0x7f){ 462 srcIsASCII = FALSE; 463 }if(prep->isLDHChar(src[srcLength])==FALSE){ 464 // here we do not assemble surrogates 465 // since we know that LDH code points 466 // are in the ASCII range only 467 srcIsLDH = FALSE; 468 failPos = srcLength; 469 } 470 srcLength++; 471 } 472 }else{ 473 for(int32_t j=0; j<srcLength; j++){ 474 if(src[j]> 0x7f){ 475 srcIsASCII = FALSE; 476 }else if(prep->isLDHChar(src[j])==FALSE){ 477 // here we do not assemble surrogates 478 // since we know that LDH code points 479 // are in the ASCII range only 480 srcIsLDH = FALSE; 481 failPos = j; 482 } 483 } 484 } 485 486 if(srcIsASCII == FALSE){ 487 // step 2: process the string 488 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status); 489 if(*status == U_BUFFER_OVERFLOW_ERROR){ 490 // redo processing of string 491 /* we do not have enough room so grow the buffer*/ 492 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 493 if(b1==NULL){ 494 *status = U_MEMORY_ALLOCATION_ERROR; 495 goto CLEANUP; 496 } 497 498 *status = U_ZERO_ERROR; // reset error 499 500 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); 501 } 502 //bail out on error 503 if(U_FAILURE(*status)){ 504 goto CLEANUP; 505 } 506 }else{ 507 508 // copy everything to b1 509 if(srcLength < b1Capacity){ 510 u_memmove(b1, src, srcLength); 511 }else{ 512 /* we do not have enough room so grow the buffer*/ 513 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); 514 if(b1==NULL){ 515 *status = U_MEMORY_ALLOCATION_ERROR; 516 goto CLEANUP; 517 } 518 u_memmove(b1, src, srcLength); 519 } 520 b1Len = srcLength; 521 } 522 //step 3: verify ACE Prefix 523 if(startsWithPrefix(src,srcLength)){ 524 525 //step 4: Remove the ACE Prefix 526 b1Prime = b1 + ACE_PREFIX_LENGTH; 527 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; 528 529 //step 5: Decode using punycode 530 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status); 531 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status); 532 533 if(*status == U_BUFFER_OVERFLOW_ERROR){ 534 // redo processing of string 535 /* we do not have enough room so grow the buffer*/ 536 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 537 if(b2==NULL){ 538 *status = U_MEMORY_ALLOCATION_ERROR; 539 goto CLEANUP; 540 } 541 542 *status = U_ZERO_ERROR; // reset error 543 544 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status); 545 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status); 546 } 547 548 549 //step 6:Apply toASCII 550 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status); 551 552 if(*status == U_BUFFER_OVERFLOW_ERROR){ 553 // redo processing of string 554 /* we do not have enough room so grow the buffer*/ 555 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); 556 if(b3==NULL){ 557 *status = U_MEMORY_ALLOCATION_ERROR; 558 goto CLEANUP; 559 } 560 561 *status = U_ZERO_ERROR; // reset error 562 563 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status); 564 565 } 566 //bail out on error 567 if(U_FAILURE(*status)){ 568 goto CLEANUP; 569 } 570 571 //step 7: verify 572 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ 573 *status = U_IDNA_VERIFICATION_ERROR; 574 goto CLEANUP; 575 } 576 577 //step 8: return output of step 5 578 reqLength = b2Len; 579 if(b2Len <= destCapacity) { 580 u_memmove(dest, b2, b2Len); 581 } 582 }else{ 583 // verify that STD3 ASCII rules are satisfied 584 if(useSTD3ASCIIRules == TRUE){ 585 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ 586 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ 587 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 588 589 /* populate the parseError struct */ 590 if(srcIsLDH==FALSE){ 591 // failPos is always set the index of failure 592 uprv_syntaxError(src,failPos, srcLength,parseError); 593 }else if(src[0] == HYPHEN){ 594 // fail position is 0 595 uprv_syntaxError(src,0,srcLength,parseError); 596 }else{ 597 // the last index in the source is always length-1 598 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); 599 } 600 601 goto CLEANUP; 602 } 603 } 604 //copy the source to destination 605 if(srcLength <= destCapacity){ 606 u_memmove(dest, src, srcLength); 607 } 608 reqLength = srcLength; 609 } 610 611 CLEANUP: 612 613 if(b1 != b1Stack){ 614 uprv_free(b1); 615 } 616 if(b2 != b2Stack){ 617 uprv_free(b2); 618 } 619 uprv_free(caseFlags); 620 621 // The RFC states that 622 // <quote> 623 // ToUnicode never fails. If any step fails, then the original input 624 // is returned immediately in that step. 625 // </quote> 626 // So if any step fails lets copy source to destination 627 if(U_FAILURE(*status)){ 628 //copy the source to destination 629 if(dest && srcLength <= destCapacity){ 630 if(srcLength == -1) { 631 u_memmove(dest, src, u_strlen(src)); 632 } else { 633 u_memmove(dest, src, srcLength); 634 } 635 } 636 reqLength = srcLength; 637 *status = U_ZERO_ERROR; 638 } 639 return u_terminateUChars(dest, destCapacity, reqLength, status); 640 } 641 642 643 static int32_t 644 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep, 645 UChar **limit, 646 UBool *done, 647 UErrorCode *status){ 648 if(srcLength == -1){ 649 int32_t i; 650 for(i=0 ; ;i++){ 651 if(src[i] == 0){ 652 *limit = src + i; // point to null 653 *done = TRUE; 654 return i; 655 } 656 if(prep->isLabelSeparator(src[i],*status)){ 657 *limit = src + (i+1); // go past the delimiter 658 return i; 659 660 } 661 } 662 }else{ 663 int32_t i; 664 for(i=0;i<srcLength;i++){ 665 if(prep->isLabelSeparator(src[i],*status)){ 666 *limit = src + (i+1); // go past the delimiter 667 return i; 668 } 669 } 670 // we have not found the delimiter 671 if(i==srcLength){ 672 *limit = src+srcLength; 673 *done = TRUE; 674 } 675 return i; 676 } 677 } 678 679 U_CFUNC int32_t U_EXPORT2 680 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, 681 UChar* dest, int32_t destCapacity, 682 int32_t options, 683 UParseError* parseError, 684 UErrorCode* status){ 685 686 if(status == NULL || U_FAILURE(*status)){ 687 return 0; 688 } 689 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 690 *status = U_ILLEGAL_ARGUMENT_ERROR; 691 return 0; 692 } 693 694 int32_t reqLength = 0; 695 // UParseError parseError; 696 697 NamePrepTransform* prep = TestIDNA::getInstance(*status); 698 699 //initialize pointers to stack buffers 700 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; 701 UChar *b1 = b1Stack; 702 int32_t b1Len, labelLen; 703 UChar* delimiter = (UChar*)src; 704 UChar* labelStart = (UChar*)src; 705 int32_t remainingLen = srcLength; 706 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; 707 708 //get the options 709 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 710 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 711 UBool done = FALSE; 712 713 if(U_FAILURE(*status)){ 714 goto CLEANUP; 715 } 716 717 718 if(srcLength == -1){ 719 for(;;){ 720 721 if(*delimiter == 0){ 722 break; 723 } 724 725 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); 726 b1Len = 0; 727 if(!(labelLen==0 && done)){// make sure this is not a root label separator. 728 729 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, 730 options, parseError, status); 731 732 if(*status == U_BUFFER_OVERFLOW_ERROR){ 733 // redo processing of string 734 /* we do not have enough room so grow the buffer*/ 735 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 736 if(b1==NULL){ 737 *status = U_MEMORY_ALLOCATION_ERROR; 738 goto CLEANUP; 739 } 740 741 *status = U_ZERO_ERROR; // reset error 742 743 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, 744 options, parseError, status); 745 746 } 747 } 748 749 if(U_FAILURE(*status)){ 750 goto CLEANUP; 751 } 752 int32_t tempLen = (reqLength + b1Len ); 753 // copy to dest 754 if( tempLen< destCapacity){ 755 u_memmove(dest+reqLength, b1, b1Len); 756 } 757 758 reqLength = tempLen; 759 760 // add the label separator 761 if(done == FALSE){ 762 if(reqLength < destCapacity){ 763 dest[reqLength] = FULL_STOP; 764 } 765 reqLength++; 766 } 767 768 labelStart = delimiter; 769 } 770 }else{ 771 for(;;){ 772 773 if(delimiter == src+srcLength){ 774 break; 775 } 776 777 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); 778 779 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, 780 options,parseError, status); 781 782 if(*status == U_BUFFER_OVERFLOW_ERROR){ 783 // redo processing of string 784 /* we do not have enough room so grow the buffer*/ 785 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 786 if(b1==NULL){ 787 *status = U_MEMORY_ALLOCATION_ERROR; 788 goto CLEANUP; 789 } 790 791 *status = U_ZERO_ERROR; // reset error 792 793 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, 794 options, parseError, status); 795 796 } 797 798 if(U_FAILURE(*status)){ 799 goto CLEANUP; 800 } 801 int32_t tempLen = (reqLength + b1Len ); 802 // copy to dest 803 if( tempLen< destCapacity){ 804 u_memmove(dest+reqLength, b1, b1Len); 805 } 806 807 reqLength = tempLen; 808 809 // add the label separator 810 if(done == FALSE){ 811 if(reqLength < destCapacity){ 812 dest[reqLength] = FULL_STOP; 813 } 814 reqLength++; 815 } 816 817 labelStart = delimiter; 818 remainingLen = srcLength - (delimiter - src); 819 } 820 } 821 822 823 CLEANUP: 824 825 if(b1 != b1Stack){ 826 uprv_free(b1); 827 } 828 829 // delete prep; 830 831 return u_terminateUChars(dest, destCapacity, reqLength, status); 832 } 833 834 U_CFUNC int32_t U_EXPORT2 835 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, 836 UChar* dest, int32_t destCapacity, 837 int32_t options, 838 UParseError* parseError, 839 UErrorCode* status){ 840 841 if(status == NULL || U_FAILURE(*status)){ 842 return 0; 843 } 844 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 845 *status = U_ILLEGAL_ARGUMENT_ERROR; 846 return 0; 847 } 848 849 int32_t reqLength = 0; 850 851 UBool done = FALSE; 852 853 NamePrepTransform* prep = TestIDNA::getInstance(*status); 854 855 //initialize pointers to stack buffers 856 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; 857 UChar *b1 = b1Stack; 858 int32_t b1Len, labelLen; 859 UChar* delimiter = (UChar*)src; 860 UChar* labelStart = (UChar*)src; 861 int32_t remainingLen = srcLength; 862 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; 863 864 //get the options 865 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 866 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 867 868 if(U_FAILURE(*status)){ 869 goto CLEANUP; 870 } 871 872 if(srcLength == -1){ 873 for(;;){ 874 875 if(*delimiter == 0){ 876 break; 877 } 878 879 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); 880 881 if(labelLen==0 && done==FALSE){ 882 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 883 } 884 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity, 885 options, parseError, status); 886 887 if(*status == U_BUFFER_OVERFLOW_ERROR){ 888 // redo processing of string 889 /* we do not have enough room so grow the buffer*/ 890 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 891 if(b1==NULL){ 892 *status = U_MEMORY_ALLOCATION_ERROR; 893 goto CLEANUP; 894 } 895 896 *status = U_ZERO_ERROR; // reset error 897 898 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, 899 options, parseError, status); 900 901 } 902 903 if(U_FAILURE(*status)){ 904 goto CLEANUP; 905 } 906 int32_t tempLen = (reqLength + b1Len ); 907 // copy to dest 908 if( tempLen< destCapacity){ 909 u_memmove(dest+reqLength, b1, b1Len); 910 } 911 912 reqLength = tempLen; 913 // add the label separator 914 if(done == FALSE){ 915 if(reqLength < destCapacity){ 916 dest[reqLength] = FULL_STOP; 917 } 918 reqLength++; 919 } 920 921 labelStart = delimiter; 922 } 923 }else{ 924 for(;;){ 925 926 if(delimiter == src+srcLength){ 927 break; 928 } 929 930 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); 931 932 if(labelLen==0 && done==FALSE){ 933 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 934 } 935 936 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity, 937 options, parseError, status); 938 939 if(*status == U_BUFFER_OVERFLOW_ERROR){ 940 // redo processing of string 941 /* we do not have enough room so grow the buffer*/ 942 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 943 if(b1==NULL){ 944 *status = U_MEMORY_ALLOCATION_ERROR; 945 goto CLEANUP; 946 } 947 948 *status = U_ZERO_ERROR; // reset error 949 950 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, 951 options, parseError, status); 952 953 } 954 955 if(U_FAILURE(*status)){ 956 goto CLEANUP; 957 } 958 int32_t tempLen = (reqLength + b1Len ); 959 // copy to dest 960 if( tempLen< destCapacity){ 961 u_memmove(dest+reqLength, b1, b1Len); 962 } 963 964 reqLength = tempLen; 965 966 // add the label separator 967 if(done == FALSE){ 968 if(reqLength < destCapacity){ 969 dest[reqLength] = FULL_STOP; 970 } 971 reqLength++; 972 } 973 974 labelStart = delimiter; 975 remainingLen = srcLength - (delimiter - src); 976 } 977 } 978 979 CLEANUP: 980 981 if(b1 != b1Stack){ 982 uprv_free(b1); 983 } 984 985 // delete prep; 986 987 return u_terminateUChars(dest, destCapacity, reqLength, status); 988 } 989 990 U_CFUNC int32_t U_EXPORT2 991 idnaref_compare( const UChar *s1, int32_t length1, 992 const UChar *s2, int32_t length2, 993 int32_t options, 994 UErrorCode* status){ 995 996 if(status == NULL || U_FAILURE(*status)){ 997 return -1; 998 } 999 1000 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; 1001 UChar *b1 = b1Stack, *b2 = b2Stack; 1002 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; 1003 int32_t result = -1; 1004 1005 UParseError parseError; 1006 1007 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); 1008 if(*status == U_BUFFER_OVERFLOW_ERROR){ 1009 // redo processing of string 1010 /* we do not have enough room so grow the buffer*/ 1011 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 1012 if(b1==NULL){ 1013 *status = U_MEMORY_ALLOCATION_ERROR; 1014 goto CLEANUP; 1015 } 1016 1017 *status = U_ZERO_ERROR; // reset error 1018 1019 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); 1020 1021 } 1022 1023 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status); 1024 if(*status == U_BUFFER_OVERFLOW_ERROR){ 1025 // redo processing of string 1026 /* we do not have enough room so grow the buffer*/ 1027 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 1028 if(b2==NULL){ 1029 *status = U_MEMORY_ALLOCATION_ERROR; 1030 goto CLEANUP; 1031 } 1032 1033 *status = U_ZERO_ERROR; // reset error 1034 1035 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status); 1036 1037 } 1038 // when toASCII is applied all label separators are replaced with FULL_STOP 1039 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); 1040 1041 CLEANUP: 1042 if(b1 != b1Stack){ 1043 uprv_free(b1); 1044 } 1045 1046 if(b2 != b2Stack){ 1047 uprv_free(b2); 1048 } 1049 1050 return result; 1051 } 1052 #endif /* #if !UCONFIG_NO_IDNA */ 1053