1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: idnaref.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003feb1 14 * created by: Ram Viswanadha 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION 20 #include "idnaref.h" 21 #include "punyref.h" 22 #include "ustr_imp.h" 23 #include "cmemory.h" 24 #include "sprpimpl.h" 25 #include "nptrans.h" 26 #include "testidna.h" 27 #include "punycode.h" 28 #include "unicode/ustring.h" 29 30 /* it is official IDNA ACE Prefix is "xn--" */ 31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; 32 #define ACE_PREFIX_LENGTH 4 33 34 #define MAX_LABEL_LENGTH 63 35 #define HYPHEN 0x002D 36 /* The Max length of the labels should not be more than 64 */ 37 #define MAX_LABEL_BUFFER_SIZE 100 38 #define MAX_IDN_BUFFER_SIZE 300 39 40 #define CAPITAL_A 0x0041 41 #define CAPITAL_Z 0x005A 42 #define LOWER_CASE_DELTA 0x0020 43 #define FULL_STOP 0x002E 44 45 46 inline static UBool 47 startsWithPrefix(const UChar* src , int32_t srcLength){ 48 UBool startsWithPrefix = TRUE; 49 50 if(srcLength < ACE_PREFIX_LENGTH){ 51 return FALSE; 52 } 53 54 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ 55 if(u_tolower(src[i]) != ACE_PREFIX[i]){ 56 startsWithPrefix = FALSE; 57 } 58 } 59 return startsWithPrefix; 60 } 61 62 inline static UChar 63 toASCIILower(UChar ch){ 64 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 65 return ch + LOWER_CASE_DELTA; 66 } 67 return ch; 68 } 69 70 inline static int32_t 71 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, 72 const UChar* s2, int32_t s2Len){ 73 if(s1Len != s2Len){ 74 return (s1Len > s2Len) ? s1Len : s2Len; 75 } 76 UChar c1,c2; 77 int32_t rc; 78 79 for(int32_t i =0;/* no condition */;i++) { 80 /* If we reach the ends of both strings then they match */ 81 if(i == s1Len) { 82 return 0; 83 } 84 85 c1 = s1[i]; 86 c2 = s2[i]; 87 88 /* Case-insensitive comparison */ 89 if(c1!=c2) { 90 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); 91 if(rc!=0) { 92 return rc; 93 } 94 } 95 } 96 97 } 98 99 static UErrorCode getError(enum punycode_status status){ 100 switch(status){ 101 case punycode_success: 102 return U_ZERO_ERROR; 103 case punycode_bad_input: /* Input is invalid. */ 104 return U_INVALID_CHAR_FOUND; 105 case punycode_big_output: /* Output would exceed the space provided. */ 106 return U_BUFFER_OVERFLOW_ERROR; 107 case punycode_overflow : /* Input requires wider integers to process. */ 108 return U_INDEX_OUTOFBOUNDS_ERROR; 109 default: 110 return U_INTERNAL_PROGRAM_ERROR; 111 } 112 } 113 114 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){ 115 int i; 116 for(i=0;i<length;i++){ 117 dest[i] = src[i]; 118 } 119 return i; 120 } 121 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){ 122 int i; 123 for(i=0;i<length;i++){ 124 dest[i] = (char)src[i]; 125 } 126 return i; 127 } 128 // wrapper around the reference Punycode implementation 129 static int32_t convertToPuny(const UChar* src, int32_t srcLength, 130 UChar* dest, int32_t destCapacity, 131 UErrorCode& status){ 132 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; 133 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; 134 uint32_t* b1 = b1Stack; 135 char b2Stack[MAX_LABEL_BUFFER_SIZE]; 136 char* b2 = b2Stack; 137 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; 138 punycode_status error; 139 unsigned char* caseFlags = NULL; 140 141 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); 142 if(status == U_BUFFER_OVERFLOW_ERROR){ 143 // redo processing of string 144 /* we do not have enough room so grow the buffer*/ 145 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); 146 if(b1==NULL){ 147 status = U_MEMORY_ALLOCATION_ERROR; 148 goto CLEANUP; 149 } 150 151 status = U_ZERO_ERROR; // reset error 152 153 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); 154 } 155 if(U_FAILURE(status)){ 156 goto CLEANUP; 157 } 158 159 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); 160 161 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); 162 status = getError(error); 163 164 if(status == U_BUFFER_OVERFLOW_ERROR){ 165 /* we do not have enough room so grow the buffer*/ 166 b2 = (char*) uprv_malloc( b2Len * sizeof(char)); 167 if(b2==NULL){ 168 status = U_MEMORY_ALLOCATION_ERROR; 169 goto CLEANUP; 170 } 171 172 status = U_ZERO_ERROR; // reset error 173 174 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); 175 status = getError(error); 176 } 177 if(U_FAILURE(status)){ 178 goto CLEANUP; 179 } 180 181 if(b2Len < destCapacity){ 182 convertASCIIToUChars(b2,dest,b2Len); 183 }else{ 184 status =U_BUFFER_OVERFLOW_ERROR; 185 } 186 187 CLEANUP: 188 if(b1Stack != b1){ 189 uprv_free(b1); 190 } 191 if(b2Stack != b2){ 192 uprv_free(b2); 193 } 194 uprv_free(caseFlags); 195 196 return b2Len; 197 } 198 199 static int32_t convertFromPuny( const UChar* src, int32_t srcLength, 200 UChar* dest, int32_t destCapacity, 201 UErrorCode& status){ 202 char b1Stack[MAX_LABEL_BUFFER_SIZE]; 203 char* b1 = b1Stack; 204 int32_t destLen =0; 205 206 convertUCharsToASCII(src, b1,srcLength); 207 208 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; 209 uint32_t* b2 = b2Stack; 210 int32_t b2Len =MAX_LABEL_BUFFER_SIZE; 211 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*)); 212 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); 213 status = getError(error); 214 if(status == U_BUFFER_OVERFLOW_ERROR){ 215 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); 216 if(b2 == NULL){ 217 status = U_MEMORY_ALLOCATION_ERROR; 218 goto CLEANUP; 219 } 220 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); 221 status = getError(error); 222 } 223 224 if(U_FAILURE(status)){ 225 goto CLEANUP; 226 } 227 228 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); 229 230 CLEANUP: 231 if(b1Stack != b1){ 232 uprv_free(b1); 233 } 234 if(b2Stack != b2){ 235 uprv_free(b2); 236 } 237 uprv_free(caseFlags); 238 239 return destLen; 240 } 241 242 243 U_CFUNC int32_t U_EXPORT2 244 idnaref_toASCII(const UChar* src, int32_t srcLength, 245 UChar* dest, int32_t destCapacity, 246 int32_t options, 247 UParseError* parseError, 248 UErrorCode* status){ 249 250 if(status == NULL || U_FAILURE(*status)){ 251 return 0; 252 } 253 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 254 *status = U_ILLEGAL_ARGUMENT_ERROR; 255 return 0; 256 } 257 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; 258 //initialize pointers to stack buffers 259 UChar *b1 = b1Stack, *b2 = b2Stack; 260 int32_t b1Len=0, b2Len=0, 261 b1Capacity = MAX_LABEL_BUFFER_SIZE, 262 b2Capacity = MAX_LABEL_BUFFER_SIZE , 263 reqLength=0; 264 265 //get the options 266 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 267 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 268 269 UBool* caseFlags = NULL; 270 271 // assume the source contains all ascii codepoints 272 UBool srcIsASCII = TRUE; 273 // assume the source contains all LDH codepoints 274 UBool srcIsLDH = TRUE; 275 int32_t j=0; 276 277 if(srcLength == -1){ 278 srcLength = u_strlen(src); 279 } 280 281 // step 1 282 for( j=0;j<srcLength;j++){ 283 if(src[j] > 0x7F){ 284 srcIsASCII = FALSE; 285 } 286 b1[b1Len++] = src[j]; 287 } 288 289 NamePrepTransform* prep = TestIDNA::getInstance(*status); 290 if(U_FAILURE(*status)){ 291 goto CLEANUP; 292 } 293 294 // step 2 is performed only if the source contains non ASCII 295 if (!srcIsASCII) { 296 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status); 297 298 if(*status == U_BUFFER_OVERFLOW_ERROR){ 299 // redo processing of string 300 /* we do not have enough room so grow the buffer*/ 301 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 302 if(b1==NULL){ 303 *status = U_MEMORY_ALLOCATION_ERROR; 304 goto CLEANUP; 305 } 306 307 *status = U_ZERO_ERROR; // reset error 308 309 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); 310 } 311 // error bail out 312 if(U_FAILURE(*status)){ 313 goto CLEANUP; 314 } 315 } 316 317 if(b1Len == 0){ 318 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 319 goto CLEANUP; 320 } 321 322 srcIsASCII = TRUE; 323 // step 3 & 4 324 for( j=0;j<b1Len;j++){ 325 if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII 326 srcIsASCII = FALSE; 327 }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character{ 328 srcIsLDH = FALSE; 329 } 330 } 331 332 if(useSTD3ASCIIRules == TRUE){ 333 // verify 3a and 3b 334 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ 335 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ 336 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 337 goto CLEANUP; 338 } 339 } 340 if(srcIsASCII){ 341 if(b1Len <= destCapacity){ 342 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); 343 reqLength = b1Len; 344 }else{ 345 reqLength = b1Len; 346 goto CLEANUP; 347 } 348 }else{ 349 // step 5 : verify the sequence does not begin with ACE prefix 350 if(!startsWithPrefix(b1,b1Len)){ 351 352 //step 6: encode the sequence with punycode 353 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); 354 355 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status); 356 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status); 357 if(*status == U_BUFFER_OVERFLOW_ERROR){ 358 // redo processing of string 359 /* we do not have enough room so grow the buffer*/ 360 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 361 if(b2 == NULL){ 362 *status = U_MEMORY_ALLOCATION_ERROR; 363 goto CLEANUP; 364 } 365 366 *status = U_ZERO_ERROR; // reset error 367 368 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status); 369 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status); 370 371 } 372 //error bail out 373 if(U_FAILURE(*status)){ 374 goto CLEANUP; 375 } 376 reqLength = b2Len+ACE_PREFIX_LENGTH; 377 378 if(reqLength > destCapacity){ 379 *status = U_BUFFER_OVERFLOW_ERROR; 380 goto CLEANUP; 381 } 382 //Step 7: prepend the ACE prefix 383 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); 384 //Step 6: copy the contents in b2 into dest 385 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); 386 387 }else{ 388 *status = U_IDNA_ACE_PREFIX_ERROR; 389 goto CLEANUP; 390 } 391 } 392 393 if(reqLength > MAX_LABEL_LENGTH){ 394 *status = U_IDNA_LABEL_TOO_LONG_ERROR; 395 } 396 397 CLEANUP: 398 if(b1 != b1Stack){ 399 uprv_free(b1); 400 } 401 if(b2 != b2Stack){ 402 uprv_free(b2); 403 } 404 uprv_free(caseFlags); 405 406 // delete prep; 407 408 return u_terminateUChars(dest, destCapacity, reqLength, status); 409 } 410 411 412 U_CFUNC int32_t U_EXPORT2 413 idnaref_toUnicode(const UChar* src, int32_t srcLength, 414 UChar* dest, int32_t destCapacity, 415 int32_t options, 416 UParseError* parseError, 417 UErrorCode* status){ 418 419 if(status == NULL || U_FAILURE(*status)){ 420 return 0; 421 } 422 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 423 *status = U_ILLEGAL_ARGUMENT_ERROR; 424 return 0; 425 } 426 427 428 429 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; 430 431 //initialize pointers to stack buffers 432 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; 433 int32_t b1Len, b2Len, b1PrimeLen, b3Len, 434 b1Capacity = MAX_LABEL_BUFFER_SIZE, 435 b2Capacity = MAX_LABEL_BUFFER_SIZE, 436 b3Capacity = MAX_LABEL_BUFFER_SIZE, 437 reqLength=0; 438 // UParseError parseError; 439 440 NamePrepTransform* prep = TestIDNA::getInstance(*status); 441 b1Len = 0; 442 UBool* caseFlags = NULL; 443 444 //get the options 445 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 446 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 447 448 UBool srcIsASCII = TRUE; 449 UBool srcIsLDH = TRUE; 450 int32_t failPos =0; 451 452 if(U_FAILURE(*status)){ 453 goto CLEANUP; 454 } 455 // step 1: find out if all the codepoints in src are ASCII 456 if(srcLength==-1){ 457 srcLength = 0; 458 for(;src[srcLength]!=0;){ 459 if(src[srcLength]> 0x7f){ 460 srcIsASCII = FALSE; 461 }if(prep->isLDHChar(src[srcLength])==FALSE){ 462 // here we do not assemble surrogates 463 // since we know that LDH code points 464 // are in the ASCII range only 465 srcIsLDH = FALSE; 466 failPos = srcLength; 467 } 468 srcLength++; 469 } 470 }else{ 471 for(int32_t j=0; j<srcLength; j++){ 472 if(src[j]> 0x7f){ 473 srcIsASCII = FALSE; 474 }else if(prep->isLDHChar(src[j])==FALSE){ 475 // here we do not assemble surrogates 476 // since we know that LDH code points 477 // are in the ASCII range only 478 srcIsLDH = FALSE; 479 failPos = j; 480 } 481 } 482 } 483 484 if(srcIsASCII == FALSE){ 485 // step 2: process the string 486 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status); 487 if(*status == U_BUFFER_OVERFLOW_ERROR){ 488 // redo processing of string 489 /* we do not have enough room so grow the buffer*/ 490 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 491 if(b1==NULL){ 492 *status = U_MEMORY_ALLOCATION_ERROR; 493 goto CLEANUP; 494 } 495 496 *status = U_ZERO_ERROR; // reset error 497 498 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); 499 } 500 //bail out on error 501 if(U_FAILURE(*status)){ 502 goto CLEANUP; 503 } 504 }else{ 505 506 // copy everything to b1 507 if(srcLength < b1Capacity){ 508 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); 509 }else{ 510 /* we do not have enough room so grow the buffer*/ 511 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); 512 if(b1==NULL){ 513 *status = U_MEMORY_ALLOCATION_ERROR; 514 goto CLEANUP; 515 } 516 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); 517 } 518 b1Len = srcLength; 519 } 520 //step 3: verify ACE Prefix 521 if(startsWithPrefix(src,srcLength)){ 522 523 //step 4: Remove the ACE Prefix 524 b1Prime = b1 + ACE_PREFIX_LENGTH; 525 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; 526 527 //step 5: Decode using punycode 528 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status); 529 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status); 530 531 if(*status == U_BUFFER_OVERFLOW_ERROR){ 532 // redo processing of string 533 /* we do not have enough room so grow the buffer*/ 534 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 535 if(b2==NULL){ 536 *status = U_MEMORY_ALLOCATION_ERROR; 537 goto CLEANUP; 538 } 539 540 *status = U_ZERO_ERROR; // reset error 541 542 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status); 543 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status); 544 } 545 546 547 //step 6:Apply toASCII 548 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status); 549 550 if(*status == U_BUFFER_OVERFLOW_ERROR){ 551 // redo processing of string 552 /* we do not have enough room so grow the buffer*/ 553 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); 554 if(b3==NULL){ 555 *status = U_MEMORY_ALLOCATION_ERROR; 556 goto CLEANUP; 557 } 558 559 *status = U_ZERO_ERROR; // reset error 560 561 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status); 562 563 } 564 //bail out on error 565 if(U_FAILURE(*status)){ 566 goto CLEANUP; 567 } 568 569 //step 7: verify 570 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ 571 *status = U_IDNA_VERIFICATION_ERROR; 572 goto CLEANUP; 573 } 574 575 //step 8: return output of step 5 576 reqLength = b2Len; 577 if(b2Len <= destCapacity) { 578 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); 579 } 580 }else{ 581 // verify that STD3 ASCII rules are satisfied 582 if(useSTD3ASCIIRules == TRUE){ 583 if( srcIsLDH == FALSE /* source contains some non-LDH characters */ 584 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ 585 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 586 587 /* populate the parseError struct */ 588 if(srcIsLDH==FALSE){ 589 // failPos is always set the index of failure 590 uprv_syntaxError(src,failPos, srcLength,parseError); 591 }else if(src[0] == HYPHEN){ 592 // fail position is 0 593 uprv_syntaxError(src,0,srcLength,parseError); 594 }else{ 595 // the last index in the source is always length-1 596 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); 597 } 598 599 goto CLEANUP; 600 } 601 } 602 //copy the source to destination 603 if(srcLength <= destCapacity){ 604 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); 605 } 606 reqLength = srcLength; 607 } 608 609 CLEANUP: 610 611 if(b1 != b1Stack){ 612 uprv_free(b1); 613 } 614 if(b2 != b2Stack){ 615 uprv_free(b2); 616 } 617 uprv_free(caseFlags); 618 619 // The RFC states that 620 // <quote> 621 // ToUnicode never fails. If any step fails, then the original input 622 // is returned immediately in that step. 623 // </quote> 624 // So if any step fails lets copy source to destination 625 if(U_FAILURE(*status)){ 626 //copy the source to destination 627 if(dest && srcLength <= destCapacity){ 628 if(srcLength == -1) { 629 uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR); 630 } else { 631 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); 632 } 633 } 634 reqLength = srcLength; 635 *status = U_ZERO_ERROR; 636 } 637 return u_terminateUChars(dest, destCapacity, reqLength, status); 638 } 639 640 641 static int32_t 642 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep, 643 UChar **limit, 644 UBool *done, 645 UErrorCode *status){ 646 if(srcLength == -1){ 647 int32_t i; 648 for(i=0 ; ;i++){ 649 if(src[i] == 0){ 650 *limit = src + i; // point to null 651 *done = TRUE; 652 return i; 653 } 654 if(prep->isLabelSeparator(src[i],*status)){ 655 *limit = src + (i+1); // go past the delimiter 656 return i; 657 658 } 659 } 660 }else{ 661 int32_t i; 662 for(i=0;i<srcLength;i++){ 663 if(prep->isLabelSeparator(src[i],*status)){ 664 *limit = src + (i+1); // go past the delimiter 665 return i; 666 } 667 } 668 // we have not found the delimiter 669 if(i==srcLength){ 670 *limit = src+srcLength; 671 *done = TRUE; 672 } 673 return i; 674 } 675 } 676 677 U_CFUNC int32_t U_EXPORT2 678 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, 679 UChar* dest, int32_t destCapacity, 680 int32_t options, 681 UParseError* parseError, 682 UErrorCode* status){ 683 684 if(status == NULL || U_FAILURE(*status)){ 685 return 0; 686 } 687 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 688 *status = U_ILLEGAL_ARGUMENT_ERROR; 689 return 0; 690 } 691 692 int32_t reqLength = 0; 693 // UParseError parseError; 694 695 NamePrepTransform* prep = TestIDNA::getInstance(*status); 696 697 //initialize pointers to stack buffers 698 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; 699 UChar *b1 = b1Stack; 700 int32_t b1Len, labelLen; 701 UChar* delimiter = (UChar*)src; 702 UChar* labelStart = (UChar*)src; 703 int32_t remainingLen = srcLength; 704 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; 705 706 //get the options 707 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 708 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 709 UBool done = FALSE; 710 711 if(U_FAILURE(*status)){ 712 goto CLEANUP; 713 } 714 715 716 if(srcLength == -1){ 717 for(;;){ 718 719 if(*delimiter == 0){ 720 break; 721 } 722 723 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); 724 b1Len = 0; 725 if(!(labelLen==0 && done)){// make sure this is not a root label separator. 726 727 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, 728 options, parseError, status); 729 730 if(*status == U_BUFFER_OVERFLOW_ERROR){ 731 // redo processing of string 732 /* we do not have enough room so grow the buffer*/ 733 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 734 if(b1==NULL){ 735 *status = U_MEMORY_ALLOCATION_ERROR; 736 goto CLEANUP; 737 } 738 739 *status = U_ZERO_ERROR; // reset error 740 741 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, 742 options, parseError, status); 743 744 } 745 } 746 747 if(U_FAILURE(*status)){ 748 goto CLEANUP; 749 } 750 int32_t tempLen = (reqLength + b1Len ); 751 // copy to dest 752 if( tempLen< destCapacity){ 753 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); 754 } 755 756 reqLength = tempLen; 757 758 // add the label separator 759 if(done == FALSE){ 760 if(reqLength < destCapacity){ 761 dest[reqLength] = FULL_STOP; 762 } 763 reqLength++; 764 } 765 766 labelStart = delimiter; 767 } 768 }else{ 769 for(;;){ 770 771 if(delimiter == src+srcLength){ 772 break; 773 } 774 775 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); 776 777 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, 778 options,parseError, status); 779 780 if(*status == U_BUFFER_OVERFLOW_ERROR){ 781 // redo processing of string 782 /* we do not have enough room so grow the buffer*/ 783 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 784 if(b1==NULL){ 785 *status = U_MEMORY_ALLOCATION_ERROR; 786 goto CLEANUP; 787 } 788 789 *status = U_ZERO_ERROR; // reset error 790 791 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, 792 options, parseError, status); 793 794 } 795 796 if(U_FAILURE(*status)){ 797 goto CLEANUP; 798 } 799 int32_t tempLen = (reqLength + b1Len ); 800 // copy to dest 801 if( tempLen< destCapacity){ 802 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); 803 } 804 805 reqLength = tempLen; 806 807 // add the label separator 808 if(done == FALSE){ 809 if(reqLength < destCapacity){ 810 dest[reqLength] = FULL_STOP; 811 } 812 reqLength++; 813 } 814 815 labelStart = delimiter; 816 remainingLen = srcLength - (delimiter - src); 817 } 818 } 819 820 821 CLEANUP: 822 823 if(b1 != b1Stack){ 824 uprv_free(b1); 825 } 826 827 // delete prep; 828 829 return u_terminateUChars(dest, destCapacity, reqLength, status); 830 } 831 832 U_CFUNC int32_t U_EXPORT2 833 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, 834 UChar* dest, int32_t destCapacity, 835 int32_t options, 836 UParseError* parseError, 837 UErrorCode* status){ 838 839 if(status == NULL || U_FAILURE(*status)){ 840 return 0; 841 } 842 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 843 *status = U_ILLEGAL_ARGUMENT_ERROR; 844 return 0; 845 } 846 847 int32_t reqLength = 0; 848 849 UBool done = FALSE; 850 851 NamePrepTransform* prep = TestIDNA::getInstance(*status); 852 853 //initialize pointers to stack buffers 854 UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; 855 UChar *b1 = b1Stack; 856 int32_t b1Len, labelLen; 857 UChar* delimiter = (UChar*)src; 858 UChar* labelStart = (UChar*)src; 859 int32_t remainingLen = srcLength; 860 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; 861 862 //get the options 863 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); 864 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); 865 866 if(U_FAILURE(*status)){ 867 goto CLEANUP; 868 } 869 870 if(srcLength == -1){ 871 for(;;){ 872 873 if(*delimiter == 0){ 874 break; 875 } 876 877 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); 878 879 if(labelLen==0 && done==FALSE){ 880 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 881 } 882 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity, 883 options, parseError, status); 884 885 if(*status == U_BUFFER_OVERFLOW_ERROR){ 886 // redo processing of string 887 /* we do not have enough room so grow the buffer*/ 888 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 889 if(b1==NULL){ 890 *status = U_MEMORY_ALLOCATION_ERROR; 891 goto CLEANUP; 892 } 893 894 *status = U_ZERO_ERROR; // reset error 895 896 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, 897 options, parseError, status); 898 899 } 900 901 if(U_FAILURE(*status)){ 902 goto CLEANUP; 903 } 904 int32_t tempLen = (reqLength + b1Len ); 905 // copy to dest 906 if( tempLen< destCapacity){ 907 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); 908 } 909 910 reqLength = tempLen; 911 // add the label separator 912 if(done == FALSE){ 913 if(reqLength < destCapacity){ 914 dest[reqLength] = FULL_STOP; 915 } 916 reqLength++; 917 } 918 919 labelStart = delimiter; 920 } 921 }else{ 922 for(;;){ 923 924 if(delimiter == src+srcLength){ 925 break; 926 } 927 928 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); 929 930 if(labelLen==0 && done==FALSE){ 931 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 932 } 933 934 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity, 935 options, parseError, status); 936 937 if(*status == U_BUFFER_OVERFLOW_ERROR){ 938 // redo processing of string 939 /* we do not have enough room so grow the buffer*/ 940 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 941 if(b1==NULL){ 942 *status = U_MEMORY_ALLOCATION_ERROR; 943 goto CLEANUP; 944 } 945 946 *status = U_ZERO_ERROR; // reset error 947 948 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, 949 options, parseError, status); 950 951 } 952 953 if(U_FAILURE(*status)){ 954 goto CLEANUP; 955 } 956 int32_t tempLen = (reqLength + b1Len ); 957 // copy to dest 958 if( tempLen< destCapacity){ 959 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); 960 } 961 962 reqLength = tempLen; 963 964 // add the label separator 965 if(done == FALSE){ 966 if(reqLength < destCapacity){ 967 dest[reqLength] = FULL_STOP; 968 } 969 reqLength++; 970 } 971 972 labelStart = delimiter; 973 remainingLen = srcLength - (delimiter - src); 974 } 975 } 976 977 CLEANUP: 978 979 if(b1 != b1Stack){ 980 uprv_free(b1); 981 } 982 983 // delete prep; 984 985 return u_terminateUChars(dest, destCapacity, reqLength, status); 986 } 987 988 U_CFUNC int32_t U_EXPORT2 989 idnaref_compare( const UChar *s1, int32_t length1, 990 const UChar *s2, int32_t length2, 991 int32_t options, 992 UErrorCode* status){ 993 994 if(status == NULL || U_FAILURE(*status)){ 995 return -1; 996 } 997 998 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; 999 UChar *b1 = b1Stack, *b2 = b2Stack; 1000 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; 1001 int32_t result = -1; 1002 1003 UParseError parseError; 1004 1005 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); 1006 if(*status == U_BUFFER_OVERFLOW_ERROR){ 1007 // redo processing of string 1008 /* we do not have enough room so grow the buffer*/ 1009 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 1010 if(b1==NULL){ 1011 *status = U_MEMORY_ALLOCATION_ERROR; 1012 goto CLEANUP; 1013 } 1014 1015 *status = U_ZERO_ERROR; // reset error 1016 1017 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); 1018 1019 } 1020 1021 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status); 1022 if(*status == U_BUFFER_OVERFLOW_ERROR){ 1023 // redo processing of string 1024 /* we do not have enough room so grow the buffer*/ 1025 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 1026 if(b2==NULL){ 1027 *status = U_MEMORY_ALLOCATION_ERROR; 1028 goto CLEANUP; 1029 } 1030 1031 *status = U_ZERO_ERROR; // reset error 1032 1033 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status); 1034 1035 } 1036 // when toASCII is applied all label separators are replaced with FULL_STOP 1037 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); 1038 1039 CLEANUP: 1040 if(b1 != b1Stack){ 1041 uprv_free(b1); 1042 } 1043 1044 if(b2 != b2Stack){ 1045 uprv_free(b2); 1046 } 1047 1048 return result; 1049 } 1050 #endif /* #if !UCONFIG_NO_IDNA */ 1051