Home | History | Annotate | Download | only in Misc
      1 /*
      2  * Copyright (C) 2005, 2007, 2008, 2009 Apple Inc. All rights reserved.
      3  * Copyright (C) 2006 Alexey Proskuryakov (ap (at) nypop.com)
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *
      9  * 1.  Redistributions of source code must retain the above copyright
     10  *     notice, this list of conditions and the following disclaimer.
     11  * 2.  Redistributions in binary form must reproduce the above copyright
     12  *     notice, this list of conditions and the following disclaimer in the
     13  *     documentation and/or other materials provided with the distribution.
     14  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     15  *     its contributors may be used to endorse or promote products derived
     16  *     from this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     25  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 #import "WebNSURLExtras.h"
     31 
     32 #import "WebKitNSStringExtras.h"
     33 #import "WebLocalizableStrings.h"
     34 #import "WebNSDataExtras.h"
     35 #import "WebNSObjectExtras.h"
     36 #import "WebSystemInterface.h"
     37 #import <Foundation/NSURLRequest.h>
     38 #import <WebCore/KURL.h>
     39 #import <WebCore/LoaderNSURLExtras.h>
     40 #import <WebKitSystemInterface.h>
     41 #import <wtf/Assertions.h>
     42 #import <unicode/uchar.h>
     43 #import <unicode/uidna.h>
     44 #import <unicode/uscript.h>
     45 
using namespace WebCore;
using namespace WTF;

// Callback invoked once per host-name range found in a URL string.
// Receives the full string, the range of one host name inside it, and the
// caller-supplied opaque context pointer.
typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);

// Needs to be big enough to hold an IDN-encoded name.
// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
#define HOST_NAME_BUFFER_LENGTH 2048

// Size in bytes of the first buffer handed to CFURLGetBytes. URLs that do not
// fit are re-read into an exactly-sized heap allocation.
#define URL_BYTES_BUFFER_LENGTH 2048

// Storage for the IDN script whitelist: enough 32-bit words to hold one bit per
// ICU script code (USCRIPT_CODE_LIMIT rounded up to a multiple of 32).
// NOTE(review): presumably filled in once under the pthread_once guard below;
// the code that reads the whitelist file is not visible in this part of the file.
static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
     59 
     60 static inline BOOL isLookalikeCharacter(int charCode)
     61 {
     62 // FIXME: Move this code down into WebCore so it can be shared with other platforms.
     63 
     64 // This function treats the following as unsafe, lookalike characters:
     65 // any non-printable character, any character considered as whitespace that isn't already converted to a space by ICU,
     66 // and any ignorable character.
     67 
     68 // We also considered the characters in Mozilla's blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars),
     69 // and included all of these characters that ICU can encode.
     70 
     71     if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
     72         return YES;
     73 
     74     switch (charCode) {
     75         case 0x00ED: /* LATIN SMALL LETTER I WITH ACUTE */
     76         case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */
     77         case 0x0251: /* LATIN SMALL LETTER ALPHA */
     78         case 0x0261: /* LATIN SMALL LETTER SCRIPT G */
     79         case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */
     80         case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */
     81         case 0x05B4: /* HEBREW POINT HIRIQ */
     82         case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */
     83         case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */
     84         case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */
     85         case 0x0660: /* ARABIC INDIC DIGIT ZERO */
     86         case 0x06D4: /* ARABIC FULL STOP */
     87         case 0x06F0: /* EXTENDED ARABIC INDIC DIGIT ZERO */
     88         case 0x2027: /* HYPHENATION POINT */
     89         case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
     90         case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
     91         case 0x2044: /* FRACTION SLASH */
     92         case 0x2215: /* DIVISION SLASH */
     93         case 0x2216: /* SET MINUS */
     94         case 0x233F: /* APL FUNCTIONAL SYMBOL SLASH BAR */
     95         case 0x23AE: /* INTEGRAL EXTENSION */
     96         case 0x244A: /* OCR DOUBLE BACKSLASH */
     97         case 0x2571: /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
     98         case 0x2572: /* BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT */
     99         case 0x29F8: /* BIG SOLIDUS */
    100         case 0x29f6: /* SOLIDUS WITH OVERBAR */
    101         case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */
    102         case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */
    103         case 0x3008: /* LEFT ANGLE BRACKET */
    104         case 0x3014: /* LEFT TORTOISE SHELL BRACKET */
    105         case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */
    106         case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */
    107         case 0x3035: /* VERTICAL KANA REPEAT MARK LOWER HALF */
    108         case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */
    109         case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */
    110         case 0x33DF: /* SQUARE A OVER M */
    111         case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
    112         case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
    113         case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
    114         case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */
    115         case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */
    116             return YES;
    117         default:
    118             return NO;
    119     }
    120 }
    121 
    122 static char hexDigit(int i)
    123 {
    124     if (i < 0 || i > 16) {
    125         LOG_ERROR("illegal hex digit");
    126         return '0';
    127     }
    128     int h = i;
    129     if (h >= 10) {
    130         h = h - 10 + 'A';
    131     }
    132     else {
    133         h += '0';
    134     }
    135     return h;
    136 }
    137 
    138 static BOOL isHexDigit(char c)
    139 {
    140     return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    141 }
    142 
    143 static int hexDigitValue(char c)
    144 {
    145     if (c >= '0' && c <= '9') {
    146         return c - '0';
    147     }
    148     if (c >= 'A' && c <= 'F') {
    149         return c - 'A' + 10;
    150     }
    151     if (c >= 'a' && c <= 'f') {
    152         return c - 'a' + 10;
    153     }
    154     LOG_ERROR("illegal hex digit");
    155     return 0;
    156 }
    157 
    158 static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
    159 {
    160     // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
    161     // Skip quoted strings so that characters in them don't confuse us.
    162     // When we find a '?' character, we are past the part of the URL that contains host names.
    163 
    164     static NSCharacterSet *hostNameOrStringStartCharacters;
    165     if (hostNameOrStringStartCharacters == nil) {
    166         hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
    167         CFRetain(hostNameOrStringStartCharacters);
    168     }
    169     static NSCharacterSet *hostNameEndCharacters;
    170     if (hostNameEndCharacters == nil) {
    171         hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
    172         CFRetain(hostNameEndCharacters);
    173     }
    174     static NSCharacterSet *quotedStringCharacters;
    175     if (quotedStringCharacters == nil) {
    176         quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
    177         CFRetain(quotedStringCharacters);
    178     }
    179 
    180     unsigned stringLength = [string length];
    181     NSRange remaining = NSMakeRange(0, stringLength);
    182 
    183     while (1) {
    184         // Find start of host name or of quoted string.
    185         NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
    186         if (hostNameOrStringStart.location == NSNotFound) {
    187             return;
    188         }
    189         unichar c = [string characterAtIndex:hostNameOrStringStart.location];
    190         remaining.location = NSMaxRange(hostNameOrStringStart);
    191         remaining.length = stringLength - remaining.location;
    192 
    193         if (c == '?') {
    194             return;
    195         }
    196 
    197         if (c == '@') {
    198             // Find end of host name.
    199             unsigned hostNameStart = remaining.location;
    200             NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
    201             BOOL done;
    202             if (hostNameEnd.location == NSNotFound) {
    203                 hostNameEnd.location = stringLength;
    204                 done = YES;
    205             } else {
    206                 remaining.location = hostNameEnd.location;
    207                 remaining.length = stringLength - remaining.location;
    208                 done = NO;
    209             }
    210 
    211             // Process host name range.
    212             f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);
    213 
    214             if (done) {
    215                 return;
    216             }
    217         } else {
    218             // Skip quoted string.
    219             ASSERT(c == '"');
    220             while (1) {
    221                 NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
    222                 if (escapedCharacterOrStringEnd.location == NSNotFound) {
    223                     return;
    224                 }
    225                 c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
    226                 remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
    227                 remaining.length = stringLength - remaining.location;
    228 
    229                 // If we are the end of the string, then break from the string loop back to the host name loop.
    230                 if (c == '"') {
    231                     break;
    232                 }
    233 
    234                 // Skip escaped character.
    235                 ASSERT(c == '\\');
    236                 if (remaining.length == 0) {
    237                     return;
    238                 }
    239                 remaining.location += 1;
    240                 remaining.length -= 1;
    241             }
    242         }
    243     }
    244 }
    245 
    246 static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
    247 {
    248     // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
    249     // but we have to do it before doing all the %-escaping, and this is the only
    250     // code we have that parses mailto URLs anyway.
    251 
    252     // Maybe we should implement this using a character buffer instead?
    253 
    254     if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
    255         applyHostNameFunctionToMailToURLString(string, f, context);
    256         return;
    257     }
    258 
    259     // Find the host name in a hierarchical URL.
    260     // It comes after a "://" sequence, with scheme characters preceding.
    261     // If ends with the end of the string or a ":", "/", or a "?".
    262     // If there is a "@" character, the host part is just the part after the "@".
    263     NSRange separatorRange = [string rangeOfString:@"://"];
    264     if (separatorRange.location == NSNotFound) {
    265         return;
    266     }
    267 
    268     // Check that all characters before the :// are valid scheme characters.
    269     static NSCharacterSet *nonSchemeCharacters;
    270     if (nonSchemeCharacters == nil) {
    271         nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet];
    272         CFRetain(nonSchemeCharacters);
    273     }
    274     if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) {
    275         return;
    276     }
    277 
    278     unsigned stringLength = [string length];
    279 
    280     static NSCharacterSet *hostTerminators;
    281     if (hostTerminators == nil) {
    282         hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"];
    283         CFRetain(hostTerminators);
    284     }
    285 
    286     // Start after the separator.
    287     unsigned authorityStart = NSMaxRange(separatorRange);
    288 
    289     // Find terminating character.
    290     NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)];
    291     unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location;
    292 
    293     // Find "@" for the start of the host name.
    294     NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)];
    295     unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);
    296 
    297     f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
    298 }
    299 
    300 @implementation NSURL (WebNSURLExtras)
    301 
    302 static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
    303 {
    304     BOOL needsMapping = encode
    305         ? [string _web_hostNameNeedsEncodingWithRange:range]
    306         : [string _web_hostNameNeedsDecodingWithRange:range];
    307     if (!needsMapping) {
    308         return;
    309     }
    310 
    311     NSMutableArray **array = (NSMutableArray **)context;
    312     if (*array == nil) {
    313         *array = [[NSMutableArray alloc] init];
    314     }
    315 
    316     [*array addObject:[NSValue valueWithRange:range]];
    317 }
    318 
    319 static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
    320 {
    321     return collectRangesThatNeedMapping(string, range, context, YES);
    322 }
    323 
    324 static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
    325 {
    326     return collectRangesThatNeedMapping(string, range, context, NO);
    327 }
    328 
    329 static NSString *mapHostNames(NSString *string, BOOL encode)
    330 {
    331     // Generally, we want to optimize for the case where there is one host name that does not need mapping.
    332 
    333     if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
    334         return string;
    335 
    336     // Make a list of ranges that actually need mapping.
    337     NSMutableArray *hostNameRanges = nil;
    338     StringRangeApplierFunction f = encode
    339         ? collectRangesThatNeedEncoding
    340         : collectRangesThatNeedDecoding;
    341     applyHostNameFunctionToURLString(string, f, &hostNameRanges);
    342     if (hostNameRanges == nil)
    343         return string;
    344 
    345     // Do the mapping.
    346     NSMutableString *mutableCopy = [string mutableCopy];
    347     unsigned i = [hostNameRanges count];
    348     while (i-- != 0) {
    349         NSRange hostNameRange = [[hostNameRanges objectAtIndex:i] rangeValue];
    350         NSString *mappedHostName = encode
    351             ? [string _web_encodeHostNameWithRange:hostNameRange]
    352             : [string _web_decodeHostNameWithRange:hostNameRange];
    353         [mutableCopy replaceCharactersInRange:hostNameRange withString:mappedHostName];
    354     }
    355     [hostNameRanges release];
    356     return [mutableCopy autorelease];
    357 }
    358 
    359 + (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
    360 {
    361     if (string == nil) {
    362         return nil;
    363     }
    364     string = mapHostNames([string _webkit_stringByTrimmingWhitespace], YES);
    365 
    366     NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
    367     ASSERT(userTypedData);
    368 
    369     const UInt8 *inBytes = static_cast<const UInt8 *>([userTypedData bytes]);
    370     int inLength = [userTypedData length];
    371     if (inLength == 0) {
    372         return [NSURL URLWithString:@""];
    373     }
    374 
    375     char *outBytes = static_cast<char *>(malloc(inLength * 3)); // large enough to %-escape every character
    376     char *p = outBytes;
    377     int outLength = 0;
    378     int i;
    379     for (i = 0; i < inLength; i++) {
    380         UInt8 c = inBytes[i];
    381         if (c <= 0x20 || c >= 0x7f) {
    382             *p++ = '%';
    383             *p++ = hexDigit(c >> 4);
    384             *p++ = hexDigit(c & 0xf);
    385             outLength += 3;
    386         }
    387         else {
    388             *p++ = c;
    389             outLength++;
    390         }
    391     }
    392 
    393     NSData *data = [NSData dataWithBytesNoCopy:outBytes length:outLength]; // adopts outBytes
    394     return [self _web_URLWithData:data relativeToURL:URL];
    395 }
    396 
    397 + (NSURL *)_web_URLWithUserTypedString:(NSString *)string
    398 {
    399     return [self _web_URLWithUserTypedString:string relativeToURL:nil];
    400 }
    401 
    402 + (NSURL *)_web_URLWithDataAsString:(NSString *)string
    403 {
    404     if (string == nil) {
    405         return nil;
    406     }
    407     return [self _web_URLWithDataAsString:string relativeToURL:nil];
    408 }
    409 
    410 + (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
    411 {
    412     if (string == nil) {
    413         return nil;
    414     }
    415     string = [string _webkit_stringByTrimmingWhitespace];
    416     NSData *data = [string dataUsingEncoding:NSISOLatin1StringEncoding];
    417     return [self _web_URLWithData:data relativeToURL:baseURL];
    418 }
    419 
    420 + (NSURL *)_web_URLWithData:(NSData *)data
    421 {
    422     return [NSURL _web_URLWithData:data relativeToURL:nil];
    423 }
    424 
    425 + (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
    426 {
    427     if (data == nil)
    428         return nil;
    429 
    430     NSURL *result = nil;
    431     size_t length = [data length];
    432     if (length > 0) {
    433         // work around <rdar://4470771>: CFURLCreateAbsoluteURLWithBytes(.., TRUE) doesn't remove non-path components.
    434         baseURL = [baseURL _webkit_URLByRemovingResourceSpecifier];
    435 
    436         const UInt8 *bytes = static_cast<const UInt8*>([data bytes]);
    437         // NOTE: We use UTF-8 here since this encoding is used when computing strings when returning URL components
    438         // (e.g calls to NSURL -path). However, this function is not tolerant of illegal UTF-8 sequences, which
    439         // could either be a malformed string or bytes in a different encoding, like shift-jis, so we fall back
    440         // onto using ISO Latin 1 in those cases.
    441         result = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, length, kCFStringEncodingUTF8, (CFURLRef)baseURL, YES));
    442         if (!result)
    443             result = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, length, kCFStringEncodingISOLatin1, (CFURLRef)baseURL, YES));
    444     } else
    445         result = [NSURL URLWithString:@""];
    446 
    447     return result;
    448 }
    449 
    450 - (NSData *)_web_originalData
    451 {
    452     UInt8 *buffer = (UInt8 *)malloc(URL_BYTES_BUFFER_LENGTH);
    453     CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
    454     if (bytesFilled == -1) {
    455         CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
    456         buffer = (UInt8 *)realloc(buffer, bytesToAllocate);
    457         bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
    458         ASSERT(bytesFilled == bytesToAllocate);
    459     }
    460 
    461     // buffer is adopted by the NSData
    462     NSData *data = [NSData dataWithBytesNoCopy:buffer length:bytesFilled freeWhenDone:YES];
    463 
    464     NSURL *baseURL = (NSURL *)CFURLGetBaseURL((CFURLRef)self);
    465     if (baseURL)
    466         return [[NSURL _web_URLWithData:data relativeToURL:baseURL] _web_originalData];
    467     return data;
    468 }
    469 
    470 - (NSString *)_web_originalDataAsString
    471 {
    472     return [[[NSString alloc] initWithData:[self _web_originalData] encoding:NSISOLatin1StringEncoding] autorelease];
    473 }
    474 
    475 static CFStringRef createStringWithEscapedUnsafeCharacters(CFStringRef string)
    476 {
    477     CFIndex length = CFStringGetLength(string);
    478     Vector<UChar, 2048> sourceBuffer(length);
    479     CFStringGetCharacters(string, CFRangeMake(0, length), sourceBuffer.data());
    480 
    481     Vector<UChar, 2048> outBuffer;
    482 
    483     CFIndex i = 0;
    484     while (i < length) {
    485         UChar32 c;
    486         U16_NEXT(sourceBuffer, i, length, c)
    487 
    488         if (isLookalikeCharacter(c)) {
    489             uint8_t utf8Buffer[4];
    490             CFIndex offset = 0;
    491             UBool failure = false;
    492             U8_APPEND(utf8Buffer, offset, 4, c, failure)
    493             ASSERT(!failure);
    494 
    495             for (CFIndex j = 0; j < offset; ++j) {
    496                 outBuffer.append('%');
    497                 outBuffer.append(hexDigit(utf8Buffer[j] >> 4));
    498                 outBuffer.append(hexDigit(utf8Buffer[j] & 0xf));
    499             }
    500         } else {
    501             UChar utf16Buffer[2];
    502             CFIndex offset = 0;
    503             UBool failure = false;
    504             U16_APPEND(utf16Buffer, offset, 2, c, failure)
    505             ASSERT(!failure);
    506             for (CFIndex j = 0; j < offset; ++j)
    507                 outBuffer.append(utf16Buffer[j]);
    508         }
    509     }
    510 
    511     return CFStringCreateWithCharacters(NULL, outBuffer.data(), outBuffer.size());
    512 }
    513 
    514 - (NSString *)_web_userVisibleString
    515 {
    516     NSData *data = [self _web_originalData];
    517     const unsigned char *before = static_cast<const unsigned char*>([data bytes]);
    518     int length = [data length];
    519 
    520     bool needsHostNameDecoding = false;
    521 
    522     const unsigned char *p = before;
    523     int bufferLength = (length * 3) + 1;
    524     char *after = static_cast<char *>(malloc(bufferLength)); // large enough to %-escape every character
    525     char *q = after;
    526     int i;
    527     for (i = 0; i < length; i++) {
    528         unsigned char c = p[i];
    529         // unescape escape sequences that indicate bytes greater than 0x7f
    530         if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
    531             unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
    532             if (u > 0x7f) {
    533                 // unescape
    534                 *q++ = u;
    535             } else {
    536                 // do not unescape
    537                 *q++ = p[i];
    538                 *q++ = p[i + 1];
    539                 *q++ = p[i + 2];
    540             }
    541             i += 2;
    542         } else {
    543             *q++ = c;
    544 
    545             // Check for "xn--" in an efficient, non-case-sensitive, way.
    546             if (c == '-' && i >= 3 && !needsHostNameDecoding && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
    547                 needsHostNameDecoding = true;
    548         }
    549     }
    550     *q = '\0';
    551 
    552     // Check string to see if it can be converted to display using UTF-8
    553     NSString *result = [NSString stringWithUTF8String:after];
    554     if (!result) {
    555         // Could not convert to UTF-8.
    556         // Convert characters greater than 0x7f to escape sequences.
    557         // Shift current string to the end of the buffer
    558         // then we will copy back bytes to the start of the buffer
    559         // as we convert.
    560         int afterlength = q - after;
    561         char *p = after + bufferLength - afterlength - 1;
    562         memmove(p, after, afterlength + 1); // copies trailing '\0'
    563         char *q = after;
    564         while (*p) {
    565             unsigned char c = *p;
    566             if (c > 0x7f) {
    567                 *q++ = '%';
    568                 *q++ = hexDigit(c >> 4);
    569                 *q++ = hexDigit(c & 0xf);
    570             } else {
    571                 *q++ = *p;
    572             }
    573             p++;
    574         }
    575         *q = '\0';
    576         result = [NSString stringWithUTF8String:after];
    577     }
    578 
    579     free(after);
    580 
    581     result = mapHostNames(result, !needsHostNameDecoding);
    582     result = [result precomposedStringWithCanonicalMapping];
    583     return WebCFAutorelease(createStringWithEscapedUnsafeCharacters((CFStringRef)result));
    584 }
    585 
    586 - (BOOL)_web_isEmpty
    587 {
    588     if (!CFURLGetBaseURL((CFURLRef)self))
    589         return CFURLGetBytes((CFURLRef)self, NULL, 0) == 0;
    590     return [[self _web_originalData] length] == 0;
    591 }
    592 
    593 - (const char *)_web_URLCString
    594 {
    595     NSMutableData *data = [NSMutableData data];
    596     [data appendData:[self _web_originalData]];
    597     [data appendBytes:"\0" length:1];
    598     return (const char *)[data bytes];
    599  }
    600 
    601 - (NSURL *)_webkit_canonicalize
    602 {
    603     NSURLRequest *request = [[NSURLRequest alloc] initWithURL:self];
    604     Class concreteClass = WKNSURLProtocolClassForRequest(request);
    605     if (!concreteClass) {
    606         [request release];
    607         return self;
    608     }
    609 
    610     // This applies NSURL's concept of canonicalization, but not KURL's concept. It would
    611     // make sense to apply both, but when we tried that it caused a performance degradation
    612     // (see 5315926). It might make sense to apply only the KURL concept and not the NSURL
    613     // concept, but it's too risky to make that change for WebKit 3.0.
    614     NSURLRequest *newRequest = [concreteClass canonicalRequestForRequest:request];
    615     NSURL *newURL = [newRequest URL];
    616     NSURL *result = [[newURL retain] autorelease];
    617     [request release];
    618 
    619     return result;
    620 }
    621 
    622 - (NSURL *)_web_URLByTruncatingOneCharacterBeforeComponent:(CFURLComponentType)component
    623 {
    624     CFRange fragRg = CFURLGetByteRangeForComponent((CFURLRef)self, component, NULL);
    625     if (fragRg.location == kCFNotFound)
    626         return self;
    627 
    628     UInt8 *urlBytes, buffer[2048];
    629     CFIndex numBytes = CFURLGetBytes((CFURLRef)self, buffer, 2048);
    630     if (numBytes == -1) {
    631         numBytes = CFURLGetBytes((CFURLRef)self, NULL, 0);
    632         urlBytes = static_cast<UInt8*>(malloc(numBytes));
    633         CFURLGetBytes((CFURLRef)self, urlBytes, numBytes);
    634     } else
    635         urlBytes = buffer;
    636 
    637     NSURL *result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, fragRg.location - 1, kCFStringEncodingUTF8, NULL));
    638     if (!result)
    639         result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, fragRg.location - 1, kCFStringEncodingISOLatin1, NULL));
    640 
    641     if (urlBytes != buffer) free(urlBytes);
    642     return result ? [result autorelease] : self;
    643 }
    644 
    645 - (NSURL *)_webkit_URLByRemovingFragment
    646 {
    647     return [self _web_URLByTruncatingOneCharacterBeforeComponent:kCFURLComponentFragment];
    648 }
    649 
    650 - (NSURL *)_webkit_URLByRemovingResourceSpecifier
    651 {
    652     return [self _web_URLByTruncatingOneCharacterBeforeComponent:kCFURLComponentResourceSpecifier];
    653 }
    654 
    655 - (NSURL *)_web_URLByRemovingComponentAndSubsequentCharacter:(CFURLComponentType)component
    656 {
    657     CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, component, 0);
    658     if (range.location == kCFNotFound)
    659         return self;
    660 
    661     // Remove one subsequent character.
    662     ++range.length;
    663 
    664     UInt8* urlBytes;
    665     UInt8 buffer[2048];
    666     CFIndex numBytes = CFURLGetBytes((CFURLRef)self, buffer, 2048);
    667     if (numBytes == -1) {
    668         numBytes = CFURLGetBytes((CFURLRef)self, NULL, 0);
    669         urlBytes = static_cast<UInt8*>(malloc(numBytes));
    670         CFURLGetBytes((CFURLRef)self, urlBytes, numBytes);
    671     } else
    672         urlBytes = buffer;
    673 
    674     if (numBytes < range.location)
    675         return self;
    676     if (numBytes < range.location + range.length)
    677         range.length = numBytes - range.location;
    678 
    679     memmove(urlBytes + range.location, urlBytes + range.location + range.length, numBytes - range.location + range.length);
    680 
    681     NSURL *result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, numBytes - range.length, kCFStringEncodingUTF8, NULL));
    682     if (!result)
    683         result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, numBytes - range.length, kCFStringEncodingISOLatin1, NULL));
    684 
    685     if (urlBytes != buffer)
    686         free(urlBytes);
    687 
    688     return result ? [result autorelease] : self;
    689 }
    690 
    691 - (NSURL *)_web_URLByRemovingUserInfo
    692 {
    693     return [self _web_URLByRemovingComponentAndSubsequentCharacter:kCFURLComponentUserInfo];
    694 }
    695 
    696 - (BOOL)_webkit_isJavaScriptURL
    697 {
    698     return [[self _web_originalDataAsString] _webkit_isJavaScriptURL];
    699 }
    700 
    701 - (NSString *)_webkit_scriptIfJavaScriptURL
    702 {
    703     return [[self absoluteString] _webkit_scriptIfJavaScriptURL];
    704 }
    705 
    706 - (BOOL)_webkit_isFileURL
    707 {
    708     return [[self _web_originalDataAsString] _webkit_isFileURL];
    709 }
    710 
    711 - (BOOL)_webkit_isFTPDirectoryURL
    712 {
    713     return [[self _web_originalDataAsString] _webkit_isFTPDirectoryURL];
    714 }
    715 
    716 - (BOOL)_webkit_shouldLoadAsEmptyDocument
    717 {
    718     return [[self _web_originalDataAsString] _webkit_hasCaseInsensitivePrefix:@"about:"] || [self _web_isEmpty];
    719 }
    720 
    721 - (NSURL *)_web_URLWithLowercasedScheme
    722 {
    723     CFRange range;
    724     CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &range);
    725     if (range.location == kCFNotFound) {
    726         return self;
    727     }
    728 
    729     UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
    730     UInt8 *buffer = static_buffer;
    731     CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
    732     if (bytesFilled == -1) {
    733         CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
    734         buffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
    735         bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
    736         ASSERT(bytesFilled == bytesToAllocate);
    737     }
    738 
    739     int i;
    740     BOOL changed = NO;
    741     for (i = 0; i < range.length; ++i) {
    742         char c = buffer[range.location + i];
    743         char lower = toASCIILower(c);
    744         if (c != lower) {
    745             buffer[range.location + i] = lower;
    746             changed = YES;
    747         }
    748     }
    749 
    750     NSURL *result = changed
    751         ? (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingUTF8, nil, YES))
    752         : (NSURL *)self;
    753 
    754     if (buffer != static_buffer) {
    755         free(buffer);
    756     }
    757 
    758     return result;
    759 }
    760 
    761 
    762 -(BOOL)_web_hasQuestionMarkOnlyQueryString
    763 {
    764     CFRange rangeWithSeparators;
    765     CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &rangeWithSeparators);
    766     if (rangeWithSeparators.location != kCFNotFound && rangeWithSeparators.length == 1) {
    767         return YES;
    768     }
    769     return NO;
    770 }
    771 
    772 -(NSData *)_web_schemeSeparatorWithoutColon
    773 {
    774     NSData *result = nil;
    775     CFRange rangeWithSeparators;
    776     CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &rangeWithSeparators);
    777     if (rangeWithSeparators.location != kCFNotFound) {
    778         NSString *absoluteString = [self absoluteString];
    779         NSRange separatorsRange = NSMakeRange(range.location + range.length + 1, rangeWithSeparators.length - range.length - 1);
    780         if (separatorsRange.location + separatorsRange.length <= [absoluteString length]) {
    781             NSString *slashes = [absoluteString substringWithRange:separatorsRange];
    782             result = [slashes dataUsingEncoding:NSISOLatin1StringEncoding];
    783         }
    784     }
    785     return result;
    786 }
    787 
    788 #define completeURL (CFURLComponentType)-1
    789 
    790 -(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
    791 {
    792     static int URLComponentTypeBufferLength = 2048;
    793 
    794     UInt8 staticAllBytesBuffer[URLComponentTypeBufferLength];
    795     UInt8 *allBytesBuffer = staticAllBytesBuffer;
    796 
    797     CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, URLComponentTypeBufferLength);
    798     if (bytesFilled == -1) {
    799         CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
    800         allBytesBuffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
    801         bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, bytesToAllocate);
    802     }
    803 
    804     CFRange range;
    805     if (componentType != completeURL) {
    806         range = CFURLGetByteRangeForComponent((CFURLRef)self, componentType, NULL);
    807         if (range.location == kCFNotFound) {
    808             return nil;
    809         }
    810     }
    811     else {
    812         range.location = 0;
    813         range.length = bytesFilled;
    814     }
    815 
    816     NSData *componentData = [NSData dataWithBytes:allBytesBuffer + range.location length:range.length];
    817 
    818     const unsigned char *bytes = static_cast<const unsigned char *>([componentData bytes]);
    819     NSMutableData *resultData = [NSMutableData data];
    820     // NOTE: add leading '?' to query strings non-zero length query strings.
    821     // NOTE: retain question-mark only query strings.
    822     if (componentType == kCFURLComponentQuery) {
    823         if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString]) {
    824             [resultData appendBytes:"?" length:1];
    825         }
    826     }
    827     int i;
    828     for (i = 0; i < range.length; i++) {
    829         unsigned char c = bytes[i];
    830         if (c <= 0x20 || c >= 0x7f) {
    831             char escaped[3];
    832             escaped[0] = '%';
    833             escaped[1] = hexDigit(c >> 4);
    834             escaped[2] = hexDigit(c & 0xf);
    835             [resultData appendBytes:escaped length:3];
    836         }
    837         else {
    838             char b[1];
    839             b[0] = c;
    840             [resultData appendBytes:b length:1];
    841         }
    842     }
    843 
    844     if (staticAllBytesBuffer != allBytesBuffer) {
    845         free(allBytesBuffer);
    846     }
    847 
    848     return resultData;
    849 }
    850 
    851 -(NSData *)_web_schemeData
    852 {
    853     return [self _web_dataForURLComponentType:kCFURLComponentScheme];
    854 }
    855 
    856 -(NSData *)_web_hostData
    857 {
    858     NSData *result = [self _web_dataForURLComponentType:kCFURLComponentHost];
    859     NSData *scheme = [self _web_schemeData];
    860     // Take off localhost for file
    861     if ([scheme _web_isCaseInsensitiveEqualToCString:"file"]) {
    862         return ([result _web_isCaseInsensitiveEqualToCString:"localhost"]) ? nil : result;
    863     }
    864     return result;
    865 }
    866 
    867 - (NSString *)_web_hostString
    868 {
    869     NSData *data = [self _web_hostData];
    870     if (!data) {
    871         data = [NSData data];
    872     }
    873     return [[[NSString alloc] initWithData:[self _web_hostData] encoding:NSUTF8StringEncoding] autorelease];
    874 }
    875 
    876 - (NSString *)_webkit_suggestedFilenameWithMIMEType:(NSString *)MIMEType
    877 {
    878     return suggestedFilenameWithMIMEType(self, MIMEType);
    879 }
    880 
    881 @end
    882 
    883 @implementation NSString (WebNSURLExtras)
    884 
    885 - (BOOL)_web_isUserVisibleURL
    886 {
    887     BOOL valid = YES;
    888     // get buffer
    889 
    890     char static_buffer[1024];
    891     const char *p;
    892     BOOL success = CFStringGetCString((CFStringRef)self, static_buffer, 1023, kCFStringEncodingUTF8);
    893     if (success) {
    894         p = static_buffer;
    895     } else {
    896         p = [self UTF8String];
    897     }
    898 
    899     int length = strlen(p);
    900 
    901     // check for characters <= 0x20 or >=0x7f, %-escape sequences of %7f, and xn--, these
    902     // are the things that will lead _web_userVisibleString to actually change things.
    903     int i;
    904     for (i = 0; i < length; i++) {
    905         unsigned char c = p[i];
    906         // escape control characters, space, and delete
    907         if (c <= 0x20 || c == 0x7f) {
    908             valid = NO;
    909             break;
    910         } else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
    911             unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
    912             if (u > 0x7f) {
    913                 valid = NO;
    914                 break;
    915             }
    916             i += 2;
    917         } else {
    918             // Check for "xn--" in an efficient, non-case-sensitive, way.
    919             if (c == '-' && i >= 3 && (p[i - 3] | 0x20) == 'x' && (p[i - 2] | 0x20) == 'n' && p[i - 1] == '-') {
    920                 valid = NO;
    921                 break;
    922             }
    923         }
    924     }
    925 
    926     return valid;
    927 }
    928 
    929 
    930 - (BOOL)_webkit_isJavaScriptURL
    931 {
    932     return [self _webkit_hasCaseInsensitivePrefix:@"javascript:"];
    933 }
    934 
    935 - (BOOL)_webkit_isFileURL
    936 {
    937     return [self rangeOfString:@"file:" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)].location != NSNotFound;
    938 }
    939 
    940 - (NSString *)_webkit_stringByReplacingValidPercentEscapes
    941 {
    942     return decodeURLEscapeSequences(self);
    943 }
    944 
    945 - (NSString *)_webkit_scriptIfJavaScriptURL
    946 {
    947     if (![self _webkit_isJavaScriptURL]) {
    948         return nil;
    949     }
    950     return [[self substringFromIndex:11] _webkit_stringByReplacingValidPercentEscapes];
    951 }
    952 
    953 - (BOOL)_webkit_isFTPDirectoryURL
    954 {
    955     int length = [self length];
    956     if (length < 5) {  // 5 is length of "ftp:/"
    957         return NO;
    958     }
    959     unichar lastChar = [self characterAtIndex:length - 1];
    960     return lastChar == '/' && [self _webkit_hasCaseInsensitivePrefix:@"ftp:"];
    961 }
    962 
    963 
    964 static BOOL readIDNScriptWhiteListFile(NSString *filename)
    965 {
    966     if (!filename) {
    967         return NO;
    968     }
    969     FILE *file = fopen([filename fileSystemRepresentation], "r");
    970     if (file == NULL) {
    971         return NO;
    972     }
    973 
    974     // Read a word at a time.
    975     // Allow comments, starting with # character to the end of the line.
    976     while (1) {
    977         // Skip a comment if present.
    978         int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
    979         if (result == EOF) {
    980             break;
    981         }
    982 
    983         // Read a script name if present.
    984         char word[33];
    985         result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
    986         if (result == EOF) {
    987             break;
    988         }
    989         if (result == 1) {
    990             // Got a word, map to script code and put it into the array.
    991             int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
    992             if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
    993                 size_t index = script / 32;
    994                 uint32_t mask = 1 << (script % 32);
    995                 IDNScriptWhiteList[index] |= mask;
    996             }
    997         }
    998     }
    999     fclose(file);
   1000     return YES;
   1001 }
   1002 
   1003 static void readIDNScriptWhiteList(void)
   1004 {
   1005     // Read white list from library.
   1006     NSArray *dirs = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
   1007     int i, numDirs = [dirs count];
   1008     for (i = 0; i < numDirs; i++) {
   1009         NSString *dir = [dirs objectAtIndex:i];
   1010         if (readIDNScriptWhiteListFile([dir stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"])) {
   1011             return;
   1012         }
   1013     }
   1014 
   1015     // Fall back on white list inside bundle.
   1016     NSBundle *bundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
   1017     readIDNScriptWhiteListFile([bundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"]);
   1018 }
   1019 
   1020 static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
   1021 {
   1022     pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);
   1023 
   1024     int32_t i = 0;
   1025     while (i < length) {
   1026         UChar32 c;
   1027         U16_NEXT(buffer, i, length, c)
   1028         UErrorCode error = U_ZERO_ERROR;
   1029         UScriptCode script = uscript_getScript(c, &error);
   1030         if (error != U_ZERO_ERROR) {
   1031             LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
   1032             return NO;
   1033         }
   1034         if (script < 0) {
   1035             LOG_ERROR("got negative number for script code from ICU: %d", script);
   1036             return NO;
   1037         }
   1038         if (script >= USCRIPT_CODE_LIMIT) {
   1039             return NO;
   1040         }
   1041         size_t index = script / 32;
   1042         uint32_t mask = 1 << (script % 32);
   1043         if (!(IDNScriptWhiteList[index] & mask)) {
   1044             return NO;
   1045         }
   1046 
   1047         if (isLookalikeCharacter(c))
   1048             return NO;
   1049     }
   1050     return YES;
   1051 }
   1052 
   1053 static BOOL allCharactersAllowedByTLDRules(const UChar* buffer, int32_t length)
   1054 {
   1055     // Skip trailing dot for root domain.
   1056     if (buffer[length - 1] == '.')
   1057         --length;
   1058 
   1059     if (length > 3
   1060         && buffer[length - 3] == '.'
   1061         && buffer[length - 2] == 0x0440 // CYRILLIC SMALL LETTER ER
   1062         && buffer[length - 1] == 0x0444) // CYRILLIC SMALL LETTER EF
   1063     {
   1064         // Rules defined by <http://www.cctld.ru/ru/docs/rulesrf.php>. This code only checks requirements that matter for presentation purposes.
   1065         for (int32_t i = length - 4; i; --i) {
   1066             UChar ch = buffer[i];
   1067 
   1068             // Only modern Russian letters, digits and dashes are allowed.
   1069             if ((ch >= 0x0430 && ch <= 0x044f)
   1070                 || ch == 0x0451
   1071                 || (ch >= '0' && ch <= '9')
   1072                 || ch == '-')
   1073                 continue;
   1074 
   1075             // Only check top level domain. Lower level registrars may have different rules.
   1076             if (ch == '.')
   1077                 break;
   1078 
   1079             return NO;
   1080         }
   1081         return YES;
   1082     }
   1083 
   1084     // Not a known top level domain with special rules.
   1085     return NO;
   1086 }
   1087 
   1088 // Return value of nil means no mapping is necessary.
   1089 // If makeString is NO, then return value is either nil or self to indicate mapping is necessary.
   1090 // If makeString is YES, then return value is either nil or the mapped string.
   1091 - (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString
   1092 {
   1093     if (range.length > HOST_NAME_BUFFER_LENGTH) {
   1094         return nil;
   1095     }
   1096 
   1097     if ([self length] == 0)
   1098         return nil;
   1099 
   1100     UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH];
   1101     UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH];
   1102 
   1103     NSString *string = self;
   1104     if (encode && [self rangeOfString:@"%" options:NSLiteralSearch range:range].location != NSNotFound) {
   1105         NSString *substring = [self substringWithRange:range];
   1106         substring = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)substring, CFSTR("")));
   1107         if (substring != nil) {
   1108             string = substring;
   1109             range = NSMakeRange(0, [string length]);
   1110         }
   1111     }
   1112 
   1113     int length = range.length;
   1114     [string getCharacters:sourceBuffer range:range];
   1115 
   1116     UErrorCode error = U_ZERO_ERROR;
   1117     int32_t numCharactersConverted = (encode ? uidna_IDNToASCII : uidna_IDNToUnicode)
   1118         (sourceBuffer, length, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &error);
   1119     if (error != U_ZERO_ERROR) {
   1120         return nil;
   1121     }
   1122     if (numCharactersConverted == length && memcmp(sourceBuffer, destinationBuffer, length * sizeof(UChar)) == 0) {
   1123         return nil;
   1124     }
   1125     if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, numCharactersConverted) && !allCharactersAllowedByTLDRules(destinationBuffer, numCharactersConverted)) {
   1126         return nil;
   1127     }
   1128     return makeString ? (NSString *)[NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : (NSString *)self;
   1129 }
   1130 
   1131 - (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range
   1132 {
   1133     return [self _web_mapHostNameWithRange:range encode:NO makeString:NO] != nil;
   1134 }
   1135 
   1136 - (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range
   1137 {
   1138     return [self _web_mapHostNameWithRange:range encode:YES makeString:NO] != nil;
   1139 }
   1140 
   1141 - (NSString *)_web_decodeHostNameWithRange:(NSRange)range
   1142 {
   1143     return [self _web_mapHostNameWithRange:range encode:NO makeString:YES];
   1144 }
   1145 
   1146 - (NSString *)_web_encodeHostNameWithRange:(NSRange)range
   1147 {
   1148     return [self _web_mapHostNameWithRange:range encode:YES makeString:YES];
   1149 }
   1150 
   1151 - (NSString *)_web_decodeHostName
   1152 {
   1153     NSString *name = [self _web_mapHostNameWithRange:NSMakeRange(0, [self length]) encode:NO makeString:YES];
   1154     return name == nil ? self : name;
   1155 }
   1156 
   1157 - (NSString *)_web_encodeHostName
   1158 {
   1159     NSString *name = [self _web_mapHostNameWithRange:NSMakeRange(0, [self length]) encode:YES makeString:YES];
   1160     return name == nil ? self : name;
   1161 }
   1162 
   1163 -(NSRange)_webkit_rangeOfURLScheme
   1164 {
   1165     NSRange colon = [self rangeOfString:@":"];
   1166     if (colon.location != NSNotFound && colon.location > 0) {
   1167         NSRange scheme = {0, colon.location};
   1168         static NSCharacterSet *InverseSchemeCharacterSet = nil;
   1169         if (!InverseSchemeCharacterSet) {
   1170             /*
   1171              This stuff is very expensive.  10-15 msec on a 2x1.2GHz.  If not cached it swamps
   1172              everything else when adding items to the autocomplete DB.  Makes me wonder if we
   1173              even need to enforce the character set here.
   1174             */
   1175             NSString *acceptableCharacters = @"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-";
   1176             InverseSchemeCharacterSet = [[[NSCharacterSet characterSetWithCharactersInString:acceptableCharacters] invertedSet] retain];
   1177         }
   1178         NSRange illegals = [self rangeOfCharacterFromSet:InverseSchemeCharacterSet options:0 range:scheme];
   1179         if (illegals.location == NSNotFound)
   1180             return scheme;
   1181     }
   1182     return NSMakeRange(NSNotFound, 0);
   1183 }
   1184 
   1185 -(BOOL)_webkit_looksLikeAbsoluteURL
   1186 {
   1187     // Trim whitespace because _web_URLWithString allows whitespace.
   1188     return [[self _webkit_stringByTrimmingWhitespace] _webkit_rangeOfURLScheme].location != NSNotFound;
   1189 }
   1190 
   1191 - (NSString *)_webkit_URLFragment
   1192 {
   1193     NSRange fragmentRange;
   1194 
   1195     fragmentRange = [self rangeOfString:@"#" options:NSLiteralSearch];
   1196     if (fragmentRange.location == NSNotFound)
   1197         return nil;
   1198     return [self substringFromIndex:fragmentRange.location + 1];
   1199 }
   1200 
   1201 @end
   1202