/*
 * Copyright (C) 2005, 2007, 2008, 2009 Apple Inc. All rights reserved.
 * Copyright (C) 2006 Alexey Proskuryakov (ap (at) nypop.com)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
 *     its contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#import "WebNSURLExtras.h"

#import "WebKitNSStringExtras.h"
#import "WebLocalizableStrings.h"
#import "WebNSDataExtras.h"
#import "WebNSObjectExtras.h"
#import "WebSystemInterface.h"
#import <Foundation/NSURLRequest.h>
#import <WebCore/KURL.h>
#import <WebCore/LoaderNSURLExtras.h>
#import <WebKitSystemInterface.h>
#import <wtf/Assertions.h>
#import <unicode/uchar.h>
#import <unicode/uidna.h>
#import <unicode/uscript.h>

using namespace WebCore;
using namespace WTF;

typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);

// Needs to be big enough to hold an IDN-encoded name.
// For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
#define HOST_NAME_BUFFER_LENGTH 2048

#define URL_BYTES_BUFFER_LENGTH 2048

static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];

// Returns YES if charCode should be treated as an unsafe, lookalike character.
static inline BOOL isLookalikeCharacter(int charCode)
{
    // FIXME: Move this code down into WebCore so it can be shared with other platforms.

    // This function treats the following as unsafe, lookalike characters:
    // any non-printable character, any character considered as whitespace that isn't already converted to a space by ICU,
    // and any ignorable character.

    // We also considered the characters in Mozilla's blacklist (http://kb.mozillazine.org/Network.IDN.blacklist_chars),
    // and included all of these characters that ICU can encode.

    if (!u_isprint(charCode) || u_isUWhiteSpace(charCode) || u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
        return YES;

    switch (charCode) {
        case 0x00ED: /* LATIN SMALL LETTER I WITH ACUTE */
        case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */
        case 0x0251: /* LATIN SMALL LETTER ALPHA */
        case 0x0261: /* LATIN SMALL LETTER SCRIPT G */
        case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */
        case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */
        case 0x05B4: /* HEBREW POINT HIRIQ */
        case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */
        case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */
        case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */
        case 0x0660: /* ARABIC INDIC DIGIT ZERO */
        case 0x06D4: /* ARABIC FULL STOP */
        case 0x06F0: /* EXTENDED ARABIC INDIC DIGIT ZERO */
        case 0x2027: /* HYPHENATION POINT */
        case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
        case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
        case 0x2044: /* FRACTION SLASH */
        case 0x2215: /* DIVISION SLASH */
        case 0x2216: /* SET MINUS */
        case 0x233F: /* APL FUNCTIONAL SYMBOL SLASH BAR */
        case 0x23AE: /* INTEGRAL EXTENSION */
        case 0x244A: /* OCR DOUBLE BACKSLASH */
        case 0x2571: /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
        case 0x2572: /* BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT */
        case 0x29F8: /* BIG SOLIDUS */
        case 0x29f6: /* SOLIDUS WITH OVERBAR */
        case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */
        case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */
        case 0x3008: /* LEFT ANGLE BRACKET */
        case 0x3014: /* LEFT TORTOISE SHELL BRACKET */
        case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */
        case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */
        case 0x3035: /* VERTICAL KANA REPEAT MARK LOWER HALF */
        case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */
        case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */
        case 0x33DF: /* SQUARE A OVER M */
        case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
        case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
        case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
        case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */
        case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */
            return YES;
        default:
            return NO;
    }
}

// Returns the uppercase hexadecimal digit character for a value in the range 0..15.
// Any other value is logged and mapped to '0'.
static char hexDigit(int i)
{
    // Only 0..15 are valid nibble values; 16 would otherwise produce 'G'.
    if (i < 0 || i > 15) {
        LOG_ERROR("illegal hex digit");
        return '0';
    }
    int h = i;
    if (h >= 10) {
        h = h - 10 + 'A';
    }
    else {
        h += '0';
    }
    return h;
}

// Returns YES if c is one of 0-9, A-F or a-f.
static BOOL isHexDigit(char c)
{
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
}

// Returns the numeric value (0..15) of a hexadecimal digit character.
// A character that is not a hex digit is logged and yields 0.
static int hexDigitValue(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    LOG_ERROR("illegal hex digit");
    return 0;
}

// Calls f once for each host name range found in a mailto: URL string.
static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' character.
    // Skip quoted strings so that characters in them don't confuse us.
    // When we find a '?' character, we are past the part of the URL that contains host names.

    static NSCharacterSet *hostNameOrStringStartCharacters;
    if (hostNameOrStringStartCharacters == nil) {
        hostNameOrStringStartCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"@?"];
        CFRetain(hostNameOrStringStartCharacters);
    }
    static NSCharacterSet *hostNameEndCharacters;
    if (hostNameEndCharacters == nil) {
        hostNameEndCharacters = [NSCharacterSet characterSetWithCharactersInString:@">,?"];
        CFRetain(hostNameEndCharacters);
    }
    static NSCharacterSet *quotedStringCharacters;
    if (quotedStringCharacters == nil) {
        quotedStringCharacters = [NSCharacterSet characterSetWithCharactersInString:@"\"\\"];
        CFRetain(quotedStringCharacters);
    }

    unsigned stringLength = [string length];
    NSRange remaining = NSMakeRange(0, stringLength);

    while (1) {
        // Find start of host name or of quoted string.
        NSRange hostNameOrStringStart = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:remaining];
        if (hostNameOrStringStart.location == NSNotFound) {
            return;
        }
        unichar c = [string characterAtIndex:hostNameOrStringStart.location];
        remaining.location = NSMaxRange(hostNameOrStringStart);
        remaining.length = stringLength - remaining.location;

        if (c == '?') {
            return;
        }

        if (c == '@') {
            // Find end of host name.
            unsigned hostNameStart = remaining.location;
            NSRange hostNameEnd = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:remaining];
            BOOL done;
            if (hostNameEnd.location == NSNotFound) {
                hostNameEnd.location = stringLength;
                done = YES;
            } else {
                remaining.location = hostNameEnd.location;
                remaining.length = stringLength - remaining.location;
                done = NO;
            }

            // Process host name range.
            f(string, NSMakeRange(hostNameStart, hostNameEnd.location - hostNameStart), context);

            if (done) {
                return;
            }
        } else {
            // Skip quoted string.
            ASSERT(c == '"');
            while (1) {
                NSRange escapedCharacterOrStringEnd = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:remaining];
                if (escapedCharacterOrStringEnd.location == NSNotFound) {
                    return;
                }
                c = [string characterAtIndex:escapedCharacterOrStringEnd.location];
                remaining.location = NSMaxRange(escapedCharacterOrStringEnd);
                remaining.length = stringLength - remaining.location;

                // If we are at the end of the quoted string, then break from the string loop back to the host name loop.
                if (c == '"') {
                    break;
                }

                // Skip escaped character.
                ASSERT(c == '\\');
                if (remaining.length == 0) {
                    return;
                }
                remaining.location += 1;
                remaining.length -= 1;
            }
        }
    }
}

// Calls f with the host name range of a URL string. mailto: URLs are handed to
// applyHostNameFunctionToMailToURLString; strings without a valid "scheme://" prefix are ignored.
static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    // Find hostnames. Too bad we can't use any real URL-parsing code to do this,
    // but we have to do it before doing all the %-escaping, and this is the only
    // code we have that parses mailto URLs anyway.

    // Maybe we should implement this using a character buffer instead?

    if ([string _webkit_hasCaseInsensitivePrefix:@"mailto:"]) {
        applyHostNameFunctionToMailToURLString(string, f, context);
        return;
    }

    // Find the host name in a hierarchical URL.
    // It comes after a "://" sequence, with scheme characters preceding.
    // It ends with the end of the string or a ":", "/", "?", or "#".
    // If there is a "@" character, the host part is just the part after the "@".
    NSRange separatorRange = [string rangeOfString:@"://"];
    if (separatorRange.location == NSNotFound) {
        return;
    }

    // Check that all characters before the :// are valid scheme characters.
    static NSCharacterSet *nonSchemeCharacters;
    if (nonSchemeCharacters == nil) {
        nonSchemeCharacters = [[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet];
        CFRetain(nonSchemeCharacters);
    }
    if ([string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:NSMakeRange(0, separatorRange.location)].location != NSNotFound) {
        return;
    }

    unsigned stringLength = [string length];

    static NSCharacterSet *hostTerminators;
    if (hostTerminators == nil) {
        hostTerminators = [NSCharacterSet characterSetWithCharactersInString:@":/?#"];
        CFRetain(hostTerminators);
    }

    // Start after the separator.
    unsigned authorityStart = NSMaxRange(separatorRange);

    // Find terminating character.
    NSRange hostNameTerminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(authorityStart, stringLength - authorityStart)];
    unsigned hostNameEnd = hostNameTerminator.location == NSNotFound ? stringLength : hostNameTerminator.location;

    // Find "@" for the start of the host name.
    NSRange userInfoTerminator = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostNameEnd - authorityStart)];
    unsigned hostNameStart = userInfoTerminator.location == NSNotFound ? authorityStart : NSMaxRange(userInfoTerminator);

    f(string, NSMakeRange(hostNameStart, hostNameEnd - hostNameStart), context);
}

@implementation NSURL (WebNSURLExtras)

// Appends range to the NSMutableArray pointed to by context (creating the array on first use)
// if the host name in that range needs encoding (encode == YES) or decoding (encode == NO).
static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
{
    BOOL needsMapping = encode
        ? [string _web_hostNameNeedsEncodingWithRange:range]
        : [string _web_hostNameNeedsDecodingWithRange:range];
    if (!needsMapping) {
        return;
    }

    NSMutableArray **array = (NSMutableArray **)context;
    if (*array == nil) {
        *array = [[NSMutableArray alloc] init];
    }

    [*array addObject:[NSValue valueWithRange:range]];
}

static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
{
    return collectRangesThatNeedMapping(string, range, context, YES);
}

static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
{
    return collectRangesThatNeedMapping(string, range, context, NO);
}

// Returns string with each host name that needs it encoded (encode == YES) or decoded (encode == NO).
// Returns the original string object when nothing needs mapping.
static NSString *mapHostNames(NSString *string, BOOL encode)
{
    // Generally, we want to optimize for the case where there is one host name that does not need mapping.

    if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
        return string;

    // Make a list of ranges that actually need mapping.
    NSMutableArray *hostNameRanges = nil;
    StringRangeApplierFunction f = encode
        ? collectRangesThatNeedEncoding
        : collectRangesThatNeedDecoding;
    applyHostNameFunctionToURLString(string, f, &hostNameRanges);
    if (hostNameRanges == nil)
        return string;

    // Do the mapping.
    // Walk the ranges from last to first so that earlier ranges stay valid as the copy is edited.
    NSMutableString *mutableCopy = [string mutableCopy];
    unsigned i = [hostNameRanges count];
    while (i-- != 0) {
        NSRange hostNameRange = [[hostNameRanges objectAtIndex:i] rangeValue];
        NSString *mappedHostName = encode
            ? [string _web_encodeHostNameWithRange:hostNameRange]
            : [string _web_decodeHostNameWithRange:hostNameRange];
        [mutableCopy replaceCharactersInRange:hostNameRange withString:mappedHostName];
    }
    [hostNameRanges release];
    return [mutableCopy autorelease];
}

// Builds a URL from a user-typed string: trims whitespace, encodes host names, converts to UTF-8
// and %-escapes every byte <= 0x20 or >= 0x7f before resolving against URL.
+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string relativeToURL:(NSURL *)URL
{
    if (string == nil) {
        return nil;
    }
    string = mapHostNames([string _webkit_stringByTrimmingWhitespace], YES);

    NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
    ASSERT(userTypedData);

    const UInt8 *inBytes = static_cast<const UInt8 *>([userTypedData bytes]);
    int inLength = [userTypedData length];
    if (inLength == 0) {
        return [NSURL URLWithString:@""];
    }

    char *outBytes = static_cast<char *>(malloc(inLength * 3)); // large enough to %-escape every character
    char *p = outBytes;
    int outLength = 0;
    int i;
    for (i = 0; i < inLength; i++) {
        UInt8 c = inBytes[i];
        if (c <= 0x20 || c >= 0x7f) {
            *p++ = '%';
            *p++ = hexDigit(c >> 4);
            *p++ = hexDigit(c & 0xf);
            outLength += 3;
        }
        else {
            *p++ = c;
            outLength++;
        }
    }

    NSData *data = [NSData dataWithBytesNoCopy:outBytes length:outLength]; // adopts outBytes
    return [self _web_URLWithData:data relativeToURL:URL];
}

+ (NSURL *)_web_URLWithUserTypedString:(NSString *)string
{
    return [self _web_URLWithUserTypedString:string relativeToURL:nil];
}

+ (NSURL *)_web_URLWithDataAsString:(NSString *)string
{
    if (string == nil) {
        return nil;
    }
    return [self _web_URLWithDataAsString:string relativeToURL:nil];
}

// Trims whitespace from string, converts it to ISO Latin 1 bytes and builds a URL from them.
+ (NSURL *)_web_URLWithDataAsString:(NSString *)string relativeToURL:(NSURL *)baseURL
{
    if (string == nil) {
        return nil;
    }
    string = [string _webkit_stringByTrimmingWhitespace];
    NSData *data = [string dataUsingEncoding:NSISOLatin1StringEncoding];
    return [self _web_URLWithData:data relativeToURL:baseURL];
}

+ (NSURL *)_web_URLWithData:(NSData *)data
{
    return [NSURL _web_URLWithData:data relativeToURL:nil];
}

// Builds a URL from raw bytes, resolved against baseURL. Returns nil for nil data and an
// empty URL for empty data.
+ (NSURL *)_web_URLWithData:(NSData *)data relativeToURL:(NSURL *)baseURL
{
    if (data == nil)
        return nil;

    NSURL *result = nil;
    size_t length = [data length];
    if (length > 0) {
        // work around <rdar://4470771>: CFURLCreateAbsoluteURLWithBytes(.., TRUE) doesn't remove non-path components.
        baseURL = [baseURL _webkit_URLByRemovingResourceSpecifier];

        const UInt8 *bytes = static_cast<const UInt8*>([data bytes]);
        // NOTE: We use UTF-8 here since this encoding is used when computing strings when returning URL components
        // (e.g calls to NSURL -path). However, this function is not tolerant of illegal UTF-8 sequences, which
        // could either be a malformed string or bytes in a different encoding, like shift-jis, so we fall back
        // onto using ISO Latin 1 in those cases.
        result = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, length, kCFStringEncodingUTF8, (CFURLRef)baseURL, YES));
        if (!result)
            result = WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, length, kCFStringEncodingISOLatin1, (CFURLRef)baseURL, YES));
    } else
        result = [NSURL URLWithString:@""];

    return result;
}

// Returns the bytes of this URL. If the URL has a base URL, it is first re-created relative
// to that base and the bytes of the resulting URL are returned.
- (NSData *)_web_originalData
{
    UInt8 *buffer = (UInt8 *)malloc(URL_BYTES_BUFFER_LENGTH);
    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
    if (bytesFilled == -1) {
        // The fixed-size buffer was too small; ask for the required size and retry.
        CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
        buffer = (UInt8 *)realloc(buffer, bytesToAllocate);
        bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
        ASSERT(bytesFilled == bytesToAllocate);
    }

    // buffer is adopted by the NSData
    NSData *data = [NSData dataWithBytesNoCopy:buffer length:bytesFilled freeWhenDone:YES];

    NSURL *baseURL = (NSURL *)CFURLGetBaseURL((CFURLRef)self);
    if (baseURL)
        return [[NSURL _web_URLWithData:data relativeToURL:baseURL] _web_originalData];
    return data;
}

- (NSString *)_web_originalDataAsString
{
    return [[[NSString alloc] initWithData:[self _web_originalData] encoding:NSISOLatin1StringEncoding] autorelease];
}

// Returns a new string (owned by the caller) in which every lookalike character of the input
// is replaced by the %-escaped bytes of its UTF-8 encoding.
static CFStringRef createStringWithEscapedUnsafeCharacters(CFStringRef string)
{
    CFIndex length = CFStringGetLength(string);
    Vector<UChar, 2048> sourceBuffer(length);
    CFStringGetCharacters(string, CFRangeMake(0, length), sourceBuffer.data());

    Vector<UChar, 2048> outBuffer;

    CFIndex i = 0;
    while (i < length) {
        UChar32 c;
        U16_NEXT(sourceBuffer, i, length, c)

        if (isLookalikeCharacter(c)) {
            uint8_t utf8Buffer[4];
            CFIndex offset = 0;
            UBool failure = false;
            U8_APPEND(utf8Buffer, offset, 4, c, failure)
            ASSERT(!failure);

            for (CFIndex j = 0; j < offset; ++j) {
                outBuffer.append('%');
                outBuffer.append(hexDigit(utf8Buffer[j] >> 4));
                outBuffer.append(hexDigit(utf8Buffer[j] & 0xf));
            }
        } else {
            UChar utf16Buffer[2];
            CFIndex offset = 0;
            UBool failure = false;
            U16_APPEND(utf16Buffer, offset, 2, c, failure)
            ASSERT(!failure);
            for (CFIndex j = 0; j < offset; ++j)
                outBuffer.append(utf16Buffer[j]);
        }
    }

    return CFStringCreateWithCharacters(NULL, outBuffer.data(), outBuffer.size());
}

// Returns a string form of the URL intended for display: %-escapes of bytes above 0x7f are
// unescaped when the result is valid UTF-8, host names are mapped, the string is precomposed,
// and lookalike characters are %-escaped.
- (NSString *)_web_userVisibleString
{
    NSData *data = [self _web_originalData];
    const unsigned char *before = static_cast<const unsigned char*>([data bytes]);
    int length = [data length];

    bool needsHostNameDecoding = false;

    const unsigned char *p = before;
    int bufferLength = (length * 3) + 1;
    char *after = static_cast<char *>(malloc(bufferLength)); // large enough to %-escape every character
    char *q = after;
    int i;
    for (i = 0; i < length; i++) {
        unsigned char c = p[i];
        // unescape escape sequences that indicate bytes greater than 0x7f
        if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
            unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
            if (u > 0x7f) {
                // unescape
                *q++ = u;
            } else {
                // do not unescape
                *q++ = p[i];
                *q++ = p[i + 1];
                *q++ = p[i + 2];
            }
            i += 2;
        } else {
            *q++ = c;

            // Check for "xn--" in an efficient, non-case-sensitive, way.
            // The look-behind is on the output buffer, which can be shorter than the input
            // consumed so far (an unescape turns three bytes into one), so bound it by the
            // number of bytes actually written rather than by the input index.
            if (c == '-' && q - after >= 4 && !needsHostNameDecoding && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
                needsHostNameDecoding = true;
        }
    }
    *q = '\0';

    // Check string to see if it can be converted to display using UTF-8
    NSString *result = [NSString stringWithUTF8String:after];
    if (!result) {
        // Could not convert to UTF-8.
        // Convert characters greater than 0x7f to escape sequences.
        // Shift current string to the end of the buffer
        // then we will copy back bytes to the start of the buffer
        // as we convert.
        int afterlength = q - after;
        char *source = after + bufferLength - afterlength - 1;
        memmove(source, after, afterlength + 1); // copies trailing '\0'
        char *destination = after;
        while (*source) {
            unsigned char c = *source;
            if (c > 0x7f) {
                *destination++ = '%';
                *destination++ = hexDigit(c >> 4);
                *destination++ = hexDigit(c & 0xf);
            } else {
                *destination++ = *source;
            }
            source++;
        }
        *destination = '\0';
        result = [NSString stringWithUTF8String:after];
    }

    free(after);

    result = mapHostNames(result, !needsHostNameDecoding);
    result = [result precomposedStringWithCanonicalMapping];
    return WebCFAutorelease(createStringWithEscapedUnsafeCharacters((CFStringRef)result));
}

- (BOOL)_web_isEmpty
{
    if (!CFURLGetBaseURL((CFURLRef)self))
        return CFURLGetBytes((CFURLRef)self, NULL, 0) == 0;
    return [[self _web_originalData] length] == 0;
}

// Returns the URL bytes as a NUL-terminated C string. The pointer refers to the bytes of an
// object obtained from [NSMutableData data].
- (const char *)_web_URLCString
{
    NSMutableData *data = [NSMutableData data];
    [data appendData:[self _web_originalData]];
    [data appendBytes:"\0" length:1];
    return (const char *)[data bytes];
}

- (NSURL *)_webkit_canonicalize
{
    NSURLRequest *request = [[NSURLRequest alloc] initWithURL:self];
    Class concreteClass = WKNSURLProtocolClassForRequest(request);
    if (!concreteClass) {
        [request release];
        return self;
    }

    // This applies NSURL's concept of canonicalization, but not KURL's concept. It would
    // make sense to apply both, but when we tried that it caused a performance degradation
    // (see 5315926). It might make sense to apply only the KURL concept and not the NSURL
    // concept, but it's too risky to make that change for WebKit 3.0.
    NSURLRequest *newRequest = [concreteClass canonicalRequestForRequest:request];
    NSURL *newURL = [newRequest URL];
    NSURL *result = [[newURL retain] autorelease];
    [request release];

    return result;
}

// Returns a URL made from the bytes that precede the given component, dropping the one
// separator character immediately before it. Returns self if the component is absent or the
// truncated bytes cannot be turned into a URL.
- (NSURL *)_web_URLByTruncatingOneCharacterBeforeComponent:(CFURLComponentType)component
{
    CFRange fragRg = CFURLGetByteRangeForComponent((CFURLRef)self, component, NULL);
    if (fragRg.location == kCFNotFound)
        return self;

    UInt8 *urlBytes, buffer[URL_BYTES_BUFFER_LENGTH];
    CFIndex numBytes = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
    if (numBytes == -1) {
        numBytes = CFURLGetBytes((CFURLRef)self, NULL, 0);
        urlBytes = static_cast<UInt8*>(malloc(numBytes));
        CFURLGetBytes((CFURLRef)self, urlBytes, numBytes);
    } else
        urlBytes = buffer;

    NSURL *result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, fragRg.location - 1, kCFStringEncodingUTF8, NULL));
    if (!result)
        result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, fragRg.location - 1, kCFStringEncodingISOLatin1, NULL));

    if (urlBytes != buffer) free(urlBytes);
    return result ? [result autorelease] : self;
}

- (NSURL *)_webkit_URLByRemovingFragment
{
    return [self _web_URLByTruncatingOneCharacterBeforeComponent:kCFURLComponentFragment];
}

- (NSURL *)_webkit_URLByRemovingResourceSpecifier
{
    return [self _web_URLByTruncatingOneCharacterBeforeComponent:kCFURLComponentResourceSpecifier];
}

// Returns a URL with the given component and the single character that follows it removed.
// Returns self if the component is absent or the edited bytes cannot be turned into a URL.
- (NSURL *)_web_URLByRemovingComponentAndSubsequentCharacter:(CFURLComponentType)component
{
    CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, component, 0);
    if (range.location == kCFNotFound)
        return self;

    // Remove one subsequent character.
    ++range.length;

    UInt8 buffer[URL_BYTES_BUFFER_LENGTH];
    UInt8 *urlBytes = buffer;
    CFIndex numBytes = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
    if (numBytes == -1) {
        numBytes = CFURLGetBytes((CFURLRef)self, NULL, 0);
        urlBytes = static_cast<UInt8*>(malloc(numBytes));
        CFURLGetBytes((CFURLRef)self, urlBytes, numBytes);
    }

    NSURL *result = nil;
    if (numBytes >= range.location) {
        if (numBytes < range.location + range.length)
            range.length = numBytes - range.location;

        // Slide the tail (everything after the removed range) down over the removed range.
        // The tail is numBytes - (range.location + range.length) bytes long.
        memmove(urlBytes + range.location, urlBytes + range.location + range.length, numBytes - range.location - range.length);

        result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, numBytes - range.length, kCFStringEncodingUTF8, NULL));
        if (!result)
            result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, numBytes - range.length, kCFStringEncodingISOLatin1, NULL));
    }

    // Release the heap buffer on every path, including the "component out of range" one.
    if (urlBytes != buffer)
        free(urlBytes);

    return result ? [result autorelease] : self;
}

- (NSURL *)_web_URLByRemovingUserInfo
{
    return [self _web_URLByRemovingComponentAndSubsequentCharacter:kCFURLComponentUserInfo];
}

- (BOOL)_webkit_isJavaScriptURL
{
    return [[self _web_originalDataAsString] _webkit_isJavaScriptURL];
}

- (NSString *)_webkit_scriptIfJavaScriptURL
{
    return [[self absoluteString] _webkit_scriptIfJavaScriptURL];
}

- (BOOL)_webkit_isFileURL
{
    return [[self _web_originalDataAsString] _webkit_isFileURL];
}

- (BOOL)_webkit_isFTPDirectoryURL
{
    return [[self _web_originalDataAsString] _webkit_isFTPDirectoryURL];
}

- (BOOL)_webkit_shouldLoadAsEmptyDocument
{
    return [[self _web_originalDataAsString] _webkit_hasCaseInsensitivePrefix:@"about:"] || [self _web_isEmpty];
}

// Returns a URL whose scheme bytes are ASCII-lowercased, or self if nothing needed to change.
- (NSURL *)_web_URLWithLowercasedScheme
{
    CFRange range;
    CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &range);
    if (range.location == kCFNotFound) {
        return self;
    }

    UInt8 static_buffer[URL_BYTES_BUFFER_LENGTH];
    UInt8 *buffer = static_buffer;
    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, URL_BYTES_BUFFER_LENGTH);
    if (bytesFilled == -1) {
        CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
        buffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
        bytesFilled = CFURLGetBytes((CFURLRef)self, buffer, bytesToAllocate);
        ASSERT(bytesFilled == bytesToAllocate);
    }

    int i;
    BOOL changed = NO;
    for (i = 0; i < range.length; ++i) {
        char c = buffer[range.location + i];
        char lower = toASCIILower(c);
        if (c != lower) {
            buffer[range.location + i] = lower;
            changed = YES;
        }
    }

    NSURL *result = changed
        ? (NSURL *)WebCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, buffer, bytesFilled, kCFStringEncodingUTF8, nil, YES))
        : (NSURL *)self;

    if (buffer != static_buffer) {
        free(buffer);
    }

    return result;
}


-(BOOL)_web_hasQuestionMarkOnlyQueryString
{
    CFRange rangeWithSeparators;
    CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentQuery, &rangeWithSeparators);
    if (rangeWithSeparators.location != kCFNotFound && rangeWithSeparators.length == 1) {
        return YES;
    }
    return NO;
}

-(NSData *)_web_schemeSeparatorWithoutColon
{
    NSData *result = nil;
    CFRange rangeWithSeparators;
    CFRange range = CFURLGetByteRangeForComponent((CFURLRef)self, kCFURLComponentScheme, &rangeWithSeparators);
    if (rangeWithSeparators.location != kCFNotFound) {
        NSString *absoluteString = [self absoluteString];
        NSRange separatorsRange = NSMakeRange(range.location + range.length + 1, rangeWithSeparators.length - range.length - 1);
        if (separatorsRange.location + separatorsRange.length <= [absoluteString length]) {
            NSString *slashes = [absoluteString substringWithRange:separatorsRange];
            result = [slashes dataUsingEncoding:NSISOLatin1StringEncoding];
        }
    }
    return result;
}

#define completeURL (CFURLComponentType)-1

// Returns the bytes of one URL component (or of the whole URL when componentType is completeURL)
// with every byte <= 0x20 or >= 0x7f %-escaped. Query components get a leading '?'.
// Returns nil if the component is not present.
-(NSData *)_web_dataForURLComponentType:(CFURLComponentType)componentType
{
    UInt8 staticAllBytesBuffer[URL_BYTES_BUFFER_LENGTH];
    UInt8 *allBytesBuffer = staticAllBytesBuffer;

    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, URL_BYTES_BUFFER_LENGTH);
    if (bytesFilled == -1) {
        CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)self, NULL, 0);
        allBytesBuffer = static_cast<UInt8 *>(malloc(bytesToAllocate));
        bytesFilled = CFURLGetBytes((CFURLRef)self, allBytesBuffer, bytesToAllocate);
    }

    CFRange range;
    if (componentType != completeURL) {
        range = CFURLGetByteRangeForComponent((CFURLRef)self, componentType, NULL);
        if (range.location == kCFNotFound) {
            // Do not leak the heap buffer when the component is missing.
            if (staticAllBytesBuffer != allBytesBuffer) {
                free(allBytesBuffer);
            }
            return nil;
        }
    }
    else {
        range.location = 0;
        range.length = bytesFilled;
    }

    const unsigned char *bytes = allBytesBuffer + range.location;
    NSMutableData *resultData = [NSMutableData data];
    // NOTE: add a leading '?' to non-zero length query strings.
    // NOTE: retain question-mark only query strings.
    if (componentType == kCFURLComponentQuery) {
        if (range.length > 0 || [self _web_hasQuestionMarkOnlyQueryString]) {
            [resultData appendBytes:"?" length:1];
        }
    }
    CFIndex i;
    for (i = 0; i < range.length; i++) {
        unsigned char c = bytes[i];
        if (c <= 0x20 || c >= 0x7f) {
            char escaped[3];
            escaped[0] = '%';
            escaped[1] = hexDigit(c >> 4);
            escaped[2] = hexDigit(c & 0xf);
            [resultData appendBytes:escaped length:3];
        }
        else {
            char b[1];
            b[0] = c;
            [resultData appendBytes:b length:1];
        }
    }

    if (staticAllBytesBuffer != allBytesBuffer) {
        free(allBytesBuffer);
    }

    return resultData;
}

-(NSData *)_web_schemeData
{
    return [self _web_dataForURLComponentType:kCFURLComponentScheme];
}

-(NSData *)_web_hostData
{
    NSData *result = [self _web_dataForURLComponentType:kCFURLComponentHost];
    NSData *scheme = [self _web_schemeData];
    // Take off localhost for file
    if ([scheme _web_isCaseInsensitiveEqualToCString:"file"]) {
        return ([result _web_isCaseInsensitiveEqualToCString:"localhost"]) ? nil : result;
    }
    return result;
}

// Returns the host as a string decoded from UTF-8; a missing host is treated as empty data.
- (NSString *)_web_hostString
{
    NSData *data = [self _web_hostData];
    if (!data) {
        data = [NSData data];
    }
    // Use the nil-checked data computed above rather than fetching the host data again.
    return [[[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] autorelease];
}

- (NSString *)_webkit_suggestedFilenameWithMIMEType:(NSString *)MIMEType
{
    return suggestedFilenameWithMIMEType(self, MIMEType);
}

@end

@implementation NSString (WebNSURLExtras)

// Returns NO if the string contains anything that would lead _web_userVisibleString to
// change it (see the check comment below), YES otherwise.
- (BOOL)_web_isUserVisibleURL
{
    BOOL valid = YES;
    // get buffer

    char static_buffer[1024];
    const char *p;
    BOOL success = CFStringGetCString((CFStringRef)self, static_buffer, 1023, kCFStringEncodingUTF8);
    if (success) {
        p = static_buffer;
    } else {
        p = [self UTF8String];
    }

    int length = strlen(p);

    // check for characters <= 0x20 or >=0x7f, %-escape sequences of %7f, and xn--, these
    // are the things that will lead _web_userVisibleString to actually change things.
    int i;
    for (i = 0; i < length; i++) {
        unsigned char c = p[i];
        // escape control characters, space, and delete
        if (c <= 0x20 || c == 0x7f) {
            valid = NO;
            break;
        } else if (c == '%' && (i + 1 < length && isHexDigit(p[i + 1])) && i + 2 < length && isHexDigit(p[i + 2])) {
            unsigned char u = (hexDigitValue(p[i + 1]) << 4) | hexDigitValue(p[i + 2]);
            if (u > 0x7f) {
                valid = NO;
                break;
            }
            i += 2;
        } else {
            // Check for "xn--" in an efficient, non-case-sensitive, way.
            if (c == '-' && i >= 3 && (p[i - 3] | 0x20) == 'x' && (p[i - 2] | 0x20) == 'n' && p[i - 1] == '-') {
                valid = NO;
                break;
            }
        }
    }

    return valid;
}


- (BOOL)_webkit_isJavaScriptURL
{
    return [self _webkit_hasCaseInsensitivePrefix:@"javascript:"];
}

- (BOOL)_webkit_isFileURL
{
    return [self rangeOfString:@"file:" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)].location != NSNotFound;
}

- (NSString *)_webkit_stringByReplacingValidPercentEscapes
{
    return decodeURLEscapeSequences(self);
}

- (NSString *)_webkit_scriptIfJavaScriptURL
{
    if (![self _webkit_isJavaScriptURL]) {
        return nil;
    }
    // 11 is the length of "javascript:"
    return [[self substringFromIndex:11] _webkit_stringByReplacingValidPercentEscapes];
}

- (BOOL)_webkit_isFTPDirectoryURL
{
    int length = [self length];
    if (length < 5) {  // 5 is length of "ftp:/"
        return NO;
    }
    unichar lastChar = [self characterAtIndex:length - 1];
    return lastChar == '/' && [self _webkit_hasCaseInsensitivePrefix:@"ftp:"];
}


// Reads script names from the file at filename and sets the matching bits in IDNScriptWhiteList.
// Returns NO if filename is nil or the file cannot be opened, YES otherwise.
static BOOL readIDNScriptWhiteListFile(NSString *filename)
{
    if (!filename) {
        return NO;
    }
    FILE *file = fopen([filename fileSystemRepresentation], "r");
    if (file == NULL) {
        return NO;
    }

    // Read a word at a time.
    // Allow comments, starting with # character to the end of the line.
    while (1) {
        // Skip a comment if present.
        int result = fscanf(file, " #%*[^\n\r]%*[\n\r]");
        if (result == EOF) {
            break;
        }

        // Read a script name if present.
        char word[33];
        result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
        if (result == EOF) {
            break;
        }
        if (result == 1) {
            // Got a word, map to script code and put it into the array.
            int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
            if (script >= 0 && script < USCRIPT_CODE_LIMIT) {
                size_t index = script / 32;
                // Unsigned literal: shifting a signed 1 by 31 would overflow int.
                uint32_t mask = 1U << (script % 32);
                IDNScriptWhiteList[index] |= mask;
            }
        }
    }
    fclose(file);
    return YES;
}

// Loads the IDN script white list from the first library directory that has one, falling back
// to the copy inside the WebKit bundle.
static void readIDNScriptWhiteList(void)
{
    // Read white list from library.
    NSArray *dirs = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
    int i, numDirs = [dirs count];
    for (i = 0; i < numDirs; i++) {
        NSString *dir = [dirs objectAtIndex:i];
        if (readIDNScriptWhiteListFile([dir stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"])) {
            return;
        }
    }

    // Fall back on white list inside bundle.
    NSBundle *bundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
    readIDNScriptWhiteListFile([bundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"]);
}

// Returns YES only if every character in buffer belongs to a white-listed script and is not a
// lookalike character.
static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
{
    pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);

    int32_t i = 0;
    while (i < length) {
        UChar32 c;
        U16_NEXT(buffer, i, length, c)
        UErrorCode error = U_ZERO_ERROR;
        UScriptCode script = uscript_getScript(c, &error);
        if (error != U_ZERO_ERROR) {
            LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
            return NO;
        }
        if (script < 0) {
            LOG_ERROR("got negative number for script code from ICU: %d", script);
            return NO;
        }
        if (script >= USCRIPT_CODE_LIMIT) {
            return NO;
        }
        size_t index = script / 32;
        // Unsigned literal: shifting a signed 1 by 31 would overflow int.
        uint32_t mask = 1U << (script % 32);
        if (!(IDNScriptWhiteList[index] & mask)) {
            return NO;
        }

        if (isLookalikeCharacter(c))
            return NO;
    }
    return YES;
}

static BOOL allCharactersAllowedByTLDRules(const UChar* buffer, int32_t length)
{
    // Skip
trailing dot for root domain. 1056 if (buffer[length - 1] == '.') 1057 --length; 1058 1059 if (length > 3 1060 && buffer[length - 3] == '.' 1061 && buffer[length - 2] == 0x0440 // CYRILLIC SMALL LETTER ER 1062 && buffer[length - 1] == 0x0444) // CYRILLIC SMALL LETTER EF 1063 { 1064 // Rules defined by <http://www.cctld.ru/ru/docs/rulesrf.php>. This code only checks requirements that matter for presentation purposes. 1065 for (int32_t i = length - 4; i; --i) { 1066 UChar ch = buffer[i]; 1067 1068 // Only modern Russian letters, digits and dashes are allowed. 1069 if ((ch >= 0x0430 && ch <= 0x044f) 1070 || ch == 0x0451 1071 || (ch >= '0' && ch <= '9') 1072 || ch == '-') 1073 continue; 1074 1075 // Only check top level domain. Lower level registrars may have different rules. 1076 if (ch == '.') 1077 break; 1078 1079 return NO; 1080 } 1081 return YES; 1082 } 1083 1084 // Not a known top level domain with special rules. 1085 return NO; 1086 } 1087 1088 // Return value of nil means no mapping is necessary. 1089 // If makeString is NO, then return value is either nil or self to indicate mapping is necessary. 1090 // If makeString is YES, then return value is either nil or the mapped string. 
1091 - (NSString *)_web_mapHostNameWithRange:(NSRange)range encode:(BOOL)encode makeString:(BOOL)makeString 1092 { 1093 if (range.length > HOST_NAME_BUFFER_LENGTH) { 1094 return nil; 1095 } 1096 1097 if ([self length] == 0) 1098 return nil; 1099 1100 UChar sourceBuffer[HOST_NAME_BUFFER_LENGTH]; 1101 UChar destinationBuffer[HOST_NAME_BUFFER_LENGTH]; 1102 1103 NSString *string = self; 1104 if (encode && [self rangeOfString:@"%" options:NSLiteralSearch range:range].location != NSNotFound) { 1105 NSString *substring = [self substringWithRange:range]; 1106 substring = WebCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)substring, CFSTR(""))); 1107 if (substring != nil) { 1108 string = substring; 1109 range = NSMakeRange(0, [string length]); 1110 } 1111 } 1112 1113 int length = range.length; 1114 [string getCharacters:sourceBuffer range:range]; 1115 1116 UErrorCode error = U_ZERO_ERROR; 1117 int32_t numCharactersConverted = (encode ? uidna_IDNToASCII : uidna_IDNToUnicode) 1118 (sourceBuffer, length, destinationBuffer, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &error); 1119 if (error != U_ZERO_ERROR) { 1120 return nil; 1121 } 1122 if (numCharactersConverted == length && memcmp(sourceBuffer, destinationBuffer, length * sizeof(UChar)) == 0) { 1123 return nil; 1124 } 1125 if (!encode && !allCharactersInIDNScriptWhiteList(destinationBuffer, numCharactersConverted) && !allCharactersAllowedByTLDRules(destinationBuffer, numCharactersConverted)) { 1126 return nil; 1127 } 1128 return makeString ? 
(NSString *)[NSString stringWithCharacters:destinationBuffer length:numCharactersConverted] : (NSString *)self; 1129 } 1130 1131 - (BOOL)_web_hostNameNeedsDecodingWithRange:(NSRange)range 1132 { 1133 return [self _web_mapHostNameWithRange:range encode:NO makeString:NO] != nil; 1134 } 1135 1136 - (BOOL)_web_hostNameNeedsEncodingWithRange:(NSRange)range 1137 { 1138 return [self _web_mapHostNameWithRange:range encode:YES makeString:NO] != nil; 1139 } 1140 1141 - (NSString *)_web_decodeHostNameWithRange:(NSRange)range 1142 { 1143 return [self _web_mapHostNameWithRange:range encode:NO makeString:YES]; 1144 } 1145 1146 - (NSString *)_web_encodeHostNameWithRange:(NSRange)range 1147 { 1148 return [self _web_mapHostNameWithRange:range encode:YES makeString:YES]; 1149 } 1150 1151 - (NSString *)_web_decodeHostName 1152 { 1153 NSString *name = [self _web_mapHostNameWithRange:NSMakeRange(0, [self length]) encode:NO makeString:YES]; 1154 return name == nil ? self : name; 1155 } 1156 1157 - (NSString *)_web_encodeHostName 1158 { 1159 NSString *name = [self _web_mapHostNameWithRange:NSMakeRange(0, [self length]) encode:YES makeString:YES]; 1160 return name == nil ? self : name; 1161 } 1162 1163 -(NSRange)_webkit_rangeOfURLScheme 1164 { 1165 NSRange colon = [self rangeOfString:@":"]; 1166 if (colon.location != NSNotFound && colon.location > 0) { 1167 NSRange scheme = {0, colon.location}; 1168 static NSCharacterSet *InverseSchemeCharacterSet = nil; 1169 if (!InverseSchemeCharacterSet) { 1170 /* 1171 This stuff is very expensive. 10-15 msec on a 2x1.2GHz. If not cached it swamps 1172 everything else when adding items to the autocomplete DB. Makes me wonder if we 1173 even need to enforce the character set here. 
1174 */ 1175 NSString *acceptableCharacters = @"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+.-"; 1176 InverseSchemeCharacterSet = [[[NSCharacterSet characterSetWithCharactersInString:acceptableCharacters] invertedSet] retain]; 1177 } 1178 NSRange illegals = [self rangeOfCharacterFromSet:InverseSchemeCharacterSet options:0 range:scheme]; 1179 if (illegals.location == NSNotFound) 1180 return scheme; 1181 } 1182 return NSMakeRange(NSNotFound, 0); 1183 } 1184 1185 -(BOOL)_webkit_looksLikeAbsoluteURL 1186 { 1187 // Trim whitespace because _web_URLWithString allows whitespace. 1188 return [[self _webkit_stringByTrimmingWhitespace] _webkit_rangeOfURLScheme].location != NSNotFound; 1189 } 1190 1191 - (NSString *)_webkit_URLFragment 1192 { 1193 NSRange fragmentRange; 1194 1195 fragmentRange = [self rangeOfString:@"#" options:NSLiteralSearch]; 1196 if (fragmentRange.location == NSNotFound) 1197 return nil; 1198 return [self substringFromIndex:fragmentRange.location + 1]; 1199 } 1200 1201 @end 1202