1 /* 2 * Copyright (C) 2004, 2007, 2008 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "config.h" 27 28 #if !USE(GOOGLEURL) 29 30 #include "KURL.h" 31 32 #include "CString.h" 33 #include "StringHash.h" 34 #include "TextEncoding.h" 35 #include <wtf/HashMap.h> 36 #include <wtf/StdLibExtras.h> 37 38 #if USE(ICU_UNICODE) 39 #include <unicode/uidna.h> 40 #elif USE(QT4_UNICODE) 41 #include <QUrl> 42 #elif USE(GLIB_UNICODE) 43 #include <glib.h> 44 #include <wtf/gtk/GOwnPtr.h> 45 #endif 46 47 #include <stdio.h> 48 49 using namespace std; 50 using namespace WTF; 51 52 namespace WebCore { 53 54 typedef Vector<char, 512> CharBuffer; 55 typedef Vector<UChar, 512> UCharBuffer; 56 57 // FIXME: This file makes too much use of the + operator on String. 58 // We either have to optimize that operator so it doesn't involve 59 // so many allocations, or change this to use Vector<UChar> instead. 60 61 enum URLCharacterClasses { 62 // alpha 63 SchemeFirstChar = 1 << 0, 64 65 // ( alpha | digit | "+" | "-" | "." ) 66 SchemeChar = 1 << 1, 67 68 // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 69 // unreserved = alphanum | mark 70 // ( unreserved | escaped | ";" | ":" | "&" | "=" | "+" | "$" | "," ) 71 UserInfoChar = 1 << 2, 72 73 // alnum | "." | "-" | "%" 74 // The above is what the specification says, but we are lenient to 75 // match existing practice and also allow: 76 // "_" 77 HostnameChar = 1 << 3, 78 79 // hexdigit | ":" | "%" 80 IPv6Char = 1 << 4, 81 82 // "#" | "?" | "/" | nul 83 PathSegmentEndChar = 1 << 5, 84 85 // not allowed in path 86 BadChar = 1 << 6 87 }; 88 89 static const char hexDigits[17] = "0123456789ABCDEF"; 90 91 static const unsigned char characterClassTable[256] = { 92 /* 0 nul */ PathSegmentEndChar, /* 1 soh */ BadChar, 93 /* 2 stx */ BadChar, /* 3 etx */ BadChar, 94 /* 4 eot */ BadChar, /* 5 enq */ BadChar, /* 6 ack */ BadChar, /* 7 bel */ BadChar, 95 /* 8 bs */ BadChar, /* 9 ht */ BadChar, /* 10 nl */ BadChar, /* 11 vt */ BadChar, 96 /* 12 np */ BadChar, /* 13 cr */ BadChar, /* 14 so */ BadChar, /* 15 si */ BadChar, 97 /* 16 dle */ BadChar, /* 17 dc1 */ BadChar, /* 18 dc2 */ BadChar, /* 19 dc3 */ BadChar, 98 /* 20 dc4 */ BadChar, /* 21 nak */ BadChar, /* 22 syn */ BadChar, /* 23 etb */ BadChar, 99 /* 24 can */ BadChar, /* 25 em */ BadChar, /* 26 sub */ BadChar, /* 27 esc */ BadChar, 100 /* 28 fs */ BadChar, /* 29 gs */ BadChar, /* 30 rs */ BadChar, /* 31 us */ BadChar, 101 /* 32 sp */ BadChar, /* 33 ! */ UserInfoChar, 102 /* 34 " */ BadChar, /* 35 # */ PathSegmentEndChar | BadChar, 103 /* 36 $ */ UserInfoChar, /* 37 % */ UserInfoChar | HostnameChar | IPv6Char | BadChar, 104 /* 38 & */ UserInfoChar, /* 39 ' */ UserInfoChar, 105 /* 40 ( */ UserInfoChar, /* 41 ) */ UserInfoChar, 106 /* 42 * */ UserInfoChar, /* 43 + */ SchemeChar | UserInfoChar, 107 /* 44 , */ UserInfoChar, 108 /* 45 - */ SchemeChar | UserInfoChar | HostnameChar, 109 /* 46 . */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 110 /* 47 / */ PathSegmentEndChar, 111 /* 48 0 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 112 /* 49 1 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 113 /* 50 2 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 114 /* 51 3 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 115 /* 52 4 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 116 /* 53 5 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 117 /* 54 6 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 118 /* 55 7 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 119 /* 56 8 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 120 /* 57 9 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 121 /* 58 : */ UserInfoChar | IPv6Char, /* 59 ; */ UserInfoChar, 122 /* 60 < */ BadChar, /* 61 = */ UserInfoChar, 123 /* 62 > */ BadChar, /* 63 ? */ PathSegmentEndChar | BadChar, 124 /* 64 @ */ 0, 125 /* 65 A */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 126 /* 66 B */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 127 /* 67 C */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 128 /* 68 D */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 129 /* 69 E */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 130 /* 70 F */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 131 /* 71 G */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 132 /* 72 H */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 133 /* 73 I */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 134 /* 74 J */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 135 /* 75 K */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 136 /* 76 L */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 137 /* 77 M */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 138 /* 78 N */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 139 /* 79 O */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 140 /* 80 P */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 141 /* 81 Q */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 142 /* 82 R */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 143 /* 83 S */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 144 /* 84 T */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 145 /* 85 U */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 146 /* 86 V */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 147 /* 87 W */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 148 /* 88 X */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 149 /* 89 Y */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 150 /* 90 Z */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 151 /* 91 [ */ 0, 152 /* 92 \ */ 0, /* 93 ] */ 0, 153 /* 94 ^ */ 0, 154 /* 95 _ */ UserInfoChar | HostnameChar, 155 /* 96 ` */ 0, 156 /* 97 a */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 157 /* 98 b */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 158 /* 99 c */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 159 /* 100 d */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 160 /* 101 e */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 161 /* 102 f */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char, 162 /* 103 g */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 163 /* 104 h */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 164 /* 105 i */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 165 /* 106 j */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 166 /* 107 k */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 167 /* 108 l */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 168 /* 109 m */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 169 /* 110 n */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 170 /* 111 o */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 171 /* 112 p */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 172 /* 113 q */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 173 /* 114 r */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 174 /* 115 s */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 175 /* 116 t */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 176 /* 117 u */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 177 /* 118 v */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 178 /* 119 w */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 179 /* 120 x */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 180 /* 121 y */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 181 /* 122 z */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar, 182 /* 123 { */ 0, 183 /* 124 | */ 0, /* 125 } */ 0, /* 126 ~ */ UserInfoChar, /* 127 del */ BadChar, 184 /* 128 */ BadChar, /* 129 */ BadChar, /* 130 */ BadChar, /* 131 */ BadChar, 185 /* 132 */ BadChar, /* 133 */ BadChar, /* 134 */ BadChar, /* 135 */ BadChar, 186 /* 136 */ BadChar, /* 137 */ BadChar, /* 138 */ BadChar, /* 139 */ BadChar, 187 /* 140 */ BadChar, /* 141 */ BadChar, /* 142 */ BadChar, /* 143 */ BadChar, 188 /* 144 */ BadChar, /* 145 */ BadChar, /* 146 */ BadChar, /* 147 */ BadChar, 189 /* 148 */ BadChar, /* 149 */ BadChar, /* 150 */ BadChar, /* 151 */ BadChar, 190 /* 152 */ BadChar, /* 153 */ BadChar, /* 154 */ BadChar, /* 155 */ BadChar, 191 /* 156 */ BadChar, /* 157 */ BadChar, /* 158 */ BadChar, /* 159 */ BadChar, 192 /* 160 */ BadChar, /* 161 */ BadChar, /* 162 */ BadChar, /* 163 */ BadChar, 193 /* 164 */ BadChar, /* 165 */ BadChar, /* 166 */ BadChar, /* 167 */ BadChar, 194 /* 168 */ BadChar, /* 169 */ BadChar, /* 170 */ BadChar, /* 171 */ BadChar, 195 /* 172 */ BadChar, /* 173 */ BadChar, /* 174 */ BadChar, /* 175 */ BadChar, 196 /* 176 */ BadChar, /* 177 */ BadChar, /* 178 */ BadChar, /* 179 */ BadChar, 197 /* 180 */ BadChar, /* 181 */ BadChar, /* 182 */ BadChar, /* 183 */ BadChar, 198 /* 184 */ BadChar, /* 185 */ BadChar, /* 186 */ BadChar, /* 187 */ BadChar, 199 /* 188 */ BadChar, /* 189 */ BadChar, /* 190 */ BadChar, /* 191 */ BadChar, 200 /* 192 */ BadChar, /* 193 */ BadChar, /* 194 */ BadChar, /* 195 */ BadChar, 201 /* 196 */ BadChar, /* 197 */ BadChar, /* 198 */ BadChar, /* 199 */ BadChar, 202 /* 200 */ BadChar, /* 201 */ BadChar, /* 202 */ BadChar, /* 203 */ BadChar, 203 /* 204 */ BadChar, /* 205 */ BadChar, /* 206 */ BadChar, /* 207 */ BadChar, 204 /* 208 */ BadChar, /* 209 */ BadChar, /* 210 */ BadChar, /* 211 */ BadChar, 205 /* 212 */ BadChar, /* 213 */ BadChar, /* 214 */ BadChar, /* 215 */ BadChar, 206 /* 216 */ BadChar, /* 217 */ BadChar, /* 218 */ BadChar, /* 219 */ BadChar, 207 /* 220 */ BadChar, /* 221 */ BadChar, /* 222 */ BadChar, /* 223 */ BadChar, 208 /* 224 */ BadChar, /* 225 */ BadChar, /* 226 */ BadChar, /* 227 */ BadChar, 209 /* 228 */ BadChar, /* 229 */ BadChar, /* 230 */ BadChar, /* 231 */ BadChar, 210 /* 232 */ BadChar, /* 233 */ BadChar, /* 234 */ BadChar, /* 235 */ BadChar, 211 /* 236 */ BadChar, /* 237 */ BadChar, /* 238 */ BadChar, /* 239 */ BadChar, 212 /* 240 */ BadChar, /* 241 */ BadChar, /* 242 */ BadChar, /* 243 */ BadChar, 213 /* 244 */ BadChar, /* 245 */ BadChar, /* 246 */ BadChar, /* 247 */ BadChar, 214 /* 248 */ BadChar, /* 249 */ BadChar, /* 250 */ BadChar, /* 251 */ BadChar, 215 /* 252 */ BadChar, /* 253 */ BadChar, /* 254 */ BadChar, /* 255 */ BadChar 216 }; 217 218 static int copyPathRemovingDots(char* dst, const char* src, int srcStart, int srcEnd); 219 static void encodeRelativeString(const String& rel, const TextEncoding&, CharBuffer& ouput); 220 static String substituteBackslashes(const String&); 221 static bool isValidProtocol(const String&); 222 223 static inline bool isSchemeFirstChar(char c) { return characterClassTable[static_cast<unsigned char>(c)] & SchemeFirstChar; } 224 static inline bool isSchemeFirstChar(UChar c) { return c <= 0xff && (characterClassTable[c] & SchemeFirstChar); } 225 static inline bool isSchemeChar(char c) { return characterClassTable[static_cast<unsigned char>(c)] & SchemeChar; } 226 static inline bool isSchemeChar(UChar c) { return c <= 0xff && (characterClassTable[c] & SchemeChar); } 227 static inline bool isUserInfoChar(unsigned char c) { return characterClassTable[c] & UserInfoChar; } 228 static inline bool isHostnameChar(unsigned char c) { return characterClassTable[c] & HostnameChar; } 229 static inline bool isIPv6Char(unsigned char c) { return characterClassTable[c] & IPv6Char; } 230 static inline bool isPathSegmentEndChar(char c) { return characterClassTable[static_cast<unsigned char>(c)] & PathSegmentEndChar; } 231 static inline bool isPathSegmentEndChar(UChar c) { return c <= 0xff && (characterClassTable[c] & PathSegmentEndChar); } 232 static inline bool isBadChar(unsigned char c) { return characterClassTable[c] & BadChar; } 233 234 static inline int hexDigitValue(UChar c) 235 { 236 ASSERT(isASCIIHexDigit(c)); 237 if (c < 'A') 238 return c - '0'; 239 return (c - 'A' + 10) & 0xF; // handle both upper and lower case without a branch 240 } 241 242 // Copies the source to the destination, assuming all the source characters are 243 // ASCII. The destination buffer must be large enough. Null characters are allowed 244 // in the source string, and no attempt is made to null-terminate the result. 245 static void copyASCII(const UChar* src, int length, char* dest) 246 { 247 for (int i = 0; i < length; i++) 248 dest[i] = static_cast<char>(src[i]); 249 } 250 251 static void appendASCII(const String& base, const char* rel, size_t len, CharBuffer& buffer) 252 { 253 buffer.resize(base.length() + len + 1); 254 copyASCII(base.characters(), base.length(), buffer.data()); 255 memcpy(buffer.data() + base.length(), rel, len); 256 buffer[buffer.size() - 1] = '\0'; 257 } 258 259 // FIXME: Move to PlatformString.h eventually. 260 // Returns the index of the first index in string |s| of any of the characters 261 // in |toFind|. |toFind| should be a null-terminated string, all characters up 262 // to the null will be searched. Returns int if not found. 263 static int findFirstOf(const UChar* s, int sLen, int startPos, const char* toFind) 264 { 265 for (int i = startPos; i < sLen; i++) { 266 const char* cur = toFind; 267 while (*cur) { 268 if (s[i] == *(cur++)) 269 return i; 270 } 271 } 272 return -1; 273 } 274 275 #ifndef NDEBUG 276 static void checkEncodedString(const String& url) 277 { 278 for (unsigned i = 0; i < url.length(); ++i) 279 ASSERT(!(url[i] & ~0x7F)); 280 281 ASSERT(!url.length() || isSchemeFirstChar(url[0])); 282 } 283 #else 284 static inline void checkEncodedString(const String&) 285 { 286 } 287 #endif 288 289 inline bool KURL::protocolIs(const String& string, const char* protocol) 290 { 291 return WebCore::protocolIs(string, protocol); 292 } 293 294 void KURL::invalidate() 295 { 296 m_isValid = false; 297 m_protocolInHTTPFamily = false; 298 m_schemeEnd = 0; 299 m_userStart = 0; 300 m_userEnd = 0; 301 m_passwordEnd = 0; 302 m_hostEnd = 0; 303 m_portEnd = 0; 304 m_pathEnd = 0; 305 m_pathAfterLastSlash = 0; 306 m_queryEnd = 0; 307 m_fragmentEnd = 0; 308 } 309 310 KURL::KURL(ParsedURLStringTag, const char* url) 311 { 312 parse(url, 0); 313 ASSERT(url == m_string); 314 } 315 316 KURL::KURL(ParsedURLStringTag, const String& url) 317 { 318 parse(url); 319 ASSERT(url == m_string); 320 } 321 322 KURL::KURL(const KURL& base, const String& relative) 323 { 324 init(base, relative, UTF8Encoding()); 325 } 326 327 KURL::KURL(const KURL& base, const String& relative, const TextEncoding& encoding) 328 { 329 // For UTF-{7,16,32}, we want to use UTF-8 for the query part as 330 // we do when submitting a form. A form with GET method 331 // has its contents added to a URL as query params and it makes sense 332 // to be consistent. 333 init(base, relative, encoding.encodingForFormSubmission()); 334 } 335 336 void KURL::init(const KURL& base, const String& relative, const TextEncoding& encoding) 337 { 338 // Allow resolutions with a null or empty base URL, but not with any other invalid one. 339 // FIXME: Is this a good rule? 340 if (!base.m_isValid && !base.isEmpty()) { 341 m_string = relative; 342 invalidate(); 343 return; 344 } 345 346 // For compatibility with Win IE, treat backslashes as if they were slashes, 347 // as long as we're not dealing with javascript: or data: URLs. 348 String rel = relative; 349 if (rel.contains('\\') && !(protocolIsJavaScript(rel) || protocolIs(rel, "data"))) 350 rel = substituteBackslashes(rel); 351 352 String* originalString = &rel; 353 354 bool allASCII = charactersAreAllASCII(rel.characters(), rel.length()); 355 CharBuffer strBuffer; 356 char* str; 357 size_t len; 358 if (allASCII) { 359 len = rel.length(); 360 strBuffer.resize(len + 1); 361 copyASCII(rel.characters(), len, strBuffer.data()); 362 strBuffer[len] = 0; 363 str = strBuffer.data(); 364 } else { 365 originalString = 0; 366 encodeRelativeString(rel, encoding, strBuffer); 367 str = strBuffer.data(); 368 len = strlen(str); 369 } 370 371 // Get rid of leading whitespace. 372 while (*str == ' ') { 373 originalString = 0; 374 str++; 375 --len; 376 } 377 378 // Get rid of trailing whitespace. 379 while (len && str[len - 1] == ' ') { 380 originalString = 0; 381 str[--len] = '\0'; 382 } 383 384 // According to the RFC, the reference should be interpreted as an 385 // absolute URI if possible, using the "leftmost, longest" 386 // algorithm. If the URI reference is absolute it will have a 387 // scheme, meaning that it will have a colon before the first 388 // non-scheme element. 389 bool absolute = false; 390 char* p = str; 391 if (isSchemeFirstChar(*p)) { 392 ++p; 393 while (isSchemeChar(*p)) { 394 ++p; 395 } 396 if (*p == ':') { 397 if (p[1] != '/' && equalIgnoringCase(base.protocol(), String(str, p - str)) && base.isHierarchical()) { 398 str = p + 1; 399 originalString = 0; 400 } else 401 absolute = true; 402 } 403 } 404 405 CharBuffer parseBuffer; 406 407 if (absolute) { 408 parse(str, originalString); 409 } else { 410 // If the base is empty or opaque (e.g. data: or javascript:), then the URL is invalid 411 // unless the relative URL is a single fragment. 412 if (!base.isHierarchical()) { 413 if (str[0] == '#') { 414 appendASCII(base.m_string.left(base.m_queryEnd), str, len, parseBuffer); 415 parse(parseBuffer.data(), 0); 416 } else { 417 m_string = relative; 418 invalidate(); 419 } 420 return; 421 } 422 423 switch (str[0]) { 424 case '\0': 425 // The reference is empty, so this is a reference to the same document with any fragment identifier removed. 426 *this = base; 427 removeFragmentIdentifier(); 428 break; 429 case '#': { 430 // must be fragment-only reference 431 appendASCII(base.m_string.left(base.m_queryEnd), str, len, parseBuffer); 432 parse(parseBuffer.data(), 0); 433 break; 434 } 435 case '?': { 436 // query-only reference, special case needed for non-URL results 437 appendASCII(base.m_string.left(base.m_pathEnd), str, len, parseBuffer); 438 parse(parseBuffer.data(), 0); 439 break; 440 } 441 case '/': 442 // must be net-path or absolute-path reference 443 if (str[1] == '/') { 444 // net-path 445 appendASCII(base.m_string.left(base.m_schemeEnd + 1), str, len, parseBuffer); 446 parse(parseBuffer.data(), 0); 447 } else { 448 // abs-path 449 appendASCII(base.m_string.left(base.m_portEnd), str, len, parseBuffer); 450 parse(parseBuffer.data(), 0); 451 } 452 break; 453 default: 454 { 455 // must be relative-path reference 456 457 // Base part plus relative part plus one possible slash added in between plus terminating \0 byte. 458 parseBuffer.resize(base.m_pathEnd + 1 + len + 1); 459 460 char* bufferPos = parseBuffer.data(); 461 462 // first copy everything before the path from the base 463 unsigned baseLength = base.m_string.length(); 464 const UChar* baseCharacters = base.m_string.characters(); 465 CharBuffer baseStringBuffer(baseLength); 466 copyASCII(baseCharacters, baseLength, baseStringBuffer.data()); 467 const char* baseString = baseStringBuffer.data(); 468 const char* baseStringStart = baseString; 469 const char* pathStart = baseStringStart + base.m_portEnd; 470 while (baseStringStart < pathStart) 471 *bufferPos++ = *baseStringStart++; 472 char* bufferPathStart = bufferPos; 473 474 // now copy the base path 475 const char* baseStringEnd = baseString + base.m_pathEnd; 476 477 // go back to the last slash 478 while (baseStringEnd > baseStringStart && baseStringEnd[-1] != '/') 479 baseStringEnd--; 480 481 if (baseStringEnd == baseStringStart) { 482 // no path in base, add a path separator if necessary 483 if (base.m_schemeEnd + 1 != base.m_pathEnd && *str && *str != '?' && *str != '#') 484 *bufferPos++ = '/'; 485 } else { 486 bufferPos += copyPathRemovingDots(bufferPos, baseStringStart, 0, baseStringEnd - baseStringStart); 487 } 488 489 const char* relStringStart = str; 490 const char* relStringPos = relStringStart; 491 492 while (*relStringPos && *relStringPos != '?' && *relStringPos != '#') { 493 if (relStringPos[0] == '.' && bufferPos[-1] == '/') { 494 if (isPathSegmentEndChar(relStringPos[1])) { 495 // skip over "." segment 496 relStringPos += 1; 497 if (relStringPos[0] == '/') 498 relStringPos++; 499 continue; 500 } else if (relStringPos[1] == '.' && isPathSegmentEndChar(relStringPos[2])) { 501 // skip over ".." segment and rewind the last segment 502 // the RFC leaves it up to the app to decide what to do with excess 503 // ".." segments - we choose to drop them since some web content 504 // relies on this. 505 relStringPos += 2; 506 if (relStringPos[0] == '/') 507 relStringPos++; 508 if (bufferPos > bufferPathStart + 1) 509 bufferPos--; 510 while (bufferPos > bufferPathStart + 1 && bufferPos[-1] != '/') 511 bufferPos--; 512 continue; 513 } 514 } 515 516 *bufferPos = *relStringPos; 517 relStringPos++; 518 bufferPos++; 519 } 520 521 // all done with the path work, now copy any remainder 522 // of the relative reference; this will also add a null terminator 523 strcpy(bufferPos, relStringPos); 524 525 parse(parseBuffer.data(), 0); 526 527 ASSERT(strlen(parseBuffer.data()) + 1 <= parseBuffer.size()); 528 break; 529 } 530 } 531 } 532 } 533 534 KURL KURL::copy() const 535 { 536 KURL result = *this; 537 result.m_string = result.m_string.crossThreadString(); 538 return result; 539 } 540 541 bool KURL::hasPath() const 542 { 543 return m_pathEnd != m_portEnd; 544 } 545 546 String KURL::lastPathComponent() const 547 { 548 if (!hasPath()) 549 return String(); 550 551 int end = m_pathEnd - 1; 552 if (m_string[end] == '/') 553 --end; 554 555 int start = m_string.reverseFind('/', end); 556 if (start < m_portEnd) 557 return String(); 558 ++start; 559 560 return m_string.substring(start, end - start + 1); 561 } 562 563 String KURL::protocol() const 564 { 565 return m_string.left(m_schemeEnd); 566 } 567 568 String KURL::host() const 569 { 570 int start = hostStart(); 571 return decodeURLEscapeSequences(m_string.substring(start, m_hostEnd - start)); 572 } 573 574 unsigned short KURL::port() const 575 { 576 if (m_hostEnd == m_portEnd) 577 return 0; 578 579 int number = m_string.substring(m_hostEnd + 1, m_portEnd - m_hostEnd - 1).toInt(); 580 if (number < 0 || number > 0xFFFF) 581 return 0; 582 return number; 583 } 584 585 String KURL::pass() const 586 { 587 if (m_passwordEnd == m_userEnd) 588 return String(); 589 590 return decodeURLEscapeSequences(m_string.substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1)); 591 } 592 593 String KURL::user() const 594 { 595 return decodeURLEscapeSequences(m_string.substring(m_userStart, m_userEnd - m_userStart)); 596 } 597 598 String KURL::fragmentIdentifier() const 599 { 600 if (m_fragmentEnd == m_queryEnd) 601 return String(); 602 603 return m_string.substring(m_queryEnd + 1, m_fragmentEnd - (m_queryEnd + 1)); 604 } 605 606 bool KURL::hasFragmentIdentifier() const 607 { 608 return m_fragmentEnd != m_queryEnd; 609 } 610 611 String KURL::baseAsString() const 612 { 613 return m_string.left(m_pathAfterLastSlash); 614 } 615 616 #ifdef NDEBUG 617 618 static inline void assertProtocolIsGood(const char*) 619 { 620 } 621 622 #else 623 624 static void assertProtocolIsGood(const char* protocol) 625 { 626 const char* p = protocol; 627 while (*p) { 628 ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z')); 629 ++p; 630 } 631 } 632 633 #endif 634 635 bool KURL::protocolIs(const char* protocol) const 636 { 637 assertProtocolIsGood(protocol); 638 639 // JavaScript URLs are "valid" and should be executed even if KURL decides they are invalid. 640 // The free function protocolIsJavaScript() should be used instead. 641 ASSERT(!equalIgnoringCase(protocol, String("javascript"))); 642 643 if (!m_isValid) 644 return false; 645 646 // Do the comparison without making a new string object. 647 for (int i = 0; i < m_schemeEnd; ++i) { 648 if (!protocol[i] || toASCIILower(m_string[i]) != protocol[i]) 649 return false; 650 } 651 return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument. 652 } 653 654 String KURL::query() const 655 { 656 if (m_queryEnd == m_pathEnd) 657 return String(); 658 659 return m_string.substring(m_pathEnd + 1, m_queryEnd - (m_pathEnd + 1)); 660 } 661 662 String KURL::path() const 663 { 664 return decodeURLEscapeSequences(m_string.substring(m_portEnd, m_pathEnd - m_portEnd)); 665 } 666 667 bool KURL::setProtocol(const String& s) 668 { 669 // Firefox and IE remove everything after the first ':'. 670 int separatorPosition = s.find(':'); 671 String newProtocol = s.substring(0, separatorPosition); 672 673 if (!isValidProtocol(newProtocol)) 674 return false; 675 676 if (!m_isValid) { 677 parse(newProtocol + ":" + m_string); 678 return true; 679 } 680 681 parse(newProtocol + m_string.substring(m_schemeEnd)); 682 return true; 683 } 684 685 void KURL::setHost(const String& s) 686 { 687 if (!m_isValid) 688 return; 689 690 // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations, 691 // and to avoid changing more than just the host. 692 693 bool slashSlashNeeded = m_userStart == m_schemeEnd + 1; 694 695 parse(m_string.left(hostStart()) + (slashSlashNeeded ? "//" : "") + s + m_string.substring(m_hostEnd)); 696 } 697 698 void KURL::removePort() 699 { 700 if (m_hostEnd == m_portEnd) 701 return; 702 parse(m_string.left(m_hostEnd) + m_string.substring(m_portEnd)); 703 } 704 705 void KURL::setPort(unsigned short i) 706 { 707 if (!m_isValid) 708 return; 709 710 bool colonNeeded = m_portEnd == m_hostEnd; 711 int portStart = (colonNeeded ? m_hostEnd : m_hostEnd + 1); 712 713 parse(m_string.left(portStart) + (colonNeeded ? ":" : "") + String::number(i) + m_string.substring(m_portEnd)); 714 } 715 716 void KURL::setHostAndPort(const String& hostAndPort) 717 { 718 if (!m_isValid) 719 return; 720 721 // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations, 722 // and to avoid changing more than just host and port. 723 724 bool slashSlashNeeded = m_userStart == m_schemeEnd + 1; 725 726 parse(m_string.left(hostStart()) + (slashSlashNeeded ? "//" : "") + hostAndPort + m_string.substring(m_portEnd)); 727 } 728 729 void KURL::setUser(const String& user) 730 { 731 if (!m_isValid) 732 return; 733 734 // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations, 735 // and to avoid changing more than just the user login. 736 String u; 737 int end = m_userEnd; 738 if (!user.isEmpty()) { 739 u = user; 740 if (m_userStart == m_schemeEnd + 1) 741 u = "//" + u; 742 // Add '@' if we didn't have one before. 743 if (end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@')) 744 u.append('@'); 745 } else { 746 // Remove '@' if we now have neither user nor password. 747 if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@') 748 end += 1; 749 } 750 parse(m_string.left(m_userStart) + u + m_string.substring(end)); 751 } 752 753 void KURL::setPass(const String& password) 754 { 755 if (!m_isValid) 756 return; 757 758 // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations, 759 // and to avoid changing more than just the user password. 760 String p; 761 int end = m_passwordEnd; 762 if (!password.isEmpty()) { 763 p = ":" + password + "@"; 764 if (m_userEnd == m_schemeEnd + 1) 765 p = "//" + p; 766 // Eat the existing '@' since we are going to add our own. 767 if (end != m_hostEnd && m_string[end] == '@') 768 end += 1; 769 } else { 770 // Remove '@' if we now have neither user nor password. 771 if (m_userStart == m_userEnd && end != m_hostEnd && m_string[end] == '@') 772 end += 1; 773 } 774 parse(m_string.left(m_userEnd) + p + m_string.substring(end)); 775 } 776 777 void KURL::setFragmentIdentifier(const String& s) 778 { 779 if (!m_isValid) 780 return; 781 782 // FIXME: Non-ASCII characters must be encoded and escaped to match parse() expectations. 783 parse(m_string.left(m_queryEnd) + "#" + s); 784 } 785 786 void KURL::removeFragmentIdentifier() 787 { 788 if (!m_isValid) 789 return; 790 parse(m_string.left(m_queryEnd)); 791 } 792 793 void KURL::setQuery(const String& query) 794 { 795 if (!m_isValid) 796 return; 797 798 // FIXME: '#' and non-ASCII characters must be encoded and escaped. 799 // Usually, the query is encoded using document encoding, not UTF-8, but we don't have 800 // access to the document in this function. 801 if ((query.isEmpty() || query[0] != '?') && !query.isNull()) 802 parse(m_string.left(m_pathEnd) + "?" + query + m_string.substring(m_queryEnd)); 803 else 804 parse(m_string.left(m_pathEnd) + query + m_string.substring(m_queryEnd)); 805 806 } 807 808 void KURL::setPath(const String& s) 809 { 810 if (!m_isValid) 811 return; 812 813 // FIXME: encodeWithURLEscapeSequences does not correctly escape '#' and '?', so fragment and query parts 814 // may be inadvertently affected. 815 parse(m_string.left(m_portEnd) + encodeWithURLEscapeSequences(s) + m_string.substring(m_pathEnd)); 816 } 817 818 String KURL::prettyURL() const 819 { 820 if (!m_isValid) 821 return m_string; 822 823 Vector<UChar> result; 824 825 append(result, protocol()); 826 result.append(':'); 827 828 Vector<UChar> authority; 829 830 if (m_hostEnd != m_passwordEnd) { 831 if (m_userEnd != m_userStart) { 832 append(authority, user()); 833 authority.append('@'); 834 } 835 append(authority, host()); 836 if (hasPort()) { 837 authority.append(':'); 838 append(authority, String::number(port())); 839 } 840 } 841 842 if (!authority.isEmpty()) { 843 result.append('/'); 844 result.append('/'); 845 result.append(authority); 846 } else if (protocolIs("file")) { 847 result.append('/'); 848 result.append('/'); 849 } 850 851 append(result, path()); 852 853 if (m_pathEnd != m_queryEnd) { 854 result.append('?'); 855 append(result, query()); 856 } 857 858 if (m_fragmentEnd != m_queryEnd) { 859 result.append('#'); 860 append(result, fragmentIdentifier()); 861 } 862 863 return String::adopt(result); 864 } 865 866 String decodeURLEscapeSequences(const String& str) 867 { 868 return decodeURLEscapeSequences(str, UTF8Encoding()); 869 } 870 871 String decodeURLEscapeSequences(const String& str, const TextEncoding& encoding) 872 { 873 Vector<UChar> result; 874 875 CharBuffer buffer; 876 877 int length = str.length(); 878 int decodedPosition = 0; 879 int searchPosition = 0; 880 int encodedRunPosition; 881 while ((encodedRunPosition = str.find('%', searchPosition)) >= 0) { 882 // Find the sequence of %-escape codes. 883 int encodedRunEnd = encodedRunPosition; 884 while (length - encodedRunEnd >= 3 885 && str[encodedRunEnd] == '%' 886 && isASCIIHexDigit(str[encodedRunEnd + 1]) 887 && isASCIIHexDigit(str[encodedRunEnd + 2])) 888 encodedRunEnd += 3; 889 if (encodedRunEnd == encodedRunPosition) { 890 ++searchPosition; 891 continue; 892 } 893 searchPosition = encodedRunEnd; 894 895 // Decode the %-escapes into bytes. 896 unsigned runLength = (encodedRunEnd - encodedRunPosition) / 3; 897 buffer.resize(runLength); 898 char* p = buffer.data(); 899 const UChar* q = str.characters() + encodedRunPosition; 900 for (unsigned i = 0; i < runLength; ++i) { 901 *p++ = (hexDigitValue(q[1]) << 4) | hexDigitValue(q[2]); 902 q += 3; 903 } 904 905 // Decode the bytes into Unicode characters. 906 String decoded = (encoding.isValid() ? encoding : UTF8Encoding()).decode(buffer.data(), p - buffer.data()); 907 if (decoded.isEmpty()) 908 continue; 909 910 // Build up the string with what we just skipped and what we just decoded. 911 result.append(str.characters() + decodedPosition, encodedRunPosition - decodedPosition); 912 result.append(decoded.characters(), decoded.length()); 913 decodedPosition = encodedRunEnd; 914 } 915 916 result.append(str.characters() + decodedPosition, length - decodedPosition); 917 918 return String::adopt(result); 919 } 920 921 bool KURL::isLocalFile() const 922 { 923 // Including feed here might be a bad idea since drag and drop uses this check 924 // and including feed would allow feeds to potentially let someone's blog 925 // read the contents of the clipboard on a drag, even without a drop. 926 // Likewise with using the FrameLoader::shouldTreatURLAsLocal() function. 927 return protocolIs("file"); 928 } 929 930 static void appendEscapingBadChars(char*& buffer, const char* strStart, size_t length) 931 { 932 char* p = buffer; 933 934 const char* str = strStart; 935 const char* strEnd = strStart + length; 936 while (str < strEnd) { 937 unsigned char c = *str++; 938 if (isBadChar(c)) { 939 if (c == '%' || c == '?') { 940 *p++ = c; 941 } else if (c != 0x09 && c != 0x0a && c != 0x0d) { 942 *p++ = '%'; 943 *p++ = hexDigits[c >> 4]; 944 *p++ = hexDigits[c & 0xF]; 945 } 946 } else { 947 *p++ = c; 948 } 949 } 950 951 buffer = p; 952 } 953 954 // copy a path, accounting for "." and ".." segments 955 static int copyPathRemovingDots(char* dst, const char* src, int srcStart, int srcEnd) 956 { 957 char* bufferPathStart = dst; 958 959 // empty path is a special case, and need not have a leading slash 960 if (srcStart != srcEnd) { 961 const char* baseStringStart = src + srcStart; 962 const char* baseStringEnd = src + srcEnd; 963 const char* baseStringPos = baseStringStart; 964 965 // this code is unprepared for paths that do not begin with a 966 // slash and we should always have one in the source string 967 ASSERT(baseStringPos[0] == '/'); 968 969 // copy the leading slash into the destination 970 *dst = *baseStringPos; 971 baseStringPos++; 972 dst++; 973 974 while (baseStringPos < baseStringEnd) { 975 if (baseStringPos[0] == '.' && dst[-1] == '/') { 976 if (baseStringPos[1] == '/' || baseStringPos + 1 == baseStringEnd) { 977 // skip over "." segment 978 baseStringPos += 2; 979 continue; 980 } else if (baseStringPos[1] == '.' && (baseStringPos[2] == '/' || 981 baseStringPos + 2 == baseStringEnd)) { 982 // skip over ".." segment and rewind the last segment 983 // the RFC leaves it up to the app to decide what to do with excess 984 // ".." segments - we choose to drop them since some web content 985 // relies on this. 986 baseStringPos += 3; 987 if (dst > bufferPathStart + 1) 988 dst--; 989 // Note that these two while blocks differ subtly. 990 // The first helps to remove multiple adjoining slashes as we rewind. 991 // The +1 to bufferPathStart in the first while block prevents eating a leading slash 992 while (dst > bufferPathStart + 1 && dst[-1] == '/') 993 dst--; 994 while (dst > bufferPathStart && dst[-1] != '/') 995 dst--; 996 continue; 997 } 998 } 999 1000 *dst = *baseStringPos; 1001 baseStringPos++; 1002 dst++; 1003 } 1004 } 1005 *dst = '\0'; 1006 return dst - bufferPathStart; 1007 } 1008 1009 static inline bool hasSlashDotOrDotDot(const char* str) 1010 { 1011 const unsigned char* p = reinterpret_cast<const unsigned char*>(str); 1012 if (!*p) 1013 return false; 1014 unsigned char pc = *p; 1015 while (unsigned char c = *++p) { 1016 if (c == '.' && (pc == '/' || pc == '.')) 1017 return true; 1018 pc = c; 1019 } 1020 return false; 1021 } 1022 1023 static inline bool matchLetter(char c, char lowercaseLetter) 1024 { 1025 return (c | 0x20) == lowercaseLetter; 1026 } 1027 1028 void KURL::parse(const String& string) 1029 { 1030 checkEncodedString(string); 1031 1032 CharBuffer buffer(string.length() + 1); 1033 copyASCII(string.characters(), string.length(), buffer.data()); 1034 buffer[string.length()] = '\0'; 1035 parse(buffer.data(), &string); 1036 } 1037 1038 void KURL::parse(const char* url, const String* originalString) 1039 { 1040 if (!url || url[0] == '\0') { 1041 // valid URL must be non-empty 1042 m_string = originalString ? *originalString : url; 1043 invalidate(); 1044 return; 1045 } 1046 1047 if (!isSchemeFirstChar(url[0])) { 1048 // scheme must start with an alphabetic character 1049 m_string = originalString ? *originalString : url; 1050 invalidate(); 1051 return; 1052 } 1053 1054 int schemeEnd = 0; 1055 while (isSchemeChar(url[schemeEnd])) 1056 schemeEnd++; 1057 1058 if (url[schemeEnd] != ':') { 1059 m_string = originalString ? *originalString : url; 1060 invalidate(); 1061 return; 1062 } 1063 1064 int userStart = schemeEnd + 1; 1065 int userEnd; 1066 int passwordStart; 1067 int passwordEnd; 1068 int hostStart; 1069 int hostEnd; 1070 int portStart; 1071 int portEnd; 1072 1073 bool hierarchical = url[schemeEnd + 1] == '/'; 1074 1075 bool isFile = schemeEnd == 4 1076 && matchLetter(url[0], 'f') 1077 && matchLetter(url[1], 'i') 1078 && matchLetter(url[2], 'l') 1079 && matchLetter(url[3], 'e'); 1080 1081 m_protocolInHTTPFamily = matchLetter(url[0], 'h') 1082 && matchLetter(url[1], 't') 1083 && matchLetter(url[2], 't') 1084 && matchLetter(url[3], 'p') 1085 && (url[4] == ':' || (matchLetter(url[4], 's') && url[5] == ':')); 1086 1087 if (hierarchical && url[schemeEnd + 2] == '/') { 1088 // The part after the scheme is either a net_path or an abs_path whose first path segment is empty. 1089 // Attempt to find an authority. 1090 1091 // FIXME: Authority characters may be scanned twice, and it would be nice to be faster. 1092 userStart += 2; 1093 userEnd = userStart; 1094 1095 int colonPos = 0; 1096 while (isUserInfoChar(url[userEnd])) { 1097 if (url[userEnd] == ':' && colonPos == 0) 1098 colonPos = userEnd; 1099 userEnd++; 1100 } 1101 1102 if (url[userEnd] == '@') { 1103 // actual end of the userinfo, start on the host 1104 if (colonPos != 0) { 1105 passwordEnd = userEnd; 1106 userEnd = colonPos; 1107 passwordStart = colonPos + 1; 1108 } else 1109 passwordStart = passwordEnd = userEnd; 1110 1111 hostStart = passwordEnd + 1; 1112 } else if (url[userEnd] == '[' || isPathSegmentEndChar(url[userEnd])) { 1113 // hit the end of the authority, must have been no user 1114 // or looks like an IPv6 hostname 1115 // either way, try to parse it as a hostname 1116 userEnd = userStart; 1117 passwordStart = passwordEnd = userEnd; 1118 hostStart = userStart; 1119 } else { 1120 // invalid character 1121 m_string = originalString ? *originalString : url; 1122 invalidate(); 1123 return; 1124 } 1125 1126 hostEnd = hostStart; 1127 1128 // IPV6 IP address 1129 if (url[hostEnd] == '[') { 1130 hostEnd++; 1131 while (isIPv6Char(url[hostEnd])) 1132 hostEnd++; 1133 if (url[hostEnd] == ']') 1134 hostEnd++; 1135 else { 1136 // invalid character 1137 m_string = originalString ? *originalString : url; 1138 invalidate(); 1139 return; 1140 } 1141 } else { 1142 while (isHostnameChar(url[hostEnd])) 1143 hostEnd++; 1144 } 1145 1146 if (url[hostEnd] == ':') { 1147 portStart = portEnd = hostEnd + 1; 1148 1149 // possible start of port 1150 portEnd = portStart; 1151 while (isASCIIDigit(url[portEnd])) 1152 portEnd++; 1153 } else 1154 portStart = portEnd = hostEnd; 1155 1156 if (!isPathSegmentEndChar(url[portEnd])) { 1157 // invalid character 1158 m_string = originalString ? *originalString : url; 1159 invalidate(); 1160 return; 1161 } 1162 1163 if (userStart == portEnd && !m_protocolInHTTPFamily && !isFile) { 1164 // No authority found, which means that this is not a net_path, but rather an abs_path whose first two 1165 // path segments are empty. For file, http and https only, an empty authority is allowed. 1166 userStart -= 2; 1167 userEnd = userStart; 1168 passwordStart = userEnd; 1169 passwordEnd = passwordStart; 1170 hostStart = passwordEnd; 1171 hostEnd = hostStart; 1172 portStart = hostEnd; 1173 portEnd = hostEnd; 1174 } 1175 } else { 1176 // the part after the scheme must be an opaque_part or an abs_path 1177 userEnd = userStart; 1178 passwordStart = passwordEnd = userEnd; 1179 hostStart = hostEnd = passwordEnd; 1180 portStart = portEnd = hostEnd; 1181 } 1182 1183 int pathStart = portEnd; 1184 int pathEnd = pathStart; 1185 while (url[pathEnd] && url[pathEnd] != '?' && url[pathEnd] != '#') 1186 pathEnd++; 1187 1188 int queryStart = pathEnd; 1189 int queryEnd = queryStart; 1190 if (url[queryStart] == '?') { 1191 while (url[queryEnd] && url[queryEnd] != '#') 1192 queryEnd++; 1193 } 1194 1195 int fragmentStart = queryEnd; 1196 int fragmentEnd = fragmentStart; 1197 if (url[fragmentStart] == '#') { 1198 fragmentStart++; 1199 fragmentEnd = fragmentStart; 1200 while (url[fragmentEnd]) 1201 fragmentEnd++; 1202 } 1203 1204 // assemble it all, remembering the real ranges 1205 1206 Vector<char, 4096> buffer(fragmentEnd * 3 + 1); 1207 1208 char *p = buffer.data(); 1209 const char *strPtr = url; 1210 1211 // copy in the scheme 1212 const char *schemeEndPtr = url + schemeEnd; 1213 while (strPtr < schemeEndPtr) 1214 *p++ = *strPtr++; 1215 m_schemeEnd = p - buffer.data(); 1216 1217 bool hostIsLocalHost = portEnd - userStart == 9 1218 && matchLetter(url[userStart], 'l') 1219 && matchLetter(url[userStart+1], 'o') 1220 && matchLetter(url[userStart+2], 'c') 1221 && matchLetter(url[userStart+3], 'a') 1222 && matchLetter(url[userStart+4], 'l') 1223 && matchLetter(url[userStart+5], 'h') 1224 && matchLetter(url[userStart+6], 'o') 1225 && matchLetter(url[userStart+7], 's') 1226 && matchLetter(url[userStart+8], 't'); 1227 1228 // File URLs need a host part unless it is just file:// or file://localhost 1229 bool degenFilePath = pathStart == pathEnd && (hostStart == hostEnd || hostIsLocalHost); 1230 1231 bool haveNonHostAuthorityPart = userStart != userEnd || passwordStart != passwordEnd || portStart != portEnd; 1232 1233 // add ":" after scheme 1234 *p++ = ':'; 1235 1236 // if we have at least one authority part or a file URL - add "//" and authority 1237 if (isFile ? !degenFilePath : (haveNonHostAuthorityPart || hostStart != hostEnd)) { 1238 *p++ = '/'; 1239 *p++ = '/'; 1240 1241 m_userStart = p - buffer.data(); 1242 1243 // copy in the user 1244 strPtr = url + userStart; 1245 const char* userEndPtr = url + userEnd; 1246 while (strPtr < userEndPtr) 1247 *p++ = *strPtr++; 1248 m_userEnd = p - buffer.data(); 1249 1250 // copy in the password 1251 if (passwordEnd != passwordStart) { 1252 *p++ = ':'; 1253 strPtr = url + passwordStart; 1254 const char* passwordEndPtr = url + passwordEnd; 1255 while (strPtr < passwordEndPtr) 1256 *p++ = *strPtr++; 1257 } 1258 m_passwordEnd = p - buffer.data(); 1259 1260 // If we had any user info, add "@" 1261 if (p - buffer.data() != m_userStart) 1262 *p++ = '@'; 1263 1264 // copy in the host, except in the case of a file URL with authority="localhost" 1265 if (!(isFile && hostIsLocalHost && !haveNonHostAuthorityPart)) { 1266 strPtr = url + hostStart; 1267 const char* hostEndPtr = url + hostEnd; 1268 while (strPtr < hostEndPtr) 1269 *p++ = *strPtr++; 1270 } 1271 m_hostEnd = p - buffer.data(); 1272 1273 // copy in the port 1274 if (hostEnd != portStart) { 1275 *p++ = ':'; 1276 strPtr = url + portStart; 1277 const char *portEndPtr = url + portEnd; 1278 while (strPtr < portEndPtr) 1279 *p++ = *strPtr++; 1280 } 1281 m_portEnd = p - buffer.data(); 1282 } else 1283 m_userStart = m_userEnd = m_passwordEnd = m_hostEnd = m_portEnd = p - buffer.data(); 1284 1285 // For canonicalization, ensure we have a '/' for no path. 1286 // Do this only for hierarchical URL with protocol http or https. 1287 if (m_protocolInHTTPFamily && hierarchical && pathEnd == pathStart) 1288 *p++ = '/'; 1289 1290 // add path, escaping bad characters 1291 if (!hierarchical || !hasSlashDotOrDotDot(url)) 1292 appendEscapingBadChars(p, url + pathStart, pathEnd - pathStart); 1293 else { 1294 CharBuffer pathBuffer(pathEnd - pathStart + 1); 1295 size_t length = copyPathRemovingDots(pathBuffer.data(), url, pathStart, pathEnd); 1296 appendEscapingBadChars(p, pathBuffer.data(), length); 1297 } 1298 1299 m_pathEnd = p - buffer.data(); 1300 1301 // Find the position after the last slash in the path, or 1302 // the position before the path if there are no slashes in it. 1303 int i; 1304 for (i = m_pathEnd; i > m_portEnd; --i) { 1305 if (buffer[i - 1] == '/') 1306 break; 1307 } 1308 m_pathAfterLastSlash = i; 1309 1310 // add query, escaping bad characters 1311 appendEscapingBadChars(p, url + queryStart, queryEnd - queryStart); 1312 m_queryEnd = p - buffer.data(); 1313 1314 // add fragment, escaping bad characters 1315 if (fragmentEnd != queryEnd) { 1316 *p++ = '#'; 1317 appendEscapingBadChars(p, url + fragmentStart, fragmentEnd - fragmentStart); 1318 } 1319 m_fragmentEnd = p - buffer.data(); 1320 1321 ASSERT(p - buffer.data() <= static_cast<int>(buffer.size())); 1322 1323 // If we didn't end up actually changing the original string and 1324 // it was already in a String, reuse it to avoid extra allocation. 1325 if (originalString && originalString->length() == static_cast<unsigned>(m_fragmentEnd) && strncmp(buffer.data(), url, m_fragmentEnd) == 0) 1326 m_string = *originalString; 1327 else 1328 m_string = String(buffer.data(), m_fragmentEnd); 1329 1330 m_isValid = true; 1331 } 1332 1333 bool equalIgnoringFragmentIdentifier(const KURL& a, const KURL& b) 1334 { 1335 if (a.m_queryEnd != b.m_queryEnd) 1336 return false; 1337 unsigned queryLength = a.m_queryEnd; 1338 for (unsigned i = 0; i < queryLength; ++i) 1339 if (a.string()[i] != b.string()[i]) 1340 return false; 1341 return true; 1342 } 1343 1344 bool protocolHostAndPortAreEqual(const KURL& a, const KURL& b) 1345 { 1346 if (a.m_schemeEnd != b.m_schemeEnd) 1347 return false; 1348 int hostStartA = a.hostStart(); 1349 int hostStartB = b.hostStart(); 1350 if (a.m_hostEnd - hostStartA != b.m_hostEnd - hostStartB) 1351 return false; 1352 1353 // Check the scheme 1354 for (int i = 0; i < a.m_schemeEnd; ++i) 1355 if (a.string()[i] != b.string()[i]) 1356 return false; 1357 1358 // And the host 1359 for (int i = hostStartA; i < a.m_hostEnd; ++i) 1360 if (a.string()[i] != b.string()[i]) 1361 return false; 1362 1363 if (a.port() != b.port()) 1364 return false; 1365 1366 return true; 1367 } 1368 1369 1370 String encodeWithURLEscapeSequences(const String& notEncodedString) 1371 { 1372 CString asUTF8 = notEncodedString.utf8(); 1373 1374 CharBuffer buffer(asUTF8.length() * 3 + 1); 1375 char* p = buffer.data(); 1376 1377 const char* str = asUTF8.data(); 1378 const char* strEnd = str + asUTF8.length(); 1379 while (str < strEnd) { 1380 unsigned char c = *str++; 1381 if (isBadChar(c)) { 1382 *p++ = '%'; 1383 *p++ = hexDigits[c >> 4]; 1384 *p++ = hexDigits[c & 0xF]; 1385 } else 1386 *p++ = c; 1387 } 1388 1389 ASSERT(p - buffer.data() <= static_cast<int>(buffer.size())); 1390 1391 return String(buffer.data(), p - buffer.data()); 1392 } 1393 1394 // Appends the punycoded hostname identified by the given string and length to 1395 // the output buffer. The result will not be null terminated. 1396 static void appendEncodedHostname(UCharBuffer& buffer, const UChar* str, unsigned strLen) 1397 { 1398 // Needs to be big enough to hold an IDN-encoded name. 1399 // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK. 1400 const unsigned hostnameBufferLength = 2048; 1401 1402 if (strLen > hostnameBufferLength || charactersAreAllASCII(str, strLen)) { 1403 buffer.append(str, strLen); 1404 return; 1405 } 1406 1407 #if USE(ICU_UNICODE) 1408 UChar hostnameBuffer[hostnameBufferLength]; 1409 UErrorCode error = U_ZERO_ERROR; 1410 int32_t numCharactersConverted = uidna_IDNToASCII(str, strLen, hostnameBuffer, 1411 hostnameBufferLength, UIDNA_ALLOW_UNASSIGNED, 0, &error); 1412 if (error == U_ZERO_ERROR) 1413 buffer.append(hostnameBuffer, numCharactersConverted); 1414 #elif USE(QT4_UNICODE) 1415 QByteArray result = QUrl::toAce(String(str, strLen)); 1416 buffer.append(result.constData(), result.length()); 1417 #elif USE(GLIB_UNICODE) 1418 GOwnPtr<gchar> utf8Hostname; 1419 GOwnPtr<GError> utf8Err; 1420 utf8Hostname.set(g_utf16_to_utf8(str, strLen, 0, 0, &utf8Err.outPtr())); 1421 if (utf8Err) 1422 return; 1423 1424 GOwnPtr<gchar> encodedHostname; 1425 encodedHostname.set(g_hostname_to_ascii(utf8Hostname.get())); 1426 if (!encodedHostname) 1427 return; 1428 1429 buffer.append(encodedHostname.get(), strlen(encodedHostname.get())); 1430 #endif 1431 } 1432 1433 static void findHostnamesInMailToURL(const UChar* str, int strLen, Vector<pair<int, int> >& nameRanges) 1434 { 1435 // In a mailto: URL, host names come after a '@' character and end with a '>' or ',' or '?' or end of string character. 1436 // Skip quoted strings so that characters in them don't confuse us. 1437 // When we find a '?' character, we are past the part of the URL that contains host names. 1438 1439 nameRanges.clear(); 1440 1441 int p = 0; 1442 while (1) { 1443 // Find start of host name or of quoted string. 1444 int hostnameOrStringStart = findFirstOf(str, strLen, p, "\"@?"); 1445 if (hostnameOrStringStart == -1) 1446 return; 1447 UChar c = str[hostnameOrStringStart]; 1448 p = hostnameOrStringStart + 1; 1449 1450 if (c == '?') 1451 return; 1452 1453 if (c == '@') { 1454 // Find end of host name. 1455 int hostnameStart = p; 1456 int hostnameEnd = findFirstOf(str, strLen, p, ">,?"); 1457 bool done; 1458 if (hostnameEnd == -1) { 1459 hostnameEnd = strLen; 1460 done = true; 1461 } else { 1462 p = hostnameEnd; 1463 done = false; 1464 } 1465 1466 nameRanges.append(make_pair(hostnameStart, hostnameEnd)); 1467 1468 if (done) 1469 return; 1470 } else { 1471 // Skip quoted string. 1472 ASSERT(c == '"'); 1473 while (1) { 1474 int escapedCharacterOrStringEnd = findFirstOf(str, strLen, p, "\"\\"); 1475 if (escapedCharacterOrStringEnd == -1) 1476 return; 1477 1478 c = str[escapedCharacterOrStringEnd]; 1479 p = escapedCharacterOrStringEnd + 1; 1480 1481 // If we are the end of the string, then break from the string loop back to the host name loop. 1482 if (c == '"') 1483 break; 1484 1485 // Skip escaped character. 1486 ASSERT(c == '\\'); 1487 if (p == strLen) 1488 return; 1489 1490 ++p; 1491 } 1492 } 1493 } 1494 } 1495 1496 static bool findHostnameInHierarchicalURL(const UChar* str, int strLen, int& startOffset, int& endOffset) 1497 { 1498 // Find the host name in a hierarchical URL. 1499 // It comes after a "://" sequence, with scheme characters preceding, and 1500 // this should be the first colon in the string. 1501 // It ends with the end of the string or a ":" or a path segment ending character. 1502 // If there is a "@" character, the host part is just the part after the "@". 1503 int separator = findFirstOf(str, strLen, 0, ":"); 1504 if (separator == -1 || separator + 2 >= strLen || 1505 str[separator + 1] != '/' || str[separator + 2] != '/') 1506 return false; 1507 1508 // Check that all characters before the :// are valid scheme characters. 1509 if (!isSchemeFirstChar(str[0])) 1510 return false; 1511 for (int i = 1; i < separator; ++i) { 1512 if (!isSchemeChar(str[i])) 1513 return false; 1514 } 1515 1516 // Start after the separator. 1517 int authorityStart = separator + 3; 1518 1519 // Find terminating character. 1520 int hostnameEnd = strLen; 1521 for (int i = authorityStart; i < strLen; ++i) { 1522 UChar c = str[i]; 1523 if (c == ':' || (isPathSegmentEndChar(c) && c != 0)) { 1524 hostnameEnd = i; 1525 break; 1526 } 1527 } 1528 1529 // Find "@" for the start of the host name. 1530 int userInfoTerminator = findFirstOf(str, strLen, authorityStart, "@"); 1531 int hostnameStart; 1532 if (userInfoTerminator == -1 || userInfoTerminator > hostnameEnd) 1533 hostnameStart = authorityStart; 1534 else 1535 hostnameStart = userInfoTerminator + 1; 1536 1537 startOffset = hostnameStart; 1538 endOffset = hostnameEnd; 1539 return true; 1540 } 1541 1542 // Converts all hostnames found in the given input to punycode, preserving the 1543 // rest of the URL unchanged. The output will NOT be null-terminated. 1544 static void encodeHostnames(const String& str, UCharBuffer& output) 1545 { 1546 output.clear(); 1547 1548 if (protocolIs(str, "mailto")) { 1549 Vector<pair<int, int> > hostnameRanges; 1550 findHostnamesInMailToURL(str.characters(), str.length(), hostnameRanges); 1551 int n = hostnameRanges.size(); 1552 int p = 0; 1553 for (int i = 0; i < n; ++i) { 1554 const pair<int, int>& r = hostnameRanges[i]; 1555 output.append(&str.characters()[p], r.first - p); 1556 appendEncodedHostname(output, &str.characters()[r.first], r.second - r.first); 1557 p = r.second; 1558 } 1559 // This will copy either everything after the last hostname, or the 1560 // whole thing if there is no hostname. 1561 output.append(&str.characters()[p], str.length() - p); 1562 } else { 1563 int hostStart, hostEnd; 1564 if (findHostnameInHierarchicalURL(str.characters(), str.length(), hostStart, hostEnd)) { 1565 output.append(str.characters(), hostStart); // Before hostname. 1566 appendEncodedHostname(output, &str.characters()[hostStart], hostEnd - hostStart); 1567 output.append(&str.characters()[hostEnd], str.length() - hostEnd); // After hostname. 1568 } else { 1569 // No hostname to encode, return the input. 1570 output.append(str.characters(), str.length()); 1571 } 1572 } 1573 } 1574 1575 static void encodeRelativeString(const String& rel, const TextEncoding& encoding, CharBuffer& output) 1576 { 1577 UCharBuffer s; 1578 encodeHostnames(rel, s); 1579 1580 TextEncoding pathEncoding(UTF8Encoding()); // Path is always encoded as UTF-8; other parts may depend on the scheme. 1581 1582 int pathEnd = -1; 1583 if (encoding != pathEncoding && encoding.isValid() && !protocolIs(rel, "mailto") && !protocolIs(rel, "data") && !protocolIsJavaScript(rel)) { 1584 // Find the first instance of either # or ?, keep pathEnd at -1 otherwise. 1585 pathEnd = findFirstOf(s.data(), s.size(), 0, "#?"); 1586 } 1587 1588 if (pathEnd == -1) { 1589 CString decoded = pathEncoding.encode(s.data(), s.size(), URLEncodedEntitiesForUnencodables); 1590 output.resize(decoded.length()); 1591 memcpy(output.data(), decoded.data(), decoded.length()); 1592 } else { 1593 CString pathDecoded = pathEncoding.encode(s.data(), pathEnd, URLEncodedEntitiesForUnencodables); 1594 // Unencodable characters in URLs are represented by converting 1595 // them to XML entities and escaping non-alphanumeric characters. 1596 CString otherDecoded = encoding.encode(s.data() + pathEnd, s.size() - pathEnd, URLEncodedEntitiesForUnencodables); 1597 1598 output.resize(pathDecoded.length() + otherDecoded.length()); 1599 memcpy(output.data(), pathDecoded.data(), pathDecoded.length()); 1600 memcpy(output.data() + pathDecoded.length(), otherDecoded.data(), otherDecoded.length()); 1601 } 1602 output.append('\0'); // null-terminate the output. 1603 } 1604 1605 static String substituteBackslashes(const String& string) 1606 { 1607 int questionPos = string.find('?'); 1608 int hashPos = string.find('#'); 1609 int pathEnd; 1610 1611 if (hashPos >= 0 && (questionPos < 0 || questionPos > hashPos)) 1612 pathEnd = hashPos; 1613 else if (questionPos >= 0) 1614 pathEnd = questionPos; 1615 else 1616 pathEnd = string.length(); 1617 1618 return string.left(pathEnd).replace('\\','/') + string.substring(pathEnd); 1619 } 1620 1621 bool KURL::isHierarchical() const 1622 { 1623 if (!m_isValid) 1624 return false; 1625 ASSERT(m_string[m_schemeEnd] == ':'); 1626 return m_string[m_schemeEnd + 1] == '/'; 1627 } 1628 1629 void KURL::copyToBuffer(CharBuffer& buffer) const 1630 { 1631 // FIXME: This throws away the high bytes of all the characters in the string! 1632 // That's fine for a valid URL, which is all ASCII, but not for invalid URLs. 1633 buffer.resize(m_string.length()); 1634 copyASCII(m_string.characters(), m_string.length(), buffer.data()); 1635 } 1636 1637 bool protocolIs(const String& url, const char* protocol) 1638 { 1639 // Do the comparison without making a new string object. 1640 assertProtocolIsGood(protocol); 1641 for (int i = 0; ; ++i) { 1642 if (!protocol[i]) 1643 return url[i] == ':'; 1644 if (toASCIILower(url[i]) != protocol[i]) 1645 return false; 1646 } 1647 } 1648 1649 bool protocolIsJavaScript(const String& url) 1650 { 1651 return protocolIs(url, "javascript"); 1652 } 1653 1654 bool isValidProtocol(const String& protocol) 1655 { 1656 // RFC3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 1657 if (protocol.isEmpty()) 1658 return false; 1659 if (!isSchemeFirstChar(protocol[0])) 1660 return false; 1661 unsigned protocolLength = protocol.length(); 1662 for (unsigned i = 1; i < protocolLength; i++) { 1663 if (!isSchemeChar(protocol[i])) 1664 return false; 1665 } 1666 return true; 1667 } 1668 1669 bool isDefaultPortForProtocol(unsigned short port, const String& protocol) 1670 { 1671 if (protocol.isEmpty()) 1672 return false; 1673 1674 typedef HashMap<String, unsigned, CaseFoldingHash> DefaultPortsMap; 1675 DEFINE_STATIC_LOCAL(DefaultPortsMap, defaultPorts, ()); 1676 if (defaultPorts.isEmpty()) { 1677 defaultPorts.set("http", 80); 1678 defaultPorts.set("https", 443); 1679 defaultPorts.set("ftp", 21); 1680 defaultPorts.set("ftps", 990); 1681 } 1682 return defaultPorts.get(protocol) == port; 1683 } 1684 1685 bool portAllowed(const KURL& url) 1686 { 1687 unsigned short port = url.port(); 1688 1689 // Since most URLs don't have a port, return early for the "no port" case. 1690 if (!port) 1691 return true; 1692 1693 // This blocked port list matches the port blocking that Mozilla implements. 1694 // See http://www.mozilla.org/projects/netlib/PortBanning.html for more information. 1695 static const unsigned short blockedPortList[] = { 1696 1, // tcpmux 1697 7, // echo 1698 9, // discard 1699 11, // systat 1700 13, // daytime 1701 15, // netstat 1702 17, // qotd 1703 19, // chargen 1704 20, // FTP-data 1705 21, // FTP-control 1706 22, // SSH 1707 23, // telnet 1708 25, // SMTP 1709 37, // time 1710 42, // name 1711 43, // nicname 1712 53, // domain 1713 77, // priv-rjs 1714 79, // finger 1715 87, // ttylink 1716 95, // supdup 1717 101, // hostriame 1718 102, // iso-tsap 1719 103, // gppitnp 1720 104, // acr-nema 1721 109, // POP2 1722 110, // POP3 1723 111, // sunrpc 1724 113, // auth 1725 115, // SFTP 1726 117, // uucp-path 1727 119, // nntp 1728 123, // NTP 1729 135, // loc-srv / epmap 1730 139, // netbios 1731 143, // IMAP2 1732 179, // BGP 1733 389, // LDAP 1734 465, // SMTP+SSL 1735 512, // print / exec 1736 513, // login 1737 514, // shell 1738 515, // printer 1739 526, // tempo 1740 530, // courier 1741 531, // Chat 1742 532, // netnews 1743 540, // UUCP 1744 556, // remotefs 1745 563, // NNTP+SSL 1746 587, // ESMTP 1747 601, // syslog-conn 1748 636, // LDAP+SSL 1749 993, // IMAP+SSL 1750 995, // POP3+SSL 1751 2049, // NFS 1752 3659, // apple-sasl / PasswordServer [Apple addition] 1753 4045, // lockd 1754 6000, // X11 1755 6665, // Alternate IRC [Apple addition] 1756 6666, // Alternate IRC [Apple addition] 1757 6667, // Standard IRC [Apple addition] 1758 6668, // Alternate IRC [Apple addition] 1759 6669, // Alternate IRC [Apple addition] 1760 1761 }; 1762 const unsigned short* const blockedPortListEnd = blockedPortList + sizeof(blockedPortList) / sizeof(blockedPortList[0]); 1763 1764 #ifndef NDEBUG 1765 // The port list must be sorted for binary_search to work. 1766 static bool checkedPortList = false; 1767 if (!checkedPortList) { 1768 for (const unsigned short* p = blockedPortList; p != blockedPortListEnd - 1; ++p) 1769 ASSERT(*p < *(p + 1)); 1770 checkedPortList = true; 1771 } 1772 #endif 1773 1774 // If the port is not in the blocked port list, allow it. 1775 if (!binary_search(blockedPortList, blockedPortListEnd, port)) 1776 return true; 1777 1778 // Allow ports 21 and 22 for FTP URLs, as Mozilla does. 1779 if ((port == 21 || port == 22) && url.protocolIs("ftp")) 1780 return true; 1781 1782 // Allow any port number in a file URL, since the port number is ignored. 1783 if (url.protocolIs("file")) 1784 return true; 1785 1786 return false; 1787 } 1788 1789 String mimeTypeFromDataURL(const String& url) 1790 { 1791 ASSERT(protocolIs(url, "data")); 1792 int index = url.find(';'); 1793 if (index == -1) 1794 index = url.find(','); 1795 if (index != -1) { 1796 int len = index - 5; 1797 if (len > 0) 1798 return url.substring(5, len); 1799 return "text/plain"; // Data URLs with no MIME type are considered text/plain. 1800 } 1801 return ""; 1802 } 1803 1804 const KURL& blankURL() 1805 { 1806 DEFINE_STATIC_LOCAL(KURL, staticBlankURL, (ParsedURLString, "about:blank")); 1807 return staticBlankURL; 1808 } 1809 1810 #ifndef NDEBUG 1811 void KURL::print() const 1812 { 1813 printf("%s\n", m_string.utf8().data()); 1814 } 1815 #endif 1816 1817 } 1818 1819 #endif // !USE(GOOGLEURL) 1820