1 /* 2 * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * (C) 1999 Antti Koivisto (koivisto (at) kde.org) 4 * (C) 2001 Dirk Mueller (mueller (at) kde.org) 5 * (C) 2006 Alexey Proskuryakov (ap (at) webkit.org) 6 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 */ 23 24 #include "config.h" 25 #include "KURL.h" 26 #include "LinkHash.h" 27 #include "PlatformString.h" 28 #include <wtf/text/AtomicString.h> 29 #include <wtf/text/StringHash.h> 30 #include <wtf/text/StringImpl.h> 31 32 namespace WebCore { 33 34 static inline size_t findSlashDotDotSlash(const UChar* characters, size_t length, size_t position) 35 { 36 if (length < 4) 37 return notFound; 38 size_t loopLimit = length - 3; 39 for (size_t i = position; i < loopLimit; ++i) { 40 if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/') 41 return i; 42 } 43 return notFound; 44 } 45 46 static inline size_t findSlashSlash(const UChar* characters, size_t length, size_t position) 47 { 48 if (length < 2) 49 return notFound; 50 size_t loopLimit = length - 1; 51 for (size_t i = position; i < loopLimit; ++i) { 52 if (characters[i] == '/' && characters[i + 1] == '/') 53 return i; 54 } 55 return notFound; 56 } 57 58 static inline size_t findSlashDotSlash(const UChar* characters, size_t length, size_t position) 59 { 60 if (length < 3) 61 return notFound; 62 size_t loopLimit = length - 2; 63 for (size_t i = position; i < loopLimit; ++i) { 64 if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/') 65 return i; 66 } 67 return notFound; 68 } 69 70 static inline bool containsColonSlashSlash(const UChar* characters, unsigned length) 71 { 72 if (length < 3) 73 return false; 74 unsigned loopLimit = length - 2; 75 for (unsigned i = 0; i < loopLimit; ++i) { 76 if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/') 77 return true; 78 } 79 return false; 80 } 81 82 static inline void squeezeOutNullCharacters(Vector<UChar, 512>& string) 83 { 84 size_t size = string.size(); 85 size_t i = 0; 86 for (i = 0; i < size; ++i) { 87 if (!string[i]) 88 break; 89 } 90 if (i == size) 91 return; 92 size_t j = i; 93 for (++i; i < size; ++i) { 94 if (UChar character = string[i]) 95 string[j++] = character; 96 } 97 ASSERT(j < size); 98 string.shrink(j); 99 } 100 101 static void cleanSlashDotDotSlashes(Vector<UChar, 512>& path, size_t firstSlash) 102 { 103 size_t slash = firstSlash; 104 do { 105 size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound; 106 // Don't remove the host, i.e. http://foo.org/../foo.html 107 if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) { 108 path[slash] = 0; 109 path[slash + 1] = 0; 110 path[slash + 2] = 0; 111 } else { 112 for (size_t i = previousSlash; i < slash + 3; ++i) 113 path[i] = 0; 114 } 115 slash += 3; 116 } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound); 117 squeezeOutNullCharacters(path); 118 } 119 120 static void mergeDoubleSlashes(Vector<UChar, 512>& path, size_t firstSlash) 121 { 122 size_t refPos = find(path.data(), path.size(), '#'); 123 if (!refPos || refPos == notFound) 124 refPos = path.size(); 125 126 size_t slash = firstSlash; 127 while (slash < refPos) { 128 if (!slash || path[slash - 1] != ':') 129 path[slash++] = 0; 130 else 131 slash += 2; 132 if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound) 133 break; 134 } 135 squeezeOutNullCharacters(path); 136 } 137 138 static void cleanSlashDotSlashes(Vector<UChar, 512>& path, size_t firstSlash) 139 { 140 size_t slash = firstSlash; 141 do { 142 path[slash] = 0; 143 path[slash + 1] = 0; 144 slash += 2; 145 } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound); 146 squeezeOutNullCharacters(path); 147 } 148 149 static inline void cleanPath(Vector<UChar, 512>& path) 150 { 151 // FIXME: Should not do this in the query or anchor part of the URL. 152 size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0); 153 if (firstSlash != notFound) 154 cleanSlashDotDotSlashes(path, firstSlash); 155 156 // FIXME: Should not do this in the query part. 157 firstSlash = findSlashSlash(path.data(), path.size(), 0); 158 if (firstSlash != notFound) 159 mergeDoubleSlashes(path, firstSlash); 160 161 // FIXME: Should not do this in the query or anchor part. 162 firstSlash = findSlashDotSlash(path.data(), path.size(), 0); 163 if (firstSlash != notFound) 164 cleanSlashDotSlashes(path, firstSlash); 165 } 166 167 static inline bool matchLetter(UChar c, UChar lowercaseLetter) 168 { 169 return (c | 0x20) == lowercaseLetter; 170 } 171 172 static inline bool needsTrailingSlash(const UChar* characters, unsigned length) 173 { 174 if (length < 6) 175 return false; 176 if (!matchLetter(characters[0], 'h') 177 || !matchLetter(characters[1], 't') 178 || !matchLetter(characters[2], 't') 179 || !matchLetter(characters[3], 'p')) 180 return false; 181 if (!(characters[4] == ':' 182 || (matchLetter(characters[4], 's') && characters[5] == ':'))) 183 return false; 184 185 unsigned pos = characters[4] == ':' ? 5 : 6; 186 187 // Skip initial two slashes if present. 188 if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/') 189 pos += 2; 190 191 // Find next slash. 192 while (pos < length && characters[pos] != '/') 193 ++pos; 194 195 return pos == length; 196 } 197 198 static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length) 199 { 200 return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length)); 201 } 202 203 LinkHash visitedLinkHash(const UChar* url, unsigned length) 204 { 205 return visitedLinkHashInline(url, length); 206 } 207 208 static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer) 209 { 210 if (attributeURL.isNull()) 211 return; 212 213 const UChar* characters = attributeURL.characters(); 214 unsigned length = attributeURL.length(); 215 216 // This is a poor man's completeURL. Faster with less memory allocation. 217 // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does. 218 // For example, it does not handle international domain names properly. 219 220 // FIXME: It is wrong that we do not do further processing on strings that have "://" in them: 221 // 1) The "://" could be in the query or anchor. 222 // 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it. 223 224 // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does 225 // have a query or anchor. 226 227 bool hasColonSlashSlash = containsColonSlashSlash(characters, length); 228 229 if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) { 230 buffer.append(attributeURL.characters(), attributeURL.length()); 231 return; 232 } 233 234 235 if (hasColonSlashSlash) { 236 // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the 237 // end of the path, *before* the query or anchor. 238 buffer.append(characters, length); 239 buffer.append('/'); 240 return; 241 } 242 243 if (!length) 244 buffer.append(base.string().characters(), base.string().length()); 245 else { 246 switch (characters[0]) { 247 case '/': 248 buffer.append(base.string().characters(), base.pathStart()); 249 break; 250 case '#': 251 buffer.append(base.string().characters(), base.pathEnd()); 252 break; 253 default: 254 buffer.append(base.string().characters(), base.pathAfterLastSlash()); 255 break; 256 } 257 } 258 buffer.append(characters, length); 259 cleanPath(buffer); 260 if (needsTrailingSlash(buffer.data(), buffer.size())) { 261 // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the 262 // end of the path, *before* the query or anchor. 263 buffer.append('/'); 264 } 265 266 return; 267 } 268 269 void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer) 270 { 271 return visitedURLInline(base, attributeURL, buffer); 272 } 273 274 LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL) 275 { 276 Vector<UChar, 512> url; 277 visitedURLInline(base, attributeURL, url); 278 if (url.isEmpty()) 279 return 0; 280 281 return visitedLinkHashInline(url.data(), url.size()); 282 } 283 284 } // namespace WebCore 285